xref: /isa-l/include/erasure_code.h (revision fa5b8baf84e6a18dbaad48a3fa0d1fa062ae2fe8)
1 /**********************************************************************
2   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7     * Redistributions of source code must retain the above copyright
8       notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright
10       notice, this list of conditions and the following disclaimer in
11       the documentation and/or other materials provided with the
12       distribution.
13     * Neither the name of Intel Corporation nor the names of its
14       contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 
30 #ifndef _ERASURE_CODE_H_
31 #define _ERASURE_CODE_H_
32 
33 /**
34  *  @file erasure_code.h
35  *  @brief Interface to functions supporting erasure code encode and decode.
36  *
37  *  This file defines the interface to optimized functions used in erasure
38  *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
39  *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
40  *  set of coefficients.  Values for the coefficients are determined by the type
41  *  of erasure code.  Using a general dot product means that any sequence of
42  *  coefficients may be used including erasure codes based on random
43  *  coefficients.
44  *  Multiple versions of dot product are supplied to calculate 1-6 output
45  *  vectors in one pass.
46  *  Base GF multiply and divide functions can be sped up by defining
47  *  GF_LARGE_TABLES at the expense of memory size.
48  *
49  */
50 
51 #include "gf_vect_mul.h"
52 
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56 
57 /**
58  * @brief Initialize tables for fast Erasure Code encode and decode.
59  *
60  * Generates the expanded tables needed for fast encode or decode for erasure
61  * codes on blocks of data.  32 bytes are generated per input coefficient.
62  *
63  * @param k      The number of vector sources or rows in the generator matrix
64  *               for coding.
65  * @param rows   The number of output vectors to concurrently encode/decode.
66  * @param a      Pointer to sets of arrays of input coefficients used to encode
67  *               or decode data.
68  * @param gftbls Pointer to start of space for concatenated output tables
69  *               generated from input coefficients.  Must be of size 32*k*rows.
70  * @returns none
71  */
72 
73 void
74 ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls);
75 
76 /**
77  * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version.
78  *
79  * Baseline version of ec_init_tables() with same parameters.
80  */
81 
82 void
83 ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls);
84 
85 /**
86  * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
87  *
88  * Given a list of source data blocks, generate one or multiple blocks of
89  * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
90  * suitable set of coefficients, this function will perform the fast generation
91  * or decoding of Reed-Solomon type erasure codes.
92  *
93  * This function determines what instruction sets are enabled and
94  * selects the appropriate version at runtime.
95  *
96  * @param len    Length of each block of data (vector) of source or dest data.
97  * @param k      The number of vector sources or rows in the generator matrix
98  * 		 for coding.
99  * @param rows   The number of output vectors to concurrently encode/decode.
100  * @param gftbls Pointer to array of input tables generated from coding
101  * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
102  * @param data   Array of pointers to source input buffers.
103  * @param coding Array of pointers to coded output buffers.
104  * @returns none
105  */
106 
107 void
108 ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
109                unsigned char **coding);
110 
111 /**
112  * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
113  *
114  * Baseline version of ec_encode_data() with same parameters.
115  */
116 void
117 ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
118                     unsigned char **dest);
119 
120 /**
121  * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate
122  * version.
123  *
124  * Given one source data block, update one or multiple blocks of encoded data as
125  * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
126  * coefficients, this function will perform the fast generation or decoding of
127  * Reed-Solomon type erasure codes from one input source at a time.
128  *
129  * This function determines what instruction sets are enabled and selects the
130  * appropriate version at runtime.
131  *
132  * @param len    Length of each block of data (vector) of source or dest data.
133  * @param k      The number of vector sources or rows in the generator matrix
134  * 		 for coding.
135  * @param rows   The number of output vectors to concurrently encode/decode.
136  * @param vec_i  The vector index corresponding to the single input source.
137  * @param g_tbls Pointer to array of input tables generated from coding
138  * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
139  * @param data   Pointer to single input source used to update output parity.
140  * @param coding Array of pointers to coded output buffers.
141  * @returns none
142  */
143 void
144 ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
145                       unsigned char *data, unsigned char **coding);
146 
147 /**
148  * @brief Generate update for encode or decode of erasure codes from single source.
149  *
150  * Baseline version of ec_encode_data_update().
151  */
152 
153 void
154 ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
155                            unsigned char *data, unsigned char **dest);
156 
157 /**
158  * @brief GF(2^8) vector dot product, runs baseline version.
159  *
160  * Does a GF(2^8) dot product across each byte of the input array and a constant
161  * set of coefficients to produce each byte of the output. Can be used for
162  * erasure coding encode and decode. Function requires pre-calculation of a
163  * 32*vlen byte constant array based on the input coefficients.
164  *
165  * @param len    Length of each vector in bytes. Must be >= 16.
166  * @param vlen   Number of vector sources.
167  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
168  *               on the array of input coefficients. Only elements 32*CONST*j + 1
169  *               of this array are used, where j = (0, 1, 2...) and CONST is the
170  *               number of elements in the array of input coefficients. The
171  *               elements used correspond to the original input coefficients.
172  * @param src    Array of pointers to source inputs.
173  * @param dest   Pointer to destination data array.
174  * @returns none
175  */
176 
177 void
178 gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src,
179                       unsigned char *dest);
180 
181 /**
182  * @brief GF(2^8) vector dot product, runs appropriate version.
183  *
184  * Does a GF(2^8) dot product across each byte of the input array and a constant
185  * set of coefficients to produce each byte of the output. Can be used for
186  * erasure coding encode and decode. Function requires pre-calculation of a
187  * 32*vlen byte constant array based on the input coefficients.
188  *
189  * This function determines what instruction sets are enabled and
190  * selects the appropriate version at runtime.
191  *
192  * @param len    Length of each vector in bytes. Must be >= 32.
193  * @param vlen   Number of vector sources.
194  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
195  *               on the array of input coefficients.
196  * @param src    Array of pointers to source inputs.
197  * @param dest   Pointer to destination data array.
198  * @returns none
199  */
200 
201 void
202 gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src,
203                  unsigned char *dest);
204 
205 /**
206  * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
207  *
208  * Does a GF(2^8) multiply across each byte of input source with expanded
209  * constant and add to destination array. Can be used for erasure coding encode
210  * and decode update when only one source is available at a time. Function
211  * requires pre-calculation of a 32*vec byte constant array based on the input
212  * coefficients.
213  *
214  * This function determines what instruction sets are enabled and selects the
215  * appropriate version at runtime.
216  *
217  * @param len    Length of each vector in bytes. Must be >= 64.
218  * @param vec    The number of vector sources or rows in the generator matrix
219  *               for coding.
220  * @param vec_i  The vector index corresponding to the single input source.
221  * @param gftbls Pointer to array of input tables generated from coding
222  *               coefficients in ec_init_tables(). Must be of size 32*vec.
223  * @param src    Pointer to source input array.
224  * @param dest   Pointer to destination data array.
225  * @returns none
226  */
227 
228 void
229 gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
230             unsigned char *dest);
231 
232 /**
233  * @brief GF(2^8) vector multiply accumulate, baseline version.
234  *
235  * Baseline version of gf_vect_mad() with same parameters.
236  */
237 
238 void
239 gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
240                  unsigned char *dest);
241 
242 // x86 only
243 #if defined(__i386__) || defined(__x86_64__)
244 
245 /**
246  * @brief Generate or decode erasure codes on blocks of data.
247  *
248  * Arch specific version of ec_encode_data() with same parameters.
249  * @requires SSE4.1
250  */
251 void
252 ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
253                    unsigned char **coding);
254 
255 /**
256  * @brief Generate or decode erasure codes on blocks of data.
257  *
258  * Arch specific version of ec_encode_data() with same parameters.
259  * @requires AVX
260  */
261 void
262 ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
263                    unsigned char **coding);
264 
265 /**
266  * @brief Generate or decode erasure codes on blocks of data.
267  *
268  * Arch specific version of ec_encode_data() with same parameters.
269  * @requires AVX2
270  */
271 void
272 ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
273                     unsigned char **coding);
274 
275 /**
276  * @brief Generate update for encode or decode of erasure codes from single source.
277  *
278  * Arch specific version of ec_encode_data_update() with same parameters.
279  * @requires SSE4.1
280  */
281 
282 void
283 ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
284                           unsigned char *data, unsigned char **coding);
285 
286 /**
287  * @brief Generate update for encode or decode of erasure codes from single source.
288  *
289  * Arch specific version of ec_encode_data_update() with same parameters.
290  * @requires AVX
291  */
292 
293 void
294 ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
295                           unsigned char *data, unsigned char **coding);
296 
297 /**
298  * @brief Generate update for encode or decode of erasure codes from single source.
299  *
300  * Arch specific version of ec_encode_data_update() with same parameters.
301  * @requires AVX2
302  */
303 
304 void
305 ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
306                            unsigned char *data, unsigned char **coding);
307 
308 /**
309  * @brief GF(2^8) vector dot product.
310  *
311  * Does a GF(2^8) dot product across each byte of the input array and a constant
312  * set of coefficients to produce each byte of the output. Can be used for
313  * erasure coding encode and decode. Function requires pre-calculation of a
314  * 32*vlen byte constant array based on the input coefficients.
315  * @requires SSE4.1
316  *
317  * @param len    Length of each vector in bytes. Must be >= 16.
318  * @param vlen   Number of vector sources.
319  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
320  *               on the array of input coefficients.
321  * @param src    Array of pointers to source inputs.
322  * @param dest   Pointer to destination data array.
323  * @returns none
324  */
325 
326 void
327 gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
328                      unsigned char *dest);
329 
330 /**
331  * @brief GF(2^8) vector dot product.
332  *
333  * Does a GF(2^8) dot product across each byte of the input array and a constant
334  * set of coefficients to produce each byte of the output. Can be used for
335  * erasure coding encode and decode. Function requires pre-calculation of a
336  * 32*vlen byte constant array based on the input coefficients.
337  * @requires AVX
338  *
339  * @param len    Length of each vector in bytes. Must be >= 16.
340  * @param vlen   Number of vector sources.
341  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
342  *               on the array of input coefficients.
343  * @param src    Array of pointers to source inputs.
344  * @param dest   Pointer to destination data array.
345  * @returns none
346  */
347 
348 void
349 gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
350                      unsigned char *dest);
351 
352 /**
353  * @brief GF(2^8) vector dot product.
354  *
355  * Does a GF(2^8) dot product across each byte of the input array and a constant
356  * set of coefficients to produce each byte of the output. Can be used for
357  * erasure coding encode and decode. Function requires pre-calculation of a
358  * 32*vlen byte constant array based on the input coefficients.
359  * @requires AVX2
360  *
361  * @param len    Length of each vector in bytes. Must be >= 32.
362  * @param vlen   Number of vector sources.
363  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
364  *               on the array of input coefficients.
365  * @param src    Array of pointers to source inputs.
366  * @param dest   Pointer to destination data array.
367  * @returns none
368  */
369 
370 void
371 gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
372                       unsigned char *dest);
373 
374 /**
375  * @brief GF(2^8) vector dot product with two outputs.
376  *
377  * Vector dot product optimized to calculate two outputs at a time. Does two
378  * GF(2^8) dot products across each byte of the input array and two constant
379  * sets of coefficients to produce each byte of the outputs. Can be used for
380  * erasure coding encode and decode. Function requires pre-calculation of a
381  * 2*32*vlen byte constant array based on the two sets of input coefficients.
382  * @requires SSE4.1
383  *
384  * @param len    Length of each vector in bytes. Must be >= 16.
385  * @param vlen   Number of vector sources.
386  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
387  *               based on the array of input coefficients.
388  * @param src    Array of pointers to source inputs.
389  * @param dest   Array of pointers to destination data buffers.
390  * @returns none
391  */
392 
393 void
394 gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
395                       unsigned char **dest);
396 
397 /**
398  * @brief GF(2^8) vector dot product with two outputs.
399  *
400  * Vector dot product optimized to calculate two outputs at a time. Does two
401  * GF(2^8) dot products across each byte of the input array and two constant
402  * sets of coefficients to produce each byte of the outputs. Can be used for
403  * erasure coding encode and decode. Function requires pre-calculation of a
404  * 2*32*vlen byte constant array based on the two sets of input coefficients.
405  * @requires AVX
406  *
407  * @param len    Length of each vector in bytes. Must be >= 16.
408  * @param vlen   Number of vector sources.
409  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
410  *               based on the array of input coefficients.
411  * @param src    Array of pointers to source inputs.
412  * @param dest   Array of pointers to destination data buffers.
413  * @returns none
414  */
415 
416 void
417 gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
418                       unsigned char **dest);
419 
420 /**
421  * @brief GF(2^8) vector dot product with two outputs.
422  *
423  * Vector dot product optimized to calculate two outputs at a time. Does two
424  * GF(2^8) dot products across each byte of the input array and two constant
425  * sets of coefficients to produce each byte of the outputs. Can be used for
426  * erasure coding encode and decode. Function requires pre-calculation of a
427  * 2*32*vlen byte constant array based on the two sets of input coefficients.
428  * @requires AVX2
429  *
430  * @param len    Length of each vector in bytes. Must be >= 32.
431  * @param vlen   Number of vector sources.
432  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
433  *               based on the array of input coefficients.
434  * @param src    Array of pointers to source inputs.
435  * @param dest   Array of pointers to destination data buffers.
436  * @returns none
437  */
438 
439 void
440 gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
441                        unsigned char **dest);
442 
443 /**
444  * @brief GF(2^8) vector dot product with three outputs.
445  *
446  * Vector dot product optimized to calculate three outputs at a time. Does three
447  * GF(2^8) dot products across each byte of the input array and three constant
448  * sets of coefficients to produce each byte of the outputs. Can be used for
449  * erasure coding encode and decode. Function requires pre-calculation of a
450  * 3*32*vlen byte constant array based on the three sets of input coefficients.
451  * @requires SSE4.1
452  *
453  * @param len    Length of each vector in bytes. Must be >= 16.
454  * @param vlen   Number of vector sources.
455  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
456  *               based on the array of input coefficients.
457  * @param src    Array of pointers to source inputs.
458  * @param dest   Array of pointers to destination data buffers.
459  * @returns none
460  */
461 
462 void
463 gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
464                       unsigned char **dest);
465 
466 /**
467  * @brief GF(2^8) vector dot product with three outputs.
468  *
469  * Vector dot product optimized to calculate three outputs at a time. Does three
470  * GF(2^8) dot products across each byte of the input array and three constant
471  * sets of coefficients to produce each byte of the outputs. Can be used for
472  * erasure coding encode and decode. Function requires pre-calculation of a
473  * 3*32*vlen byte constant array based on the three sets of input coefficients.
474  * @requires AVX
475  *
476  * @param len    Length of each vector in bytes. Must be >= 16.
477  * @param vlen   Number of vector sources.
478  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
479  *               based on the array of input coefficients.
480  * @param src    Array of pointers to source inputs.
481  * @param dest   Array of pointers to destination data buffers.
482  * @returns none
483  */
484 
485 void
486 gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
487                       unsigned char **dest);
488 
489 /**
490  * @brief GF(2^8) vector dot product with three outputs.
491  *
492  * Vector dot product optimized to calculate three outputs at a time. Does three
493  * GF(2^8) dot products across each byte of the input array and three constant
494  * sets of coefficients to produce each byte of the outputs. Can be used for
495  * erasure coding encode and decode. Function requires pre-calculation of a
496  * 3*32*vlen byte constant array based on the three sets of input coefficients.
497  * @requires AVX2
498  *
499  * @param len    Length of each vector in bytes. Must be >= 32.
500  * @param vlen   Number of vector sources.
501  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
502  *               based on the array of input coefficients.
503  * @param src    Array of pointers to source inputs.
504  * @param dest   Array of pointers to destination data buffers.
505  * @returns none
506  */
507 
508 void
509 gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
510                        unsigned char **dest);
511 
512 /**
513  * @brief GF(2^8) vector dot product with four outputs.
514  *
515  * Vector dot product optimized to calculate four outputs at a time. Does four
516  * GF(2^8) dot products across each byte of the input array and four constant
517  * sets of coefficients to produce each byte of the outputs. Can be used for
518  * erasure coding encode and decode. Function requires pre-calculation of a
519  * 4*32*vlen byte constant array based on the four sets of input coefficients.
520  * @requires SSE4.1
521  *
522  * @param len    Length of each vector in bytes. Must be >= 16.
523  * @param vlen   Number of vector sources.
524  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
525  *               based on the array of input coefficients.
526  * @param src    Array of pointers to source inputs.
527  * @param dest   Array of pointers to destination data buffers.
528  * @returns none
529  */
530 
531 void
532 gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
533                       unsigned char **dest);
534 
535 /**
536  * @brief GF(2^8) vector dot product with four outputs.
537  *
538  * Vector dot product optimized to calculate four outputs at a time. Does four
539  * GF(2^8) dot products across each byte of the input array and four constant
540  * sets of coefficients to produce each byte of the outputs. Can be used for
541  * erasure coding encode and decode. Function requires pre-calculation of a
542  * 4*32*vlen byte constant array based on the four sets of input coefficients.
543  * @requires AVX
544  *
545  * @param len    Length of each vector in bytes. Must be >= 16.
546  * @param vlen   Number of vector sources.
547  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
548  *               based on the array of input coefficients.
549  * @param src    Array of pointers to source inputs.
550  * @param dest   Array of pointers to destination data buffers.
551  * @returns none
552  */
553 
554 void
555 gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
556                       unsigned char **dest);
557 
558 /**
559  * @brief GF(2^8) vector dot product with four outputs.
560  *
561  * Vector dot product optimized to calculate four outputs at a time. Does four
562  * GF(2^8) dot products across each byte of the input array and four constant
563  * sets of coefficients to produce each byte of the outputs. Can be used for
564  * erasure coding encode and decode. Function requires pre-calculation of a
565  * 4*32*vlen byte constant array based on the four sets of input coefficients.
566  * @requires AVX2
567  *
568  * @param len    Length of each vector in bytes. Must be >= 32.
569  * @param vlen   Number of vector sources.
570  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
571  *               based on the array of input coefficients.
572  * @param src    Array of pointers to source inputs.
573  * @param dest   Array of pointers to destination data buffers.
574  * @returns none
575  */
576 
577 void
578 gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
579                        unsigned char **dest);
580 
581 /**
582  * @brief GF(2^8) vector dot product with five outputs.
583  *
584  * Vector dot product optimized to calculate five outputs at a time. Does five
585  * GF(2^8) dot products across each byte of the input array and five constant
586  * sets of coefficients to produce each byte of the outputs. Can be used for
587  * erasure coding encode and decode. Function requires pre-calculation of a
588  * 5*32*vlen byte constant array based on the five sets of input coefficients.
589  * @requires SSE4.1
590  *
591  * @param len    Length of each vector in bytes. Must be >= 16.
592  * @param vlen   Number of vector sources.
593  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
594  *               based on the array of input coefficients.
595  * @param src    Array of pointers to source inputs.
596  * @param dest   Array of pointers to destination data buffers.
597  * @returns none
598  */
599 
600 void
601 gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
602                       unsigned char **dest);
603 
604 /**
605  * @brief GF(2^8) vector dot product with five outputs.
606  *
607  * Vector dot product optimized to calculate five outputs at a time. Does five
608  * GF(2^8) dot products across each byte of the input array and five constant
609  * sets of coefficients to produce each byte of the outputs. Can be used for
610  * erasure coding encode and decode. Function requires pre-calculation of a
611  * 5*32*vlen byte constant array based on the five sets of input coefficients.
612  * @requires AVX
613  *
614  * @param len    Length of each vector in bytes. Must be >= 16.
615  * @param vlen   Number of vector sources.
616  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
617  *               based on the array of input coefficients.
618  * @param src    Array of pointers to source inputs.
619  * @param dest   Array of pointers to destination data buffers.
620  * @returns none
621  */
622 
623 void
624 gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
625                       unsigned char **dest);
626 
627 /**
628  * @brief GF(2^8) vector dot product with five outputs.
629  *
630  * Vector dot product optimized to calculate five outputs at a time. Does five
631  * GF(2^8) dot products across each byte of the input array and five constant
632  * sets of coefficients to produce each byte of the outputs. Can be used for
633  * erasure coding encode and decode. Function requires pre-calculation of a
634  * 5*32*vlen byte constant array based on the five sets of input coefficients.
635  * @requires AVX2
636  *
637  * @param len    Length of each vector in bytes. Must be >= 32.
638  * @param vlen   Number of vector sources.
639  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
640  *               based on the array of input coefficients.
641  * @param src    Array of pointers to source inputs.
642  * @param dest   Array of pointers to destination data buffers.
643  * @returns none
644  */
645 
646 void
647 gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
648                        unsigned char **dest);
649 
650 /**
651  * @brief GF(2^8) vector dot product with six outputs.
652  *
653  * Vector dot product optimized to calculate six outputs at a time. Does six
654  * GF(2^8) dot products across each byte of the input array and six constant
655  * sets of coefficients to produce each byte of the outputs. Can be used for
656  * erasure coding encode and decode. Function requires pre-calculation of a
657  * 6*32*vlen byte constant array based on the six sets of input coefficients.
658  * @requires SSE4.1
659  *
660  * @param len    Length of each vector in bytes. Must be >= 16.
661  * @param vlen   Number of vector sources.
662  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
663  *               based on the array of input coefficients.
664  * @param src    Array of pointers to source inputs.
665  * @param dest   Array of pointers to destination data buffers.
666  * @returns none
667  */
668 
669 void
670 gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
671                       unsigned char **dest);
672 
673 /**
674  * @brief GF(2^8) vector dot product with six outputs.
675  *
676  * Vector dot product optimized to calculate six outputs at a time. Does six
677  * GF(2^8) dot products across each byte of the input array and six constant
678  * sets of coefficients to produce each byte of the outputs. Can be used for
679  * erasure coding encode and decode. Function requires pre-calculation of a
680  * 6*32*vlen byte constant array based on the six sets of input coefficients.
681  * @requires AVX
682  *
683  * @param len    Length of each vector in bytes. Must be >= 16.
684  * @param vlen   Number of vector sources.
685  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
686  *               based on the array of input coefficients.
687  * @param src    Array of pointers to source inputs.
688  * @param dest   Array of pointers to destination data buffers.
689  * @returns none
690  */
691 
692 void
693 gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
694                       unsigned char **dest);
695 
696 /**
697  * @brief GF(2^8) vector dot product with six outputs. AVX2 version.
698  *
699  * Vector dot product optimized to calculate six outputs at a time. Computes six
700  * GF(2^8) dot products, one per constant set of coefficients, across each byte
701  * of the input arrays to produce each byte of the six outputs. Can be used for
702  * erasure coding encode and decode. Function requires pre-calculation of a
703  * 6*32*vlen byte constant array based on the six sets of input coefficients.
704  * @requires AVX2
705  *
706  * @param len    Length of each vector in bytes. Must be >= 32.
707  * @param vlen   Number of vector sources.
708  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
709  *               based on the six sets of input coefficients.
710  * @param src    Array of pointers to the vlen source inputs.
711  * @param dest   Array of pointers to the six destination data buffers.
712  * @returns none
713  */
714
715 void
716 gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
717                        unsigned char **dest);
718 
719 /**
720  * @brief GF(2^8) vector multiply accumulate, SSE version.
721  *
722  * Arch specific (SSE4.1) version of gf_vect_mad() with the same parameters.
723  * @requires SSE4.1
724  */
725
726 void
727 gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
728                 unsigned char *dest);
729 /**
730  * @brief GF(2^8) vector multiply accumulate, AVX version.
731  *
732  * Arch specific (AVX) version of gf_vect_mad() with the same parameters.
733  * @requires AVX
734  */
735
736 void
737 gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
738                 unsigned char *dest);
739 
740 /**
741  * @brief GF(2^8) vector multiply accumulate, AVX2 version.
742  *
743  * Arch specific (AVX2) version of gf_vect_mad() with the same parameters.
744  * @requires AVX2
745  */
746
747 void
748 gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
749                  unsigned char *dest);
750 
751 /**
752  * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
753  *
754  * Does a GF(2^8) multiply across each byte of the input source with expanded
755  * constants and adds the result to the destination arrays. Can be used for
756  * erasure coding encode and decode update when only one source is available
757  * at a time. Function requires pre-calculation of a 32*vec byte constant array
758  * based on the input coefficients.
759  * @requires SSE4.1
760  *
761  * @param len    Length of each vector in bytes. Must be >= 32.
762  * @param vec    The number of vector sources or rows in the generator matrix
763  *               for coding.
764  * @param vec_i  The vector index corresponding to the single input source.
765  * @param gftbls Pointer to array of input tables generated from coding
766  *               coefficients in ec_init_tables(). Must be of size 32*vec.
767  * @param src    Pointer to source input array.
768  * @param dest   Array of pointers to destination buffers (read and accumulated into).
769  * @returns none
770  */
771
772 void
773 gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
774                  unsigned char **dest);
775 
776 /**
777  * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(), declared with an identical parameter list.
778  * @requires AVX
779  */
780 void
781 gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
782                  unsigned char **dest);
783 /**
784  * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(), declared with an identical parameter list.
785  * @requires AVX2
786  */
787 void
788 gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
789                   unsigned char **dest);
790 
791 /**
792  * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
793  *
794  * Does a GF(2^8) multiply across each byte of the input source with expanded
795  * constants and adds the result to the destination arrays. Can be used for
796  * erasure coding encode and decode update when only one source is available
797  * at a time. Function requires pre-calculation of a 32*vec byte constant array
798  * based on the input coefficients.
799  * @requires SSE4.1
800  *
801  * @param len    Length of each vector in bytes. Must be >= 32.
802  * @param vec    The number of vector sources or rows in the generator matrix
803  *               for coding.
804  * @param vec_i  The vector index corresponding to the single input source.
805  * @param gftbls Pointer to array of input tables generated from coding
806  *               coefficients in ec_init_tables(). Must be of size 32*vec.
807  * @param src    Pointer to source input array.
808  * @param dest   Array of pointers to destination buffers (read and accumulated into).
809  * @returns none
810  */
811
812 void
813 gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
814                  unsigned char **dest);
815 
816 /**
817  * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(), declared with an identical parameter list.
818  * @requires AVX
819  */
820 void
821 gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
822                  unsigned char **dest);
823 
824 /**
825  * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(), declared with an identical parameter list.
826  * @requires AVX2
827  */
828 void
829 gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
830                   unsigned char **dest);
831 
832 /**
833  * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
834  *
835  * Does a GF(2^8) multiply across each byte of the input source with expanded
836  * constants and adds the result to the destination arrays. Can be used for
837  * erasure coding encode and decode update when only one source is available
838  * at a time. Function requires pre-calculation of a 32*vec byte constant array
839  * based on the input coefficients.
840  * @requires SSE4.1
841  *
842  * @param len    Length of each vector in bytes. Must be >= 32.
843  * @param vec    The number of vector sources or rows in the generator matrix
844  *               for coding.
845  * @param vec_i  The vector index corresponding to the single input source.
846  * @param gftbls Pointer to array of input tables generated from coding
847  *               coefficients in ec_init_tables(). Must be of size 32*vec.
848  * @param src    Pointer to source input array.
849  * @param dest   Array of pointers to destination buffers (read and accumulated into).
850  * @returns none
851  */
852
853 void
854 gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
855                  unsigned char **dest);
856 
857 /**
858  * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(), declared with an identical parameter list.
859  * @requires AVX
860  */
861 void
862 gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
863                  unsigned char **dest);
864 /**
865  * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(), declared with an identical parameter list.
866  * @requires AVX2
867  */
868 void
869 gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
870                   unsigned char **dest);
871 
872 /**
873  * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. Declared with the same parameter list as gf_4vect_mad_sse().
874  * @requires SSE4.1
875  */
876 void
877 gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
878                  unsigned char **dest);
879 
880 /**
881  * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. Declared with the same parameter list as gf_4vect_mad_sse().
882  * @requires AVX
883  */
884 void
885 gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
886                  unsigned char **dest);
887 /**
888  * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. Declared with the same parameter list as gf_4vect_mad_sse().
889  * @requires AVX2
890  */
891 void
892 gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
893                   unsigned char **dest);
894 
895 /**
896  * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. Declared with the same parameter list as gf_4vect_mad_sse().
897  * @requires SSE4.1
898  */
899 void
900 gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
901                  unsigned char **dest);
902 /**
903  * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. Declared with the same parameter list as gf_4vect_mad_sse().
904  * @requires AVX
905  */
906 void
907 gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
908                  unsigned char **dest);
909 
910 /**
911  * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. Declared with the same parameter list as gf_4vect_mad_sse().
912  * @requires AVX2
913  */
914 void
915 gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
916                   unsigned char **dest);
917 
918 #endif
919 
920 /**********************************************************************
921  * The remaining are lib support functions used in GF(2^8) operations.
922  */
923 
924 /**
925  * @brief Single element GF(2^8) multiply.
926  *
927  * @param a  First multiplicand, a single GF(2^8) element
928  * @param b  Second multiplicand, a single GF(2^8) element
929  * @returns  Product of a and b in GF(2^8)
930  */
931
932 unsigned char
933 gf_mul(unsigned char a, unsigned char b);
934 
935 /**
936  * @brief Single element GF(2^8) inverse.
937  *
938  * @param a  Input field element to invert
939  * @returns  Field element b such that a x b = {1}. NOTE(review): result for a = 0 (no inverse exists) is not stated here - confirm.
940  */
941
942 unsigned char
943 gf_inv(unsigned char a);
944 
945 /**
946  * @brief Generate a Vandermonde matrix of coefficients to be used for encoding.
947  *
948  * Vandermonde matrix example of encoding coefficients where the high portion of
949  * the matrix is identity matrix I and the lower portion is constructed as
950  * 2^{i*(j-k+1)}, i:{0,k-1} j:{k,m-1}. Commonly used method for choosing
951  * coefficients in erasure encoding, but it does not guarantee that every
952  * sub-matrix is invertible. For large pairs of m and k it is possible to find
953  * cases where the decode matrix chosen from sources and parity is not
954  * invertible. Users may want to adjust for certain pairs m and k. If m and k
955  * satisfy one of the following conditions, no adjustment is required:
956  *
957  * - k <= 3
958  * - k = 4, m <= 25
959  * - k = 5, m <= 10
960  * - k <= 21, m-k = 4
961  * - m - k <= 3.
962  *
963  * @param a  [m x k] array that receives the generated coefficients.
964  * @param m  number of rows in matrix corresponding to srcs + parity.
965  * @param k  number of columns in matrix corresponding to srcs.
966  * @returns  none
967  */
968
969 void
970 gf_gen_rs_matrix(unsigned char *a, int m, int k);
971 
972 /**
973  * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
974  *
975  * Cauchy matrix example of encoding coefficients where the high portion of the
976  * matrix is identity matrix I and the lower portion is constructed as
977  * 1/(i + j) | i != j, i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible.
978  *
979  * @param a  [m x k] array that receives the generated coefficients.
980  * @param m  number of rows in matrix corresponding to srcs + parity.
981  * @param k  number of columns in matrix corresponding to srcs.
982  * @returns  none
983  */
984
985 void
986 gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
987 
988 /**
989  * @brief Invert a matrix in GF(2^8).
990  *
991  * Attempts to construct an n x n inverse of the input matrix. Returns non-zero
992  * if the input is singular. The input matrix is always destroyed in the process.
993  *
994  * @param in  input matrix [n x n], destroyed by the invert process
995  * @param out output matrix such that [in] x [out] = [I], the identity matrix
996  * @param n   dimension of the square matrices [n x n]
997  * @returns 0 on success, non-zero on failure due to a singular input matrix
998  */
999
1000 int
1001 gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
1002 
1003 /*************************************************************/
1004 
1005 #ifdef __cplusplus
1006 }
1007 #endif
1008 
1009 #endif //_ERASURE_CODE_H_
1010