xref: /isa-l/include/erasure_code.h (revision 5be1ba2215613379677fb85cccb327627bdac6a0)
1 /**********************************************************************
2   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7     * Redistributions of source code must retain the above copyright
8       notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright
10       notice, this list of conditions and the following disclaimer in
11       the documentation and/or other materials provided with the
12       distribution.
13     * Neither the name of Intel Corporation nor the names of its
14       contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 
30 
31 #ifndef _ERASURE_CODE_H_
32 #define _ERASURE_CODE_H_
33 
34 /**
35  *  @file erasure_code.h
36  *  @brief Interface to functions supporting erasure code encode and decode.
37  *
38  *  This file defines the interface to optimized functions used in erasure
39  *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
40  *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
41  *  set of coefficients.  Values for the coefficients are determined by the type
42  *  of erasure code.  Using a general dot product means that any sequence of
43  *  coefficients may be used including erasure codes based on random
44  *  coefficients.
45  *  Multiple versions of dot product are supplied to calculate 1-6 output
46  *  vectors in one pass.
47  *  Base GF multiply and divide functions can be sped up by defining
48  *  GF_LARGE_TABLES at the expense of memory size.
49  *
50  */
51 
52 #include "gf_vect_mul.h"
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 /**
59  * @brief Initialize tables for fast Erasure Code encode and decode.
60  *
61  * Generates the expanded tables needed for fast encode or decode for erasure
62  * codes on blocks of data.  32 bytes are generated for each input coefficient.
63  *
64  * @param k      The number of vector sources or rows in the generator matrix
65  *               for coding.
66  * @param rows   The number of output vectors to concurrently encode/decode.
67  * @param a      Pointer to sets of arrays of input coefficients used to encode
68  *               or decode data.
69  * @param gftbls Pointer to start of space for concatenated output tables
70  *               generated from input coefficients.  Must be of size 32*k*rows.
71  * @returns none
72  */
73 
74 void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
75 
76 /**
77  * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
78  *
79  * Given a list of source data blocks, generate one or multiple blocks of
80  * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
81  * suitable set of coefficients, this function will perform the fast generation
82  * or decoding of Reed-Solomon type erasure codes.
83  *
84  * This function determines what instruction sets are enabled and
85  * selects the appropriate version at runtime.
86  *
87  * @param len    Length of each block of data (vector) of source or dest data.
88  * @param k      The number of vector sources or rows in the generator matrix
89  * 		 for coding.
90  * @param rows   The number of output vectors to concurrently encode/decode.
91  * @param gftbls Pointer to array of input tables generated from coding
92  * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
93  * @param data   Array of pointers to source input buffers.
94  * @param coding Array of pointers to coded output buffers.
95  * @returns none
96  */
97 
98 void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
99 		    unsigned char **coding);
100 
101 /**
102  * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
103  *
104  * Baseline version of ec_encode_data(); parameters correspond by position (srcs=k, dests=rows, v=gftbls, src=data, dest=coding).
105  */
106 void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
107 			 unsigned char **dest);
108 
109 /**
110  * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
111  *
112  * Given one source data block, update one or multiple blocks of encoded data as
113  * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
114  * coefficients, this function will perform the fast generation or decoding of
115  * Reed-Solomon type erasure codes from one input source at a time.
116  *
117  * This function determines what instruction sets are enabled and selects the
118  * appropriate version at runtime.
119  *
120  * @param len    Length of each block of data (vector) of source or dest data.
121  * @param k      The number of vector sources or rows in the generator matrix
122  * 		 for coding.
123  * @param rows   The number of output vectors to concurrently encode/decode.
124  * @param vec_i  The vector index corresponding to the single input source.
125  * @param g_tbls Pointer to array of input tables generated from coding
126  * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
127  * @param data   Pointer to single input source used to update output parity.
128  * @param coding Array of pointers to coded output buffers.
129  * @returns none
130  */
131 void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
132 			   unsigned char *data, unsigned char **coding);
133 
134 /**
135  * @brief Generate update for encode or decode of erasure codes from single source.
136  *
137  * Baseline version of ec_encode_data_update(); parameters correspond by position (v=g_tbls, dest=coding).
138  */
139 
140 void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
141 				unsigned char *data, unsigned char **dest);
142 
143 /**
144  * @brief GF(2^8) vector dot product, runs baseline version.
145  *
146  * Does a GF(2^8) dot product across each byte of the input array and a constant
147  * set of coefficients to produce each byte of the output. Can be used for
148  * erasure coding encode and decode. Function requires pre-calculation of a
149  * 32*vlen byte constant array based on the input coefficients.
150  *
151  * @param len    Length of each vector in bytes. Must be >= 16.
152  * @param vlen   Number of vector sources.
153  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
154  *               on the array of input coefficients. Only elements 32*CONST*j + 1
155  *               of this array are used, where j = (0, 1, 2...) and CONST is the
156  *               number of elements in the array of input coefficients. The
157  *               elements used correspond to the original input coefficients.
158  * @param src    Array of pointers to source inputs.
159  * @param dest   Pointer to destination data array.
160  * @returns none
161  */
162 
163 
164 void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
165                         unsigned char **src, unsigned char *dest);
166 
167 /**
168  * @brief GF(2^8) vector dot product, runs appropriate version.
169  *
170  * Does a GF(2^8) dot product across each byte of the input array and a constant
171  * set of coefficients to produce each byte of the output. Can be used for
172  * erasure coding encode and decode. Function requires pre-calculation of a
173  * 32*vlen byte constant array based on the input coefficients.
174  *
175  * This function determines what instruction sets are enabled and
176  * selects the appropriate version at runtime.
177  *
178  * @param len    Length of each vector in bytes. Must be >= 32.
179  * @param vlen   Number of vector sources.
180  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
181  *               on the array of input coefficients.
182  * @param src    Array of pointers to source inputs.
183  * @param dest   Pointer to destination data array.
184  * @returns none
185  */
186 
187 void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
188                         unsigned char **src, unsigned char *dest);
189 
190 /**
191  * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
192  *
193  * Does a GF(2^8) multiply across each byte of input source with expanded
194  * constant and add to destination array. Can be used for erasure coding encode
195  * and decode update when only one source is available at a time. Function
196  * requires pre-calculation of a 32*vec byte constant array based on the input
197  * coefficients.
198  *
199  * This function determines what instruction sets are enabled and selects the
200  * appropriate version at runtime.
201  *
202  * @param len    Length of each vector in bytes. Must be >= 32.
203  * @param vec    The number of vector sources or rows in the generator matrix
204  * 		 for coding.
205  * @param vec_i  The vector index corresponding to the single input source.
206  * @param gftbls Pointer to array of input tables generated from coding
207  * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
208  * @param src    Pointer to source input array.
209  * @param dest   Pointer to destination input/output array.
210  * @returns none
211  */
212 
213 void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
214 		 unsigned char *dest);
215 
216 /**
217  * @brief GF(2^8) vector multiply accumulate, baseline version.
218  *
219  * Baseline version of gf_vect_mad() with same parameters (v corresponds to gftbls).
220  */
221 
222 void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
223 		      unsigned char *dest);
224 
225 // x86 only
226 #if defined(__i386__) || defined(__x86_64__)
227 
228 /**
229  * @brief Generate or decode erasure codes on blocks of data.
230  *
231  * Arch specific version of ec_encode_data() with same parameters.
232  * @requires SSE4.1
233  */
234 void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
235 			unsigned char **coding);
236 
237 /**
238  * @brief Generate or decode erasure codes on blocks of data.
239  *
240  * Arch specific version of ec_encode_data() with same parameters.
241  * @requires AVX
242  */
243 void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
244 			unsigned char **coding);
245 
246 /**
247  * @brief Generate or decode erasure codes on blocks of data.
248  *
249  * Arch specific version of ec_encode_data() with same parameters.
250  * @requires AVX2
251  */
252 void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
253 			 unsigned char **coding);
254 
255 /**
256  * @brief Generate update for encode or decode of erasure codes from single source.
257  *
258  * Arch specific version of ec_encode_data_update() with same parameters.
259  * @requires SSE4.1
260  */
261 
262 void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
263 			       unsigned char *data, unsigned char **coding);
264 
265 /**
266  * @brief Generate update for encode or decode of erasure codes from single source.
267  *
268  * Arch specific version of ec_encode_data_update() with same parameters.
269  * @requires AVX
270  */
271 
272 void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
273 			       unsigned char *data, unsigned char **coding);
274 
275 /**
276  * @brief Generate update for encode or decode of erasure codes from single source.
277  *
278  * Arch specific version of ec_encode_data_update() with same parameters.
279  * @requires AVX2
280  */
281 
282 void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
283 				unsigned char *data, unsigned char **coding);
284 
285 /**
286  * @brief GF(2^8) vector dot product.
287  *
288  * Does a GF(2^8) dot product across each byte of the input array and a constant
289  * set of coefficients to produce each byte of the output. Can be used for
290  * erasure coding encode and decode. Function requires pre-calculation of a
291  * 32*vlen byte constant array based on the input coefficients.
292  * @requires SSE4.1
293  *
294  * @param len    Length of each vector in bytes. Must be >= 16.
295  * @param vlen   Number of vector sources.
296  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
297  *               on the array of input coefficients.
298  * @param src    Array of pointers to source inputs.
299  * @param dest   Pointer to destination data array.
300  * @returns none
301  */
302 
303 void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
304 			unsigned char **src, unsigned char *dest);
305 
306 /**
307  * @brief GF(2^8) vector dot product.
308  *
309  * Does a GF(2^8) dot product across each byte of the input array and a constant
310  * set of coefficients to produce each byte of the output. Can be used for
311  * erasure coding encode and decode. Function requires pre-calculation of a
312  * 32*vlen byte constant array based on the input coefficients.
313  * @requires AVX
314  *
315  * @param len    Length of each vector in bytes. Must be >= 16.
316  * @param vlen   Number of vector sources.
317  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
318  *               on the array of input coefficients.
319  * @param src    Array of pointers to source inputs.
320  * @param dest   Pointer to destination data array.
321  * @returns none
322  */
323 
324 void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
325 			unsigned char **src, unsigned char *dest);
326 
327 /**
328  * @brief GF(2^8) vector dot product.
329  *
330  * Does a GF(2^8) dot product across each byte of the input array and a constant
331  * set of coefficients to produce each byte of the output. Can be used for
332  * erasure coding encode and decode. Function requires pre-calculation of a
333  * 32*vlen byte constant array based on the input coefficients.
334  * @requires AVX2
335  *
336  * @param len    Length of each vector in bytes. Must be >= 32.
337  * @param vlen   Number of vector sources.
338  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
339  *               on the array of input coefficients.
340  * @param src    Array of pointers to source inputs.
341  * @param dest   Pointer to destination data array.
342  * @returns none
343  */
344 
345 void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
346 			unsigned char **src, unsigned char *dest);
347 
348 /**
349  * @brief GF(2^8) vector dot product with two outputs.
350  *
351  * Vector dot product optimized to calculate two outputs at a time. Does two
352  * GF(2^8) dot products across each byte of the input array and two constant
353  * sets of coefficients to produce each byte of the outputs. Can be used for
354  * erasure coding encode and decode. Function requires pre-calculation of a
355  * 2*32*vlen byte constant array based on the two sets of input coefficients.
356  * @requires SSE4.1
357  *
358  * @param len    Length of each vector in bytes. Must be >= 16.
359  * @param vlen   Number of vector sources.
360  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
361  *               based on the array of input coefficients.
362  * @param src    Array of pointers to source inputs.
363  * @param dest   Array of pointers to destination data buffers.
364  * @returns none
365  */
366 
367 void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
368 			unsigned char **src, unsigned char **dest);
369 
370 /**
371  * @brief GF(2^8) vector dot product with two outputs.
372  *
373  * Vector dot product optimized to calculate two outputs at a time. Does two
374  * GF(2^8) dot products across each byte of the input array and two constant
375  * sets of coefficients to produce each byte of the outputs. Can be used for
376  * erasure coding encode and decode. Function requires pre-calculation of a
377  * 2*32*vlen byte constant array based on the two sets of input coefficients.
378  * @requires AVX
379  *
380  * @param len    Length of each vector in bytes. Must be >= 16.
381  * @param vlen   Number of vector sources.
382  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
383  *               based on the array of input coefficients.
384  * @param src    Array of pointers to source inputs.
385  * @param dest   Array of pointers to destination data buffers.
386  * @returns none
387  */
388 
389 void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
390 			unsigned char **src, unsigned char **dest);
391 
392 /**
393  * @brief GF(2^8) vector dot product with two outputs.
394  *
395  * Vector dot product optimized to calculate two outputs at a time. Does two
396  * GF(2^8) dot products across each byte of the input array and two constant
397  * sets of coefficients to produce each byte of the outputs. Can be used for
398  * erasure coding encode and decode. Function requires pre-calculation of a
399  * 2*32*vlen byte constant array based on the two sets of input coefficients.
400  * @requires AVX2
401  *
402  * @param len    Length of each vector in bytes. Must be >= 32.
403  * @param vlen   Number of vector sources.
404  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
405  *               based on the array of input coefficients.
406  * @param src    Array of pointers to source inputs.
407  * @param dest   Array of pointers to destination data buffers.
408  * @returns none
409  */
410 
411 void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
412 			unsigned char **src, unsigned char **dest);
413 
414 /**
415  * @brief GF(2^8) vector dot product with three outputs.
416  *
417  * Vector dot product optimized to calculate three outputs at a time. Does three
418  * GF(2^8) dot products across each byte of the input array and three constant
419  * sets of coefficients to produce each byte of the outputs. Can be used for
420  * erasure coding encode and decode. Function requires pre-calculation of a
421  * 3*32*vlen byte constant array based on the three sets of input coefficients.
422  * @requires SSE4.1
423  *
424  * @param len    Length of each vector in bytes. Must be >= 16.
425  * @param vlen   Number of vector sources.
426  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
427  *               based on the array of input coefficients.
428  * @param src    Array of pointers to source inputs.
429  * @param dest   Array of pointers to destination data buffers.
430  * @returns none
431  */
432 
433 void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
434 			unsigned char **src, unsigned char **dest);
435 
436 /**
437  * @brief GF(2^8) vector dot product with three outputs.
438  *
439  * Vector dot product optimized to calculate three outputs at a time. Does three
440  * GF(2^8) dot products across each byte of the input array and three constant
441  * sets of coefficients to produce each byte of the outputs. Can be used for
442  * erasure coding encode and decode. Function requires pre-calculation of a
443  * 3*32*vlen byte constant array based on the three sets of input coefficients.
444  * @requires AVX
445  *
446  * @param len    Length of each vector in bytes. Must be >= 16.
447  * @param vlen   Number of vector sources.
448  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
449  *               based on the array of input coefficients.
450  * @param src    Array of pointers to source inputs.
451  * @param dest   Array of pointers to destination data buffers.
452  * @returns none
453  */
454 
455 void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
456 			unsigned char **src, unsigned char **dest);
457 
458 /**
459  * @brief GF(2^8) vector dot product with three outputs.
460  *
461  * Vector dot product optimized to calculate three outputs at a time. Does three
462  * GF(2^8) dot products across each byte of the input array and three constant
463  * sets of coefficients to produce each byte of the outputs. Can be used for
464  * erasure coding encode and decode. Function requires pre-calculation of a
465  * 3*32*vlen byte constant array based on the three sets of input coefficients.
466  * @requires AVX2
467  *
468  * @param len    Length of each vector in bytes. Must be >= 32.
469  * @param vlen   Number of vector sources.
470  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
471  *               based on the array of input coefficients.
472  * @param src    Array of pointers to source inputs.
473  * @param dest   Array of pointers to destination data buffers.
474  * @returns none
475  */
476 
477 void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
478 			unsigned char **src, unsigned char **dest);
479 
480 /**
481  * @brief GF(2^8) vector dot product with four outputs.
482  *
483  * Vector dot product optimized to calculate four outputs at a time. Does four
484  * GF(2^8) dot products across each byte of the input array and four constant
485  * sets of coefficients to produce each byte of the outputs. Can be used for
486  * erasure coding encode and decode. Function requires pre-calculation of a
487  * 4*32*vlen byte constant array based on the four sets of input coefficients.
488  * @requires SSE4.1
489  *
490  * @param len    Length of each vector in bytes. Must be >= 16.
491  * @param vlen   Number of vector sources.
492  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
493  *               based on the array of input coefficients.
494  * @param src    Array of pointers to source inputs.
495  * @param dest   Array of pointers to destination data buffers.
496  * @returns none
497  */
498 
499 void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
500 			unsigned char **src, unsigned char **dest);
501 
502 /**
503  * @brief GF(2^8) vector dot product with four outputs.
504  *
505  * Vector dot product optimized to calculate four outputs at a time. Does four
506  * GF(2^8) dot products across each byte of the input array and four constant
507  * sets of coefficients to produce each byte of the outputs. Can be used for
508  * erasure coding encode and decode. Function requires pre-calculation of a
509  * 4*32*vlen byte constant array based on the four sets of input coefficients.
510  * @requires AVX
511  *
512  * @param len    Length of each vector in bytes. Must be >= 16.
513  * @param vlen   Number of vector sources.
514  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
515  *               based on the array of input coefficients.
516  * @param src    Array of pointers to source inputs.
517  * @param dest   Array of pointers to destination data buffers.
518  * @returns none
519  */
520 
521 void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
522 			unsigned char **src, unsigned char **dest);
523 
524 /**
525  * @brief GF(2^8) vector dot product with four outputs.
526  *
527  * Vector dot product optimized to calculate four outputs at a time. Does four
528  * GF(2^8) dot products across each byte of the input array and four constant
529  * sets of coefficients to produce each byte of the outputs. Can be used for
530  * erasure coding encode and decode. Function requires pre-calculation of a
531  * 4*32*vlen byte constant array based on the four sets of input coefficients.
532  * @requires AVX2
533  *
534  * @param len    Length of each vector in bytes. Must be >= 32.
535  * @param vlen   Number of vector sources.
536  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
537  *               based on the array of input coefficients.
538  * @param src    Array of pointers to source inputs.
539  * @param dest   Array of pointers to destination data buffers.
540  * @returns none
541  */
542 
543 void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
544 			unsigned char **src, unsigned char **dest);
545 
546 /**
547  * @brief GF(2^8) vector dot product with five outputs.
548  *
549  * Vector dot product optimized to calculate five outputs at a time. Does five
550  * GF(2^8) dot products across each byte of the input array and five constant
551  * sets of coefficients to produce each byte of the outputs. Can be used for
552  * erasure coding encode and decode. Function requires pre-calculation of a
553  * 5*32*vlen byte constant array based on the five sets of input coefficients.
554  * @requires SSE4.1
555  *
556  * @param len    Length of each vector in bytes. Must be >= 16.
557  * @param vlen   Number of vector sources.
558  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
559  *               based on the array of input coefficients.
560  * @param src    Array of pointers to source inputs.
561  * @param dest   Array of pointers to destination data buffers.
562  * @returns none
563  */
564 
565 void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
566 			unsigned char **src, unsigned char **dest);
567 
568 /**
569  * @brief GF(2^8) vector dot product with five outputs.
570  *
571  * Vector dot product optimized to calculate five outputs at a time. Does five
572  * GF(2^8) dot products across each byte of the input array and five constant
573  * sets of coefficients to produce each byte of the outputs. Can be used for
574  * erasure coding encode and decode. Function requires pre-calculation of a
575  * 5*32*vlen byte constant array based on the five sets of input coefficients.
576  * @requires AVX
577  *
578  * @param len    Length of each vector in bytes. Must be >= 16.
579  * @param vlen   Number of vector sources.
580  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
581  *               based on the array of input coefficients.
582  * @param src    Array of pointers to source inputs.
583  * @param dest   Array of pointers to destination data buffers.
584  * @returns none
585  */
586 
587 void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
588 			unsigned char **src, unsigned char **dest);
589 
590 /**
591  * @brief GF(2^8) vector dot product with five outputs.
592  *
593  * Vector dot product optimized to calculate five outputs at a time. Does five
594  * GF(2^8) dot products across each byte of the input array and five constant
595  * sets of coefficients to produce each byte of the outputs. Can be used for
596  * erasure coding encode and decode. Function requires pre-calculation of a
597  * 5*32*vlen byte constant array based on the five sets of input coefficients.
598  * @requires AVX2
599  *
600  * @param len    Length of each vector in bytes. Must be >= 32.
601  * @param vlen   Number of vector sources.
602  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
603  *               based on the array of input coefficients.
604  * @param src    Array of pointers to source inputs.
605  * @param dest   Array of pointers to destination data buffers.
606  * @returns none
607  */
608 
609 void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
610 			unsigned char **src, unsigned char **dest);
611 
612 /**
613  * @brief GF(2^8) vector dot product with six outputs.
614  *
615  * Vector dot product optimized to calculate six outputs at a time. Does six
616  * GF(2^8) dot products across each byte of the input array and six constant
617  * sets of coefficients to produce each byte of the outputs. Can be used for
618  * erasure coding encode and decode. Function requires pre-calculation of a
619  * 6*32*vlen byte constant array based on the six sets of input coefficients.
620  * @requires SSE4.1
621  *
622  * @param len    Length of each vector in bytes. Must be >= 16.
623  * @param vlen   Number of vector sources.
624  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
625  *               based on the array of input coefficients.
626  * @param src    Array of pointers to source inputs.
627  * @param dest   Array of pointers to destination data buffers.
628  * @returns none
629  */
630 
631 void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
632 			unsigned char **src, unsigned char **dest);
633 
634 /**
635  * @brief GF(2^8) vector dot product with six outputs.
636  *
637  * Vector dot product optimized to calculate six outputs at a time. Does six
638  * GF(2^8) dot products across each byte of the input array and six constant
639  * sets of coefficients to produce each byte of the outputs. Can be used for
640  * erasure coding encode and decode. Function requires pre-calculation of a
641  * 6*32*vlen byte constant array based on the six sets of input coefficients.
642  * @requires AVX
643  *
644  * @param len    Length of each vector in bytes. Must be >= 16.
645  * @param vlen   Number of vector sources.
646  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
647  *               based on the array of input coefficients.
648  * @param src    Array of pointers to source inputs.
649  * @param dest   Array of pointers to destination data buffers.
650  * @returns none
651  */
652 
653 void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
654 			unsigned char **src, unsigned char **dest);
655 
656 /**
657  * @brief GF(2^8) vector dot product with six outputs.
658  *
659  * Vector dot product optimized to calculate six outputs at a time. Does six
660  * GF(2^8) dot products across each byte of the input array and six constant
661  * sets of coefficients to produce each byte of the outputs. Can be used for
662  * erasure coding encode and decode. Function requires pre-calculation of a
663  * 6*32*vlen byte constant array based on the six sets of input coefficients.
664  * @requires AVX2
665  *
666  * @param len    Length of each vector in bytes. Must be >= 32.
667  * @param vlen   Number of vector sources.
668  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
669  *               based on the array of input coefficients.
670  * @param src    Array of pointers to source inputs.
671  * @param dest   Array of pointers to destination data buffers.
672  * @returns none
673  */
674 
675 void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
676 			unsigned char **src, unsigned char **dest);
677 
678 /**
679  * @brief GF(2^8) vector multiply accumulate, arch specific version.
680  *
681  * Arch specific version of gf_vect_mad() with same parameters.
682  * @requires SSE4.1
683  */
684 
685 void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
686 		     unsigned char *dest);
687 /**
688  * @brief GF(2^8) vector multiply accumulate, AVX version.
689  *
690  * Architecture-specific version of gf_vect_mad() with the same parameters.
691  * @requires AVX
692  */
693 
694 void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
695 		     unsigned char *dest);
696 
697 /**
698  * @brief GF(2^8) vector multiply accumulate, AVX2 version.
699  *
700  * Architecture-specific version of gf_vect_mad() with the same parameters.
701  * @requires AVX2
702  */
703 
704 void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
705 		      unsigned char *dest);
706 
707 
708 /**
709  * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
710  *
711  * Does a GF(2^8) multiply across each byte of input source with expanded
712  * constants and adds the result to the destination arrays. Can be used for
713  * erasure coding encode and decode update when only one source is available
714  * at a time. Function requires pre-calculation of a 32*vec byte constant array
715  * based on the input coefficients.
716  * @requires SSE4.1
717  *
718  * @param len    Length of each vector in bytes. Must be >= 32.
719  * @param vec    The number of vector sources or rows in the generator matrix
720  *               for coding.
721  * @param vec_i  The vector index corresponding to the single input source.
722  * @param gftbls Pointer to input tables generated from coding coefficients in ec_init_tables().
723  *               Must be of size 32*vec. NOTE(review): 2 outputs may need 2*32*vec - confirm.
724  * @param src    Pointer to source input array.
725  * @param dest   Array of pointers to destination buffers (both input and output).
726  * @returns none
727  */
728 
729 void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
730 		      unsigned char **dest);
731 
732 /**
733  * @brief GF(2^8) vector multiply with 2 accumulate, AVX version. Same parameters as gf_2vect_mad_sse().
734  * @requires AVX
735  */
736 void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
737 		      unsigned char **dest);
738 /**
739  * @brief GF(2^8) vector multiply with 2 accumulate, AVX2 version. Same parameters as gf_2vect_mad_sse().
740  * @requires AVX2
741  */
742 void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
743 		       unsigned char **dest);
744 
745 /**
746  * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
747  *
748  * Does a GF(2^8) multiply across each byte of input source with expanded
749  * constants and adds the result to the destination arrays. Can be used for
750  * erasure coding encode and decode update when only one source is available
751  * at a time. Function requires pre-calculation of a 32*vec byte constant array
752  * based on the input coefficients.
753  * @requires SSE4.1
754  *
755  * @param len    Length of each vector in bytes. Must be >= 32.
756  * @param vec    The number of vector sources or rows in the generator matrix
757  *               for coding.
758  * @param vec_i  The vector index corresponding to the single input source.
759  * @param gftbls Pointer to input tables generated from coding coefficients in ec_init_tables().
760  *               Must be of size 32*vec. NOTE(review): 3 outputs may need 3*32*vec - confirm.
761  * @param src    Pointer to source input array.
762  * @param dest   Array of pointers to destination buffers (both input and output).
763  * @returns none
764  */
765 
766 void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
767 		      unsigned char **dest);
768 
769 /**
770  * @brief GF(2^8) vector multiply with 3 accumulate, AVX version. Same parameters as gf_3vect_mad_sse().
771  * @requires AVX
772  */
773 void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
774 		      unsigned char **dest);
775 
776 /**
777  * @brief GF(2^8) vector multiply with 3 accumulate, AVX2 version. Same parameters as gf_3vect_mad_sse().
778  * @requires AVX2
779  */
780 void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
781 		       unsigned char **dest);
782 
783 /**
784  * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
785  *
786  * Does a GF(2^8) multiply across each byte of input source with expanded
787  * constants and adds the result to the destination arrays. Can be used for
788  * erasure coding encode and decode update when only one source is available
789  * at a time. Function requires pre-calculation of a 32*vec byte constant array
790  * based on the input coefficients.
791  * @requires SSE4.1
792  *
793  * @param len    Length of each vector in bytes. Must be >= 32.
794  * @param vec    The number of vector sources or rows in the generator matrix
795  *               for coding.
796  * @param vec_i  The vector index corresponding to the single input source.
797  * @param gftbls Pointer to input tables generated from coding coefficients in ec_init_tables().
798  *               Must be of size 32*vec. NOTE(review): 4 outputs may need 4*32*vec - confirm.
799  * @param src    Pointer to source input array.
800  * @param dest   Array of pointers to destination buffers (both input and output).
801  * @returns none
802  */
803 
804 void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
805 		      unsigned char **dest);
806 
807 /**
808  * @brief GF(2^8) vector multiply with 4 accumulate, AVX version. Same parameters as gf_4vect_mad_sse().
809  * @requires AVX
810  */
811 void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
812 		      unsigned char **dest);
813 /**
814  * @brief GF(2^8) vector multiply with 4 accumulate, AVX2 version. Same parameters as gf_4vect_mad_sse().
815  * @requires AVX2
816  */
817 void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
818 		       unsigned char **dest);
819 
820 /**
821  * @brief GF(2^8) vector multiply with 5 accumulate, SSE version. Same signature as gf_4vect_mad_sse().
822  * @requires SSE4.1
823  */
824 void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
825 		      unsigned char **dest);
826 
827 /**
828  * @brief GF(2^8) vector multiply with 5 accumulate. AVX version of gf_5vect_mad_sse().
829  * @requires AVX
830  */
831 void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
832 		      unsigned char **dest);
833 /**
834  * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version of gf_5vect_mad_sse().
835  * @requires AVX2
836  */
837 void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
838 		       unsigned char **dest);
839 
840 /**
841  * @brief GF(2^8) vector multiply with 6 accumulate, SSE version. Same signature as gf_4vect_mad_sse().
842  * @requires SSE4.1
843  */
844 void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
845 		      unsigned char **dest);
846 /**
847  * @brief GF(2^8) vector multiply with 6 accumulate. AVX version of gf_6vect_mad_sse().
848  * @requires AVX
849  */
850 void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
851 		      unsigned char **dest);
852 
853 /**
854  * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version of gf_6vect_mad_sse().
855  * @requires AVX2
856  */
857 void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
858 		       unsigned char **dest);
859 
860 #endif
861 
862 /**********************************************************************
863  * The remaining functions are library support functions used in GF(2^8) operations.
864  */
865 
866 /**
867  * @brief Single element GF(2^8) multiply.
868  *
869  * @param a  First GF(2^8) element to multiply
870  * @param b  Second GF(2^8) element to multiply
871  * @returns  Product of a and b in GF(2^8)
872  */
873 
874 unsigned char gf_mul(unsigned char a, unsigned char b);
875 
876 /**
877  * @brief Single element GF(2^8) inverse.
878  *
879  * @param a  Input element. NOTE(review): result for a = 0 (no inverse) is not documented - confirm.
880  * @returns  Field element b such that a x b = {1} in GF(2^8)
881  */
882 
883 unsigned char gf_inv(unsigned char a);
884 
885 /**
886  * @brief Generate a matrix of coefficients to be used for encoding.
887  *
888  * Vandermonde matrix example of encoding coefficients where high portion of
889  * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
890  * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
891  * erasure encoding but does not guarantee every sub matrix is invertible. For
892  * large pairs of m and k it is possible to find cases where the decode matrix
893  * chosen from sources and parity is not invertible. Users may want to adjust
894  * for certain pairs m and k. If m and k satisfy one of the following
895  * inequalities, no adjustment is required:
896  *
897  * - k <= 3
898  * - k = 4, m <= 25
899  * - k = 5, m <= 10
900  * - k <= 21, m-k = 4
901  * - m - k <= 3.
902  *
903  * @param a  Output [m x k] array (m*k bytes) to hold coefficients.
904  * @param m  number of rows in matrix corresponding to srcs + parity.
905  * @param k  number of columns in matrix corresponding to srcs.
906  * @returns  none
907  */
908 
909 void gf_gen_rs_matrix(unsigned char *a, int m, int k);
910 
911 /**
912  * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
913  *
914  * Cauchy matrix example of encoding coefficients where high portion of matrix
915  * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
916  * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertible.
917  *
918  * @param a  Output [m x k] array (m*k bytes) to hold coefficients.
919  * @param m  number of rows in matrix corresponding to srcs + parity.
920  * @param k  number of columns in matrix corresponding to srcs.
921  * @returns  none
922  */
923 
924 void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
925 
926 /**
927  * @brief Invert a matrix in GF(2^8)
928  *
929  * @param in  input matrix [n x n]. NOTE(review): not const-qualified, may be modified - confirm.
930  * @param out output matrix such that [in] x [out] = [I] - identity matrix
931  * @param n   size of matrix [nxn]
932  * @returns 0 on success, non-zero on failure (singular input matrix)
933  */
934 
935 int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
936 
937 
938 /*************************************************************/
939 
940 #ifdef __cplusplus
941 }
942 #endif
943 
944 #endif //_ERASURE_CODE_H_
945