1 /********************************************************************** 2 Copyright(c) 2011-2015 Intel Corporation All rights reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 * Redistributions of source code must retain the above copyright 8 notice, this list of conditions and the following disclaimer. 9 * Redistributions in binary form must reproduce the above copyright 10 notice, this list of conditions and the following disclaimer in 11 the documentation and/or other materials provided with the 12 distribution. 13 * Neither the name of Intel Corporation nor the names of its 14 contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 **********************************************************************/ 29 30 31 #ifndef _ERASURE_CODE_H_ 32 #define _ERASURE_CODE_H_ 33 34 /** 35 * @file erasure_code.h 36 * @brief Interface to functions supporting erasure code encode and decode. 37 * 38 * This file defines the interface to optimized functions used in erasure 39 * codes. 
Encode and decode of erasures in GF(2^8) are made by calculating the 40 * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a 41 * set of coefficients. Values for the coefficients are determined by the type 42 * of erasure code. Using a general dot product means that any sequence of 43 * coefficients may be used including erasure codes based on random 44 * coefficients. 45 * Multiple versions of dot product are supplied to calculate 1-6 output 46 * vectors in one pass. 47 * Base GF multiply and divide functions can be sped up by defining 48 * GF_LARGE_TABLES at the expense of memory size. 49 * 50 */ 51 52 #include "gf_vect_mul.h" 53 54 #ifdef __cplusplus 55 extern "C" { 56 #endif 57 58 /** 59 * @brief Initialize tables for fast Erasure Code encode and decode. 60 * 61 * Generates the expanded tables needed for fast encode or decode for erasure 62 * codes on blocks of data. 32 bytes are generated for each input coefficient. 63 * 64 * @param k The number of vector sources or rows in the generator matrix 65 * for coding. 66 * @param rows The number of output vectors to concurrently encode/decode. 67 * @param a Pointer to sets of arrays of input coefficients used to encode 68 * or decode data. 69 * @param gftbls Pointer to start of space for concatenated output tables 70 * generated from input coefficients. Must be of size 32*k*rows. 71 * @returns none 72 */ 73 74 void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); 75 76 /** 77 * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. 78 * 79 * Given a list of source data blocks, generate one or multiple blocks of 80 * encoded data as specified by a matrix of GF(2^8) coefficients. When given a 81 * suitable set of coefficients, this function will perform the fast generation 82 * or decoding of Reed-Solomon type erasure codes.
83 * 84 * This function determines what instruction sets are enabled and 85 * selects the appropriate version at runtime. 86 * 87 * @param len Length of each block of data (vector) of source or dest data. 88 * @param k The number of vector sources or rows in the generator matrix 89 * for coding. 90 * @param rows The number of output vectors to concurrently encode/decode. 91 * @param gftbls Pointer to array of input tables generated from coding 92 * coefficients in ec_init_tables(). Must be of size 32*k*rows 93 * @param data Array of pointers to source input buffers. 94 * @param coding Array of pointers to coded output buffers. 95 * @returns none 96 */ 97 98 void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 99 unsigned char **coding); 100 101 /** 102 * @brief Generate or decode erasure codes on blocks of data, runs baseline version. 103 * 104 * Baseline version of ec_encode_data() with same parameters; here srcs, dests, v, src and dest correspond by position to k, rows, gftbls, data and coding. 105 */ 106 void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, 107 unsigned char **dest); 108 109 /** 110 * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version. 111 * 112 * Given one source data block, update one or multiple blocks of encoded data as 113 * specified by a matrix of GF(2^8) coefficients. When given a suitable set of 114 * coefficients, this function will perform the fast generation or decoding of 115 * Reed-Solomon type erasure codes from one input source at a time. 116 * 117 * This function determines what instruction sets are enabled and selects the 118 * appropriate version at runtime. 119 * 120 * @param len Length of each block of data (vector) of source or dest data. 121 * @param k The number of vector sources or rows in the generator matrix 122 * for coding. 123 * @param rows The number of output vectors to concurrently encode/decode. 124 * @param vec_i The vector index corresponding to the single input source.
125 * @param g_tbls Pointer to array of input tables generated from coding 126 * coefficients in ec_init_tables(). Must be of size 32*k*rows 127 * @param data Pointer to single input source used to update output parity. 128 * @param coding Array of pointers to coded output buffers. 129 * @returns none 130 */ 131 void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 132 unsigned char *data, unsigned char **coding); 133 134 /** 135 * @brief Generate update for encode or decode of erasure codes from single source. 136 * 137 * Baseline version of ec_encode_data_update(). 138 */ 139 140 void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, 141 unsigned char *data, unsigned char **dest); 142 143 /** 144 * @brief GF(2^8) vector dot product, runs baseline version. 145 * 146 * Does a GF(2^8) dot product across each byte of the input array and a constant 147 * set of coefficients to produce each byte of the output. Can be used for 148 * erasure coding encode and decode. Function requires pre-calculation of a 149 * 32*vlen byte constant array based on the input coefficients. 150 * 151 * @param len Length of each vector in bytes. Must be >= 16. 152 * @param vlen Number of vector sources. 153 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 154 * on the array of input coefficients. Only elements 32*CONST*j + 1 155 * of this array are used, where j = (0, 1, 2...) and CONST is the 156 * number of elements in the array of input coefficients. The 157 * elements used correspond to the original input coefficients. 158 * @param src Array of pointers to source inputs. 159 * @param dest Pointer to destination data array. 160 * @returns none 161 */ 162 163 164 void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, 165 unsigned char **src, unsigned char *dest); 166 167 /** 168 * @brief GF(2^8) vector dot product, runs appropriate version. 
169 * 170 * Does a GF(2^8) dot product across each byte of the input array and a constant 171 * set of coefficients to produce each byte of the output. Can be used for 172 * erasure coding encode and decode. Function requires pre-calculation of a 173 * 32*vlen byte constant array based on the input coefficients. 174 * 175 * This function determines what instruction sets are enabled and 176 * selects the appropriate version at runtime. 177 * 178 * @param len Length of each vector in bytes. Must be >= 32. 179 * @param vlen Number of vector sources. 180 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 181 * on the array of input coefficients. 182 * @param src Array of pointers to source inputs. 183 * @param dest Pointer to destination data array. 184 * @returns none 185 */ 186 187 void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, 188 unsigned char **src, unsigned char *dest); 189 190 /** 191 * @brief GF(2^8) vector multiply accumulate, runs appropriate version. 192 * 193 * Does a GF(2^8) multiply across each byte of input source with expanded 194 * constant and add to destination array. Can be used for erasure coding encode 195 * and decode update when only one source is available at a time. Function 196 * requires pre-calculation of a 32*vec byte constant array based on the input 197 * coefficients. 198 * 199 * This function determines what instruction sets are enabled and selects the 200 * appropriate version at runtime. 201 * 202 * @param len Length of each vector in bytes. Must be >= 32. 203 * @param vec The number of vector sources or rows in the generator matrix 204 * for coding. 205 * @param vec_i The vector index corresponding to the single input source. 206 * @param gftbls Pointer to array of input tables generated from coding 207 * coefficients in ec_init_tables(). Must be of size 32*vec. 208 * @param src Pointer to source input array. 209 * @param dest Pointer to destination data array.
210 * @returns none 211 */ 212 213 void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 214 unsigned char *dest); 215 216 /** 217 * @brief GF(2^8) vector multiply accumulate, baseline version. 218 * 219 * Baseline version of gf_vect_mad() with same parameters; v corresponds to gftbls. 220 */ 221 222 void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, 223 unsigned char *dest); 224 225 // x86 only 226 #if defined(__i386__) || defined(__x86_64__) 227 228 /** 229 * @brief Generate or decode erasure codes on blocks of data. 230 * 231 * Arch specific version of ec_encode_data() with same parameters. 232 * @requires SSE4.1 233 */ 234 void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 235 unsigned char **coding); 236 237 /** 238 * @brief Generate or decode erasure codes on blocks of data. 239 * 240 * Arch specific version of ec_encode_data() with same parameters. 241 * @requires AVX 242 */ 243 void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 244 unsigned char **coding); 245 246 /** 247 * @brief Generate or decode erasure codes on blocks of data. 248 * 249 * Arch specific version of ec_encode_data() with same parameters. 250 * @requires AVX2 251 */ 252 void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 253 unsigned char **coding); 254 255 /** 256 * @brief Generate update for encode or decode of erasure codes from single source. 257 * 258 * Arch specific version of ec_encode_data_update() with same parameters. 259 * @requires SSE4.1 260 */ 261 262 void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 263 unsigned char *data, unsigned char **coding); 264 265 /** 266 * @brief Generate update for encode or decode of erasure codes from single source. 267 * 268 * Arch specific version of ec_encode_data_update() with same parameters.
269 * @requires AVX 270 */ 271 272 void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 273 unsigned char *data, unsigned char **coding); 274 275 /** 276 * @brief Generate update for encode or decode of erasure codes from single source. 277 * 278 * Arch specific version of ec_encode_data_update() with same parameters. 279 * @requires AVX2 280 */ 281 282 void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 283 unsigned char *data, unsigned char **coding); 284 285 /** 286 * @brief GF(2^8) vector dot product. 287 * 288 * Does a GF(2^8) dot product across each byte of the input array and a constant 289 * set of coefficients to produce each byte of the output. Can be used for 290 * erasure coding encode and decode. Function requires pre-calculation of a 291 * 32*vlen byte constant array based on the input coefficients. 292 * @requires SSE4.1 293 * 294 * @param len Length of each vector in bytes. Must be >= 16. 295 * @param vlen Number of vector sources. 296 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 297 * on the array of input coefficients. 298 * @param src Array of pointers to source inputs. 299 * @param dest Pointer to destination data array. 300 * @returns none 301 */ 302 303 void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 304 unsigned char **src, unsigned char *dest); 305 306 /** 307 * @brief GF(2^8) vector dot product. 308 * 309 * Does a GF(2^8) dot product across each byte of the input array and a constant 310 * set of coefficients to produce each byte of the output. Can be used for 311 * erasure coding encode and decode. Function requires pre-calculation of a 312 * 32*vlen byte constant array based on the input coefficients. 313 * @requires AVX 314 * 315 * @param len Length of each vector in bytes. Must be >= 16. 316 * @param vlen Number of vector sources. 
317 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 318 * on the array of input coefficients. 319 * @param src Array of pointers to source inputs. 320 * @param dest Pointer to destination data array. 321 * @returns none 322 */ 323 324 void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 325 unsigned char **src, unsigned char *dest); 326 327 /** 328 * @brief GF(2^8) vector dot product. 329 * 330 * Does a GF(2^8) dot product across each byte of the input array and a constant 331 * set of coefficients to produce each byte of the output. Can be used for 332 * erasure coding encode and decode. Function requires pre-calculation of a 333 * 32*vlen byte constant array based on the input coefficients. 334 * @requires AVX2 335 * 336 * @param len Length of each vector in bytes. Must be >= 32. 337 * @param vlen Number of vector sources. 338 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 339 * on the array of input coefficients. 340 * @param src Array of pointers to source inputs. 341 * @param dest Pointer to destination data array. 342 * @returns none 343 */ 344 345 void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 346 unsigned char **src, unsigned char *dest); 347 348 /** 349 * @brief GF(2^8) vector dot product with two outputs. 350 * 351 * Vector dot product optimized to calculate two outputs at a time. Does two 352 * GF(2^8) dot products across each byte of the input array and two constant 353 * sets of coefficients to produce each byte of the outputs. Can be used for 354 * erasure coding encode and decode. Function requires pre-calculation of a 355 * 2*32*vlen byte constant array based on the two sets of input coefficients. 356 * @requires SSE4.1 357 * 358 * @param len Length of each vector in bytes. Must be >= 16. 359 * @param vlen Number of vector sources. 
360 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 361 * based on the array of input coefficients. 362 * @param src Array of pointers to source inputs. 363 * @param dest Array of pointers to destination data buffers. 364 * @returns none 365 */ 366 367 void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 368 unsigned char **src, unsigned char **dest); 369 370 /** 371 * @brief GF(2^8) vector dot product with two outputs. 372 * 373 * Vector dot product optimized to calculate two outputs at a time. Does two 374 * GF(2^8) dot products across each byte of the input array and two constant 375 * sets of coefficients to produce each byte of the outputs. Can be used for 376 * erasure coding encode and decode. Function requires pre-calculation of a 377 * 2*32*vlen byte constant array based on the two sets of input coefficients. 378 * @requires AVX 379 * 380 * @param len Length of each vector in bytes. Must be >= 16. 381 * @param vlen Number of vector sources. 382 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 383 * based on the array of input coefficients. 384 * @param src Array of pointers to source inputs. 385 * @param dest Array of pointers to destination data buffers. 386 * @returns none 387 */ 388 389 void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 390 unsigned char **src, unsigned char **dest); 391 392 /** 393 * @brief GF(2^8) vector dot product with two outputs. 394 * 395 * Vector dot product optimized to calculate two outputs at a time. Does two 396 * GF(2^8) dot products across each byte of the input array and two constant 397 * sets of coefficients to produce each byte of the outputs. Can be used for 398 * erasure coding encode and decode. Function requires pre-calculation of a 399 * 2*32*vlen byte constant array based on the two sets of input coefficients. 400 * @requires AVX2 401 * 402 * @param len Length of each vector in bytes. Must be >= 32. 
403 * @param vlen Number of vector sources. 404 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 405 * based on the array of input coefficients. 406 * @param src Array of pointers to source inputs. 407 * @param dest Array of pointers to destination data buffers. 408 * @returns none 409 */ 410 411 void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 412 unsigned char **src, unsigned char **dest); 413 414 /** 415 * @brief GF(2^8) vector dot product with three outputs. 416 * 417 * Vector dot product optimized to calculate three outputs at a time. Does three 418 * GF(2^8) dot products across each byte of the input array and three constant 419 * sets of coefficients to produce each byte of the outputs. Can be used for 420 * erasure coding encode and decode. Function requires pre-calculation of a 421 * 3*32*vlen byte constant array based on the three sets of input coefficients. 422 * @requires SSE4.1 423 * 424 * @param len Length of each vector in bytes. Must be >= 16. 425 * @param vlen Number of vector sources. 426 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 427 * based on the array of input coefficients. 428 * @param src Array of pointers to source inputs. 429 * @param dest Array of pointers to destination data buffers. 430 * @returns none 431 */ 432 433 void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 434 unsigned char **src, unsigned char **dest); 435 436 /** 437 * @brief GF(2^8) vector dot product with three outputs. 438 * 439 * Vector dot product optimized to calculate three outputs at a time. Does three 440 * GF(2^8) dot products across each byte of the input array and three constant 441 * sets of coefficients to produce each byte of the outputs. Can be used for 442 * erasure coding encode and decode. Function requires pre-calculation of a 443 * 3*32*vlen byte constant array based on the three sets of input coefficients. 
444 * @requires AVX 445 * 446 * @param len Length of each vector in bytes. Must be >= 16. 447 * @param vlen Number of vector sources. 448 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 449 * based on the array of input coefficients. 450 * @param src Array of pointers to source inputs. 451 * @param dest Array of pointers to destination data buffers. 452 * @returns none 453 */ 454 455 void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 456 unsigned char **src, unsigned char **dest); 457 458 /** 459 * @brief GF(2^8) vector dot product with three outputs. 460 * 461 * Vector dot product optimized to calculate three outputs at a time. Does three 462 * GF(2^8) dot products across each byte of the input array and three constant 463 * sets of coefficients to produce each byte of the outputs. Can be used for 464 * erasure coding encode and decode. Function requires pre-calculation of a 465 * 3*32*vlen byte constant array based on the three sets of input coefficients. 466 * @requires AVX2 467 * 468 * @param len Length of each vector in bytes. Must be >= 32. 469 * @param vlen Number of vector sources. 470 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 471 * based on the array of input coefficients. 472 * @param src Array of pointers to source inputs. 473 * @param dest Array of pointers to destination data buffers. 474 * @returns none 475 */ 476 477 void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 478 unsigned char **src, unsigned char **dest); 479 480 /** 481 * @brief GF(2^8) vector dot product with four outputs. 482 * 483 * Vector dot product optimized to calculate four outputs at a time. Does four 484 * GF(2^8) dot products across each byte of the input array and four constant 485 * sets of coefficients to produce each byte of the outputs. Can be used for 486 * erasure coding encode and decode. 
Function requires pre-calculation of a 487 * 4*32*vlen byte constant array based on the four sets of input coefficients. 488 * @requires SSE4.1 489 * 490 * @param len Length of each vector in bytes. Must be >= 16. 491 * @param vlen Number of vector sources. 492 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 493 * based on the array of input coefficients. 494 * @param src Array of pointers to source inputs. 495 * @param dest Array of pointers to destination data buffers. 496 * @returns none 497 */ 498 499 void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 500 unsigned char **src, unsigned char **dest); 501 502 /** 503 * @brief GF(2^8) vector dot product with four outputs. 504 * 505 * Vector dot product optimized to calculate four outputs at a time. Does four 506 * GF(2^8) dot products across each byte of the input array and four constant 507 * sets of coefficients to produce each byte of the outputs. Can be used for 508 * erasure coding encode and decode. Function requires pre-calculation of a 509 * 4*32*vlen byte constant array based on the four sets of input coefficients. 510 * @requires AVX 511 * 512 * @param len Length of each vector in bytes. Must be >= 16. 513 * @param vlen Number of vector sources. 514 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 515 * based on the array of input coefficients. 516 * @param src Array of pointers to source inputs. 517 * @param dest Array of pointers to destination data buffers. 518 * @returns none 519 */ 520 521 void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 522 unsigned char **src, unsigned char **dest); 523 524 /** 525 * @brief GF(2^8) vector dot product with four outputs. 526 * 527 * Vector dot product optimized to calculate four outputs at a time. Does four 528 * GF(2^8) dot products across each byte of the input array and four constant 529 * sets of coefficients to produce each byte of the outputs. 
Can be used for 530 * erasure coding encode and decode. Function requires pre-calculation of a 531 * 4*32*vlen byte constant array based on the four sets of input coefficients. 532 * @requires AVX2 533 * 534 * @param len Length of each vector in bytes. Must be >= 32. 535 * @param vlen Number of vector sources. 536 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 537 * based on the array of input coefficients. 538 * @param src Array of pointers to source inputs. 539 * @param dest Array of pointers to destination data buffers. 540 * @returns none 541 */ 542 543 void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 544 unsigned char **src, unsigned char **dest); 545 546 /** 547 * @brief GF(2^8) vector dot product with five outputs. 548 * 549 * Vector dot product optimized to calculate five outputs at a time. Does five 550 * GF(2^8) dot products across each byte of the input array and five constant 551 * sets of coefficients to produce each byte of the outputs. Can be used for 552 * erasure coding encode and decode. Function requires pre-calculation of a 553 * 5*32*vlen byte constant array based on the five sets of input coefficients. 554 * @requires SSE4.1 555 * 556 * @param len Length of each vector in bytes. Must be >= 16. 557 * @param vlen Number of vector sources. 558 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 559 * based on the array of input coefficients. 560 * @param src Array of pointers to source inputs. 561 * @param dest Array of pointers to destination data buffers. 562 * @returns none 563 */ 564 565 void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 566 unsigned char **src, unsigned char **dest); 567 568 /** 569 * @brief GF(2^8) vector dot product with five outputs. 570 * 571 * Vector dot product optimized to calculate five outputs at a time.
Does five 572 * GF(2^8) dot products across each byte of the input array and five constant 573 * sets of coefficients to produce each byte of the outputs. Can be used for 574 * erasure coding encode and decode. Function requires pre-calculation of a 575 * 5*32*vlen byte constant array based on the five sets of input coefficients. 576 * @requires AVX 577 * 578 * @param len Length of each vector in bytes. Must be >= 16. 579 * @param vlen Number of vector sources. 580 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 581 * based on the array of input coefficients. 582 * @param src Array of pointers to source inputs. 583 * @param dest Array of pointers to destination data buffers. 584 * @returns none 585 */ 586 587 void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 588 unsigned char **src, unsigned char **dest); 589 590 /** 591 * @brief GF(2^8) vector dot product with five outputs. 592 * 593 * Vector dot product optimized to calculate five outputs at a time. Does five 594 * GF(2^8) dot products across each byte of the input array and five constant 595 * sets of coefficients to produce each byte of the outputs. Can be used for 596 * erasure coding encode and decode. Function requires pre-calculation of a 597 * 5*32*vlen byte constant array based on the five sets of input coefficients. 598 * @requires AVX2 599 * 600 * @param len Length of each vector in bytes. Must be >= 32. 601 * @param vlen Number of vector sources. 602 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 603 * based on the array of input coefficients. 604 * @param src Array of pointers to source inputs. 605 * @param dest Array of pointers to destination data buffers. 606 * @returns none 607 */ 608 609 void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 610 unsigned char **src, unsigned char **dest); 611 612 /** 613 * @brief GF(2^8) vector dot product with six outputs.
614 * 615 * Vector dot product optimized to calculate six outputs at a time. Does six 616 * GF(2^8) dot products across each byte of the input array and six constant 617 * sets of coefficients to produce each byte of the outputs. Can be used for 618 * erasure coding encode and decode. Function requires pre-calculation of a 619 * 6*32*vlen byte constant array based on the six sets of input coefficients. 620 * @requires SSE4.1 621 * 622 * @param len Length of each vector in bytes. Must be >= 16. 623 * @param vlen Number of vector sources. 624 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 625 * based on the array of input coefficients. 626 * @param src Array of pointers to source inputs. 627 * @param dest Array of pointers to destination data buffers. 628 * @returns none 629 */ 630 631 void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, 632 unsigned char **src, unsigned char **dest); 633 634 /** 635 * @brief GF(2^8) vector dot product with six outputs. 636 * 637 * Vector dot product optimized to calculate six outputs at a time. Does six 638 * GF(2^8) dot products across each byte of the input array and six constant 639 * sets of coefficients to produce each byte of the outputs. Can be used for 640 * erasure coding encode and decode. Function requires pre-calculation of a 641 * 6*32*vlen byte constant array based on the six sets of input coefficients. 642 * @requires AVX 643 * 644 * @param len Length of each vector in bytes. Must be >= 16. 645 * @param vlen Number of vector sources. 646 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 647 * based on the array of input coefficients. 648 * @param src Array of pointers to source inputs. 649 * @param dest Array of pointers to destination data buffers. 
650 * @returns none 651 */ 652 653 void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, 654 unsigned char **src, unsigned char **dest); 655 656 /** 657 * @brief GF(2^8) vector dot product with six outputs. 658 * 659 * Vector dot product optimized to calculate six outputs at a time. Does six 660 * GF(2^8) dot products across each byte of the input array and six constant 661 * sets of coefficients to produce each byte of the outputs. Can be used for 662 * erasure coding encode and decode. Function requires pre-calculation of a 663 * 6*32*vlen byte constant array based on the six sets of input coefficients. 664 * @requires AVX2 665 * 666 * @param len Length of each vector in bytes. Must be >= 32. 667 * @param vlen Number of vector sources. 668 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 669 * based on the array of input coefficients. 670 * @param src Array of pointers to source inputs. 671 * @param dest Array of pointers to destination data buffers. 672 * @returns none 673 */ 674 675 void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, 676 unsigned char **src, unsigned char **dest); 677 678 /** 679 * @brief GF(2^8) vector multiply accumulate, arch specific version. 680 * 681 * Arch specific version of gf_vect_mad() with same parameters. 682 * @requires SSE4.1 683 */ 684 685 void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 686 unsigned char *dest); 687 /** 688 * @brief GF(2^8) vector multiply accumulate, arch specific version. 689 * 690 * Arch specific version of gf_vect_mad() with same parameters. 691 * @requires AVX 692 */ 693 694 void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 695 unsigned char *dest); 696 697 /** 698 * @brief GF(2^8) vector multiply accumulate, arch specific version. 699 * 700 * Arch specific version of gf_vect_mad() with same parameters. 
701 * @requires AVX2 702 */ 703 704 void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 705 unsigned char *dest); 706 707 708 /** 709 * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. 710 * 711 * Does a GF(2^8) multiply across each byte of input source with expanded 712 * constants and add to destination arrays. Can be used for erasure coding 713 * encode and decode update when only one source is available at a 714 * time. Function requires pre-calculation of a 32*vec byte constant array based 715 * on the input coefficients. 716 * @requires SSE4.1 717 * 718 * @param len Length of each vector in bytes. Must be >= 32. 719 * @param vec The number of vector sources or rows in the generator matrix 720 * for coding. 721 * @param vec_i The vector index corresponding to the single input source. 722 * @param gftbls Pointer to array of input tables generated from coding 723 * coefficients in ec_init_tables(). Must be of size 32*vec. 724 * @param src Pointer to source input array. 725 * @param dest Array of pointers to destination input/outputs. 726 * @returns none 727 */ 728 729 void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 730 unsigned char **dest); 731 732 /** 733 * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). 734 * @requires AVX 735 */ 736 void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 737 unsigned char **dest); 738 /** 739 * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(). 740 * @requires AVX2 741 */ 742 void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 743 unsigned char **dest); 744 745 /** 746 * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. 747 * 748 * Does a GF(2^8) multiply across each byte of input source with expanded 749 * constants and add to destination arrays. 
* Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires SSE4.1
 *
 * @param len    Length of each vector in bytes. Must be >= 32.
 * @param vec    The number of vector sources or rows in the generator matrix
 *               for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 *               coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */

void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
 *
 * Takes the same parameter list as gf_3vect_mad_sse().
 * @requires AVX
 */
void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
 *
 * Takes the same parameter list as gf_3vect_mad_sse().
 * @requires AVX2
 */
void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                       unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires SSE4.1
 *
 * @param len    Length of each vector in bytes. Must be >= 32.
* @param vec    The number of vector sources or rows in the generator matrix
 *               for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 *               coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */

void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
 *
 * Takes the same parameter list as gf_4vect_mad_sse().
 * @requires AVX
 */
void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
 *
 * Takes the same parameter list as gf_4vect_mad_sse().
 * @requires AVX2
 */
void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                       unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
 *
 * Takes the same parameter list as gf_4vect_mad_sse().
 * NOTE(review): presumably the len/gftbls requirements mirror the
 * 4-accumulate version and dest must supply 5 destination buffers --
 * confirm against the implementation.
 * @requires SSE4.1
 */
void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
 *
 * Takes the same parameter list as gf_5vect_mad_sse().
 * @requires AVX
 */
void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
 *
 * Takes the same parameter list as gf_5vect_mad_sse().
 * @requires AVX2
 */
void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                       unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
* @requires SSE4.1
 */
void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
 *
 * Takes the same parameter list as gf_6vect_mad_sse().
 * NOTE(review): presumably dest must supply 6 destination buffers -- confirm
 * against the implementation.
 * @requires AVX
 */
void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
 *
 * Takes the same parameter list as gf_6vect_mad_sse().
 * NOTE(review): presumably dest must supply 6 destination buffers -- confirm
 * against the implementation.
 * @requires AVX2
 */
void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                       unsigned char **dest);

#endif

/**********************************************************************
 * The remaining declarations are library support functions used in
 * GF(2^8) operations.
 */

/**
 * @brief Single element GF(2^8) multiply.
 *
 * Per the file overview, the base GF multiply and divide functions can be
 * sped up by defining GF_LARGE_TABLES, at the expense of memory size.
 *
 * @param a  Multiplicand a
 * @param b  Multiplicand b
 * @returns  Product of a and b in GF(2^8)
 */

unsigned char gf_mul(unsigned char a, unsigned char b);

/**
 * @brief Single element GF(2^8) inverse.
 *
 * NOTE(review): zero has no multiplicative inverse in a field; the value
 * returned for a == 0 is not specified here -- check the implementation
 * before relying on it.
 *
 * @param a  Input element
 * @returns  Field element b such that a x b = {1}
 */

unsigned char gf_inv(unsigned char a);

/**
 * @brief Generate a matrix of coefficients to be used for encoding.
 *
 * Vandermonde matrix example of encoding coefficients where high portion of
 * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
 * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
 * erasure encoding but does not guarantee that every sub-matrix is invertible.
 * For large pairs of m and k it is possible to find cases where the decode
 * matrix chosen from sources and parity is not invertible. Users may want to
 * adjust for certain pairs m and k.
* If m and k satisfy one of the following
 * inequalities, no adjustment is required:
 *
 * - k <= 3
 * - k = 4, m <= 25
 * - k = 5, m <= 10
 * - k <= 21, m-k = 4
 * - m - k <= 3.
 *
 * @param a  [m x k] array to hold coefficients
 * @param m  number of rows in matrix corresponding to srcs + parity.
 * @param k  number of columns in matrix corresponding to srcs.
 * @returns  none
 */

void gf_gen_rs_matrix(unsigned char *a, int m, int k);

/**
 * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
 *
 * Cauchy matrix example of encoding coefficients: the high portion of the
 * matrix (presumably rows 0..k-1, given the index ranges below) is the
 * identity matrix I, and the lower portion is constructed as
 * 1/(i + j) | i != j, with i:{0,k-1} and j:{k,m-1}. Any sub-matrix of a
 * Cauchy matrix should be invertible.
 *
 * @param a  [m x k] array to hold coefficients
 * @param m  number of rows in matrix corresponding to srcs + parity.
 * @param k  number of columns in matrix corresponding to srcs.
 * @returns  none
 */

void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);

/**
 * @brief Invert a matrix in GF(2^8)
 *
 * NOTE(review): in is not const-qualified; check whether the implementation
 * overwrites the input matrix before reusing it after the call.
 *
 * @param in  input matrix
 * @param out output matrix such that [in] x [out] = [I] - identity matrix
 * @param n   size of matrix [nxn]
 * @returns 0 on success; any other value means the input matrix is singular
 *          and could not be inverted.
 */

int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);


/*************************************************************/

#ifdef __cplusplus
} /* closes the extern "C" block opened at the top of this header */
#endif

#endif //_ERASURE_CODE_H_
