1 /********************************************************************** 2 Copyright(c) 2011-2015 Intel Corporation All rights reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 * Redistributions of source code must retain the above copyright 8 notice, this list of conditions and the following disclaimer. 9 * Redistributions in binary form must reproduce the above copyright 10 notice, this list of conditions and the following disclaimer in 11 the documentation and/or other materials provided with the 12 distribution. 13 * Neither the name of Intel Corporation nor the names of its 14 contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 **********************************************************************/ 29 30 #ifndef _ERASURE_CODE_H_ 31 #define _ERASURE_CODE_H_ 32 33 /** 34 * @file erasure_code.h 35 * @brief Interface to functions supporting erasure code encode and decode. 36 * 37 * This file defines the interface to optimized functions used in erasure 38 * codes. 
Encode and decode of erasures in GF(2^8) are made by calculating the 39 * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a 40 * set of coefficients. Values for the coefficients are determined by the type 41 * of erasure code. Using a general dot product means that any sequence of 42 * coefficients may be used including erasure codes based on random 43 * coefficients. 44 * Multiple versions of dot product are supplied to calculate 1-6 output 45 * vectors in one pass. 46 * Base GF multiply and divide functions can be sped up by defining 47 * GF_LARGE_TABLES at the expense of memory size. 48 * 49 */ 50 51 #include "gf_vect_mul.h" 52 53 #ifdef __cplusplus 54 extern "C" { 55 #endif 56 57 /** 58 * @brief Initialize tables for fast Erasure Code encode and decode. 59 * 60 * Generates the expanded tables needed for fast encode or decode for erasure 61 * codes on blocks of data. 32 bytes are generated for each input coefficient. 62 * 63 * @param k The number of vector sources or rows in the generator matrix 64 * for coding. 65 * @param rows The number of output vectors to concurrently encode/decode. 66 * @param a Pointer to sets of arrays of input coefficients used to encode 67 * or decode data. 68 * @param gftbls Pointer to start of space for concatenated output tables 69 * generated from input coefficients. Must be of size 32*k*rows. 70 * @returns none 71 */ 72 73 void 74 ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls); 75 76 /** 77 * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version. 78 * 79 * Baseline version of ec_init_tables() with same parameters. 80 */ 81 82 void 83 ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls); 84 85 /** 86 * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. 
87 * 88 * Given a list of source data blocks, generate one or multiple blocks of 89 * encoded data as specified by a matrix of GF(2^8) coefficients. When given a 90 * suitable set of coefficients, this function will perform the fast generation 91 * or decoding of Reed-Solomon type erasure codes. 92 * 93 * This function determines what instruction sets are enabled and 94 * selects the appropriate version at runtime. 95 * 96 * @param len Length of each block of data (vector) of source or dest data. 97 * @param k The number of vector sources or rows in the generator matrix 98 * for coding. 99 * @param rows The number of output vectors to concurrently encode/decode. 100 * @param gftbls Pointer to array of input tables generated from coding 101 * coefficients in ec_init_tables(). Must be of size 32*k*rows 102 * @param data Array of pointers to source input buffers. 103 * @param coding Array of pointers to coded output buffers. 104 * @returns none 105 */ 106 107 void 108 ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 109 unsigned char **coding); 110 111 /** 112 * @brief Generate or decode erasure codes on blocks of data, runs baseline version. 113 * 114 * Baseline version of ec_encode_data() with same parameters. The prototype names them srcs, dests, v, src and dest, positionally in place of k, rows, gftbls, data and coding. 115 */ 116 void 117 ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, 118 unsigned char **dest); 119 120 /** 121 * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate 122 * version. 123 * 124 * Given one source data block, update one or multiple blocks of encoded data as 125 * specified by a matrix of GF(2^8) coefficients. When given a suitable set of 126 * coefficients, this function will perform the fast generation or decoding of 127 * Reed-Solomon type erasure codes from one input source at a time. 128 * 129 * This function determines what instruction sets are enabled and selects the 130 * appropriate version at runtime. 
131 * 132 * @param len Length of each block of data (vector) of source or dest data. 133 * @param k The number of vector sources or rows in the generator matrix 134 * for coding. 135 * @param rows The number of output vectors to concurrently encode/decode. 136 * @param vec_i The vector index corresponding to the single input source. 137 * @param g_tbls Pointer to array of input tables generated from coding 138 * coefficients in ec_init_tables(). Must be of size 32*k*rows 139 * @param data Pointer to single input source used to update output parity. 140 * @param coding Array of pointers to coded output buffers. 141 * @returns none 142 */ 143 void 144 ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 145 unsigned char *data, unsigned char **coding); 146 147 /** 148 * @brief Generate update for encode or decode of erasure codes from single source, runs baseline version. 149 * 150 * Baseline version of ec_encode_data_update(). The argument list has the same types and order; the arguments in the g_tbls and coding positions are named v and dest in this prototype. 151 */ 152 153 void 154 ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, 155 unsigned char *data, unsigned char **dest); 156 157 /** 158 * @brief GF(2^8) vector dot product, runs baseline version. 159 * 160 * Does a GF(2^8) dot product across each byte of the input array and a constant 161 * set of coefficients to produce each byte of the output. Can be used for 162 * erasure coding encode and decode. Function requires pre-calculation of a 163 * 32*vlen byte constant array based on the input coefficients. 164 * 165 * @param len Length of each vector in bytes. Must be >= 16. 166 * @param vlen Number of vector sources. 167 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 168 * on the array of input coefficients. Only elements 32*CONST*j + 1 169 * of this array are used, where j = (0, 1, 2...) and CONST is the 170 * number of elements in the array of input coefficients. The 171 * elements used correspond to the original input coefficients. 
172 * @param src Array of pointers to source inputs. 173 * @param dest Pointer to destination data array. 174 * @returns none 175 */ 176 177 void 178 gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src, 179 unsigned char *dest); 180 181 /** 182 * @brief GF(2^8) vector dot product, runs appropriate version. 183 * 184 * Does a GF(2^8) dot product across each byte of the input array and a constant 185 * set of coefficients to produce each byte of the output. Can be used for 186 * erasure coding encode and decode. Function requires pre-calculation of a 187 * 32*vlen byte constant array based on the input coefficients. 188 * 189 * This function determines what instruction sets are enabled and 190 * selects the appropriate version at runtime. 191 * 192 * @param len Length of each vector in bytes. Must be >= 32. 193 * @param vlen Number of vector sources. 194 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 195 * on the array of input coefficients. 196 * @param src Array of pointers to source inputs. 197 * @param dest Pointer to destination data array. 198 * @returns none 199 */ 200 201 void 202 gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src, 203 unsigned char *dest); 204 205 /** 206 * @brief GF(2^8) vector multiply accumulate, runs appropriate version. 207 * 208 * Does a GF(2^8) multiply across each byte of input source with expanded 209 * constant and add to destination array. Can be used for erasure coding encode 210 * and decode update when only one source is available at a time. Function 211 * requires pre-calculation of a 32*vec byte constant array based on the input 212 * coefficients. 213 * 214 * This function determines what instruction sets are enabled and selects the 215 * appropriate version at runtime. 216 * 217 * @param len Length of each vector in bytes. Must be >= 64. 218 * @param vec The number of vector sources or rows in the generator matrix 219 * for coding. 
220 * @param vec_i The vector index corresponding to the single input source. 221 * @param gftbls Pointer to array of input tables generated from coding 222 * coefficients in ec_init_tables(). Must be of size 32*vec. 223 * @param src Pointer to source input array. 224 * @param dest Pointer to destination data array. 225 * @returns none 226 */ 227 228 void 229 gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 230 unsigned char *dest); 231 232 /** 233 * @brief GF(2^8) vector multiply accumulate, baseline version. 234 * 235 * Baseline version of gf_vect_mad() with same parameters. The prototype names the table pointer v in the position of gftbls. 236 */ 237 238 void 239 gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, 240 unsigned char *dest); 241 242 // x86 only 243 #if defined(__i386__) || defined(__x86_64__) 244 245 /** 246 * @brief Generate or decode erasure codes on blocks of data. 247 * 248 * Arch specific version of ec_encode_data() with same parameters. 249 * @requires SSE4.1 250 */ 251 void 252 ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 253 unsigned char **coding); 254 255 /** 256 * @brief Generate or decode erasure codes on blocks of data. 257 * 258 * Arch specific version of ec_encode_data() with same parameters. 259 * @requires AVX 260 */ 261 void 262 ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 263 unsigned char **coding); 264 265 /** 266 * @brief Generate or decode erasure codes on blocks of data. 267 * 268 * Arch specific version of ec_encode_data() with same parameters. 269 * @requires AVX2 270 */ 271 void 272 ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 273 unsigned char **coding); 274 275 /** 276 * @brief Generate update for encode or decode of erasure codes from single source. 277 * 278 * Arch specific version of ec_encode_data_update() with same parameters. 
279 * @requires SSE4.1 280 */ 281 282 void 283 ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 284 unsigned char *data, unsigned char **coding); 285 286 /** 287 * @brief Generate update for encode or decode of erasure codes from single source. 288 * 289 * Arch specific version of ec_encode_data_update() with same parameters. 290 * @requires AVX 291 */ 292 293 void 294 ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 295 unsigned char *data, unsigned char **coding); 296 297 /** 298 * @brief Generate update for encode or decode of erasure codes from single source. 299 * 300 * Arch specific version of ec_encode_data_update() with same parameters. 301 * @requires AVX2 302 */ 303 304 void 305 ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 306 unsigned char *data, unsigned char **coding); 307 308 /** 309 * @brief GF(2^8) vector dot product. 310 * 311 * Does a GF(2^8) dot product across each byte of the input array and a constant 312 * set of coefficients to produce each byte of the output. Can be used for 313 * erasure coding encode and decode. Function requires pre-calculation of a 314 * 32*vlen byte constant array based on the input coefficients. 315 * @requires SSE4.1 316 * 317 * @param len Length of each vector in bytes. Must be >= 16. 318 * @param vlen Number of vector sources. 319 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 320 * on the array of input coefficients. 321 * @param src Array of pointers to source inputs. 322 * @param dest Pointer to destination data array. 323 * @returns none 324 */ 325 326 void 327 gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 328 unsigned char *dest); 329 330 /** 331 * @brief GF(2^8) vector dot product. 
332 * 333 * Does a GF(2^8) dot product across each byte of the input array and a constant 334 * set of coefficients to produce each byte of the output. Can be used for 335 * erasure coding encode and decode. Function requires pre-calculation of a 336 * 32*vlen byte constant array based on the input coefficients. 337 * @requires AVX 338 * 339 * @param len Length of each vector in bytes. Must be >= 16. 340 * @param vlen Number of vector sources. 341 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 342 * on the array of input coefficients. 343 * @param src Array of pointers to source inputs. 344 * @param dest Pointer to destination data array. 345 * @returns none 346 */ 347 348 void 349 gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 350 unsigned char *dest); 351 352 /** 353 * @brief GF(2^8) vector dot product. 354 * 355 * Does a GF(2^8) dot product across each byte of the input array and a constant 356 * set of coefficients to produce each byte of the output. Can be used for 357 * erasure coding encode and decode. Function requires pre-calculation of a 358 * 32*vlen byte constant array based on the input coefficients. 359 * @requires AVX2 360 * 361 * @param len Length of each vector in bytes. Must be >= 32. 362 * @param vlen Number of vector sources. 363 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 364 * on the array of input coefficients. 365 * @param src Array of pointers to source inputs. 366 * @param dest Pointer to destination data array. 367 * @returns none 368 */ 369 370 void 371 gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 372 unsigned char *dest); 373 374 /** 375 * @brief GF(2^8) vector dot product with two outputs. 376 * 377 * Vector dot product optimized to calculate two outputs at a time. 
Does two 378 * GF(2^8) dot products across each byte of the input array and two constant 379 * sets of coefficients to produce each byte of the outputs. Can be used for 380 * erasure coding encode and decode. Function requires pre-calculation of a 381 * 2*32*vlen byte constant array based on the two sets of input coefficients. 382 * @requires SSE4.1 383 * 384 * @param len Length of each vector in bytes. Must be >= 16. 385 * @param vlen Number of vector sources. 386 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 387 * based on the array of input coefficients. 388 * @param src Array of pointers to source inputs. 389 * @param dest Array of pointers to destination data buffers. 390 * @returns none 391 */ 392 393 void 394 gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 395 unsigned char **dest); 396 397 /** 398 * @brief GF(2^8) vector dot product with two outputs. 399 * 400 * Vector dot product optimized to calculate two outputs at a time. Does two 401 * GF(2^8) dot products across each byte of the input array and two constant 402 * sets of coefficients to produce each byte of the outputs. Can be used for 403 * erasure coding encode and decode. Function requires pre-calculation of a 404 * 2*32*vlen byte constant array based on the two sets of input coefficients. 405 * @requires AVX 406 * 407 * @param len Length of each vector in bytes. Must be >= 16. 408 * @param vlen Number of vector sources. 409 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 410 * based on the array of input coefficients. 411 * @param src Array of pointers to source inputs. 412 * @param dest Array of pointers to destination data buffers. 413 * @returns none 414 */ 415 416 void 417 gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 418 unsigned char **dest); 419 420 /** 421 * @brief GF(2^8) vector dot product with two outputs. 
422 * 423 * Vector dot product optimized to calculate two outputs at a time. Does two 424 * GF(2^8) dot products across each byte of the input array and two constant 425 * sets of coefficients to produce each byte of the outputs. Can be used for 426 * erasure coding encode and decode. Function requires pre-calculation of a 427 * 2*32*vlen byte constant array based on the two sets of input coefficients. 428 * @requires AVX2 429 * 430 * @param len Length of each vector in bytes. Must be >= 32. 431 * @param vlen Number of vector sources. 432 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 433 * based on the array of input coefficients. 434 * @param src Array of pointers to source inputs. 435 * @param dest Array of pointers to destination data buffers. 436 * @returns none 437 */ 438 439 void 440 gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 441 unsigned char **dest); 442 443 /** 444 * @brief GF(2^8) vector dot product with three outputs. 445 * 446 * Vector dot product optimized to calculate three outputs at a time. Does three 447 * GF(2^8) dot products across each byte of the input array and three constant 448 * sets of coefficients to produce each byte of the outputs. Can be used for 449 * erasure coding encode and decode. Function requires pre-calculation of a 450 * 3*32*vlen byte constant array based on the three sets of input coefficients. 451 * @requires SSE4.1 452 * 453 * @param len Length of each vector in bytes. Must be >= 16. 454 * @param vlen Number of vector sources. 455 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 456 * based on the array of input coefficients. 457 * @param src Array of pointers to source inputs. 458 * @param dest Array of pointers to destination data buffers. 
459 * @returns none 460 */ 461 462 void 463 gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 464 unsigned char **dest); 465 466 /** 467 * @brief GF(2^8) vector dot product with three outputs. 468 * 469 * Vector dot product optimized to calculate three outputs at a time. Does three 470 * GF(2^8) dot products across each byte of the input array and three constant 471 * sets of coefficients to produce each byte of the outputs. Can be used for 472 * erasure coding encode and decode. Function requires pre-calculation of a 473 * 3*32*vlen byte constant array based on the three sets of input coefficients. 474 * @requires AVX 475 * 476 * @param len Length of each vector in bytes. Must be >= 16. 477 * @param vlen Number of vector sources. 478 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 479 * based on the array of input coefficients. 480 * @param src Array of pointers to source inputs. 481 * @param dest Array of pointers to destination data buffers. 482 * @returns none 483 */ 484 485 void 486 gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 487 unsigned char **dest); 488 489 /** 490 * @brief GF(2^8) vector dot product with three outputs. 491 * 492 * Vector dot product optimized to calculate three outputs at a time. Does three 493 * GF(2^8) dot products across each byte of the input array and three constant 494 * sets of coefficients to produce each byte of the outputs. Can be used for 495 * erasure coding encode and decode. Function requires pre-calculation of a 496 * 3*32*vlen byte constant array based on the three sets of input coefficients. 497 * @requires AVX2 498 * 499 * @param len Length of each vector in bytes. Must be >= 32. 500 * @param vlen Number of vector sources. 501 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 502 * based on the array of input coefficients. 503 * @param src Array of pointers to source inputs. 
504 * @param dest Array of pointers to destination data buffers. 505 * @returns none 506 */ 507 508 void 509 gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 510 unsigned char **dest); 511 512 /** 513 * @brief GF(2^8) vector dot product with four outputs. 514 * 515 * Vector dot product optimized to calculate four outputs at a time. Does four 516 * GF(2^8) dot products across each byte of the input array and four constant 517 * sets of coefficients to produce each byte of the outputs. Can be used for 518 * erasure coding encode and decode. Function requires pre-calculation of a 519 * 4*32*vlen byte constant array based on the four sets of input coefficients. 520 * @requires SSE4.1 521 * 522 * @param len Length of each vector in bytes. Must be >= 16. 523 * @param vlen Number of vector sources. 524 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 525 * based on the array of input coefficients. 526 * @param src Array of pointers to source inputs. 527 * @param dest Array of pointers to destination data buffers. 528 * @returns none 529 */ 530 531 void 532 gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 533 unsigned char **dest); 534 535 /** 536 * @brief GF(2^8) vector dot product with four outputs. 537 * 538 * Vector dot product optimized to calculate four outputs at a time. Does four 539 * GF(2^8) dot products across each byte of the input array and four constant 540 * sets of coefficients to produce each byte of the outputs. Can be used for 541 * erasure coding encode and decode. Function requires pre-calculation of a 542 * 4*32*vlen byte constant array based on the four sets of input coefficients. 543 * @requires AVX 544 * 545 * @param len Length of each vector in bytes. Must be >= 16. 546 * @param vlen Number of vector sources. 547 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 548 * based on the array of input coefficients. 
549 * @param src Array of pointers to source inputs. 550 * @param dest Array of pointers to destination data buffers. 551 * @returns none 552 */ 553 554 void 555 gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 556 unsigned char **dest); 557 558 /** 559 * @brief GF(2^8) vector dot product with four outputs. 560 * 561 * Vector dot product optimized to calculate four outputs at a time. Does four 562 * GF(2^8) dot products across each byte of the input array and four constant 563 * sets of coefficients to produce each byte of the outputs. Can be used for 564 * erasure coding encode and decode. Function requires pre-calculation of a 565 * 4*32*vlen byte constant array based on the four sets of input coefficients. 566 * @requires AVX2 567 * 568 * @param len Length of each vector in bytes. Must be >= 32. 569 * @param vlen Number of vector sources. 570 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 571 * based on the array of input coefficients. 572 * @param src Array of pointers to source inputs. 573 * @param dest Array of pointers to destination data buffers. 574 * @returns none 575 */ 576 577 void 578 gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 579 unsigned char **dest); 580 581 /** 582 * @brief GF(2^8) vector dot product with five outputs. 583 * 584 * Vector dot product optimized to calculate five outputs at a time. Does five 585 * GF(2^8) dot products across each byte of the input array and five constant 586 * sets of coefficients to produce each byte of the outputs. Can be used for 587 * erasure coding encode and decode. Function requires pre-calculation of a 588 * 5*32*vlen byte constant array based on the five sets of input coefficients. 589 * @requires SSE4.1 590 * 591 * @param len Length of each vector in bytes. Must be >= 16. 592 * @param vlen Number of vector sources. 
593 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 594 * based on the array of input coefficients. 595 * @param src Array of pointers to source inputs. 596 * @param dest Array of pointers to destination data buffers. 597 * @returns none 598 */ 599 600 void 601 gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 602 unsigned char **dest); 603 604 /** 605 * @brief GF(2^8) vector dot product with five outputs. 606 * 607 * Vector dot product optimized to calculate five outputs at a time. Does five 608 * GF(2^8) dot products across each byte of the input array and five constant 609 * sets of coefficients to produce each byte of the outputs. Can be used for 610 * erasure coding encode and decode. Function requires pre-calculation of a 611 * 5*32*vlen byte constant array based on the five sets of input coefficients. 612 * @requires AVX 613 * 614 * @param len Length of each vector in bytes. Must be >= 16. 615 * @param vlen Number of vector sources. 616 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 617 * based on the array of input coefficients. 618 * @param src Array of pointers to source inputs. 619 * @param dest Array of pointers to destination data buffers. 620 * @returns none 621 */ 622 623 void 624 gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 625 unsigned char **dest); 626 627 /** 628 * @brief GF(2^8) vector dot product with five outputs. 629 * 630 * Vector dot product optimized to calculate five outputs at a time. Does five 631 * GF(2^8) dot products across each byte of the input array and five constant 632 * sets of coefficients to produce each byte of the outputs. Can be used for 633 * erasure coding encode and decode. Function requires pre-calculation of a 634 * 5*32*vlen byte constant array based on the five sets of input coefficients. 635 * @requires AVX2 636 * 637 * @param len Length of each vector in bytes. Must be >= 32. 
638 * @param vlen Number of vector sources. 639 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 640 * based on the array of input coefficients. 641 * @param src Array of pointers to source inputs. 642 * @param dest Array of pointers to destination data buffers. 643 * @returns none 644 */ 645 646 void 647 gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 648 unsigned char **dest); 649 650 /** 651 * @brief GF(2^8) vector dot product with six outputs. 652 * 653 * Vector dot product optimized to calculate six outputs at a time. Does six 654 * GF(2^8) dot products across each byte of the input array and six constant 655 * sets of coefficients to produce each byte of the outputs. Can be used for 656 * erasure coding encode and decode. Function requires pre-calculation of a 657 * 6*32*vlen byte constant array based on the six sets of input coefficients. 658 * @requires SSE4.1 659 * 660 * @param len Length of each vector in bytes. Must be >= 16. 661 * @param vlen Number of vector sources. 662 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 663 * based on the array of input coefficients. 664 * @param src Array of pointers to source inputs. 665 * @param dest Array of pointers to destination data buffers. 666 * @returns none 667 */ 668 669 void 670 gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 671 unsigned char **dest); 672 673 /** 674 * @brief GF(2^8) vector dot product with six outputs. 675 * 676 * Vector dot product optimized to calculate six outputs at a time. Does six 677 * GF(2^8) dot products across each byte of the input array and six constant 678 * sets of coefficients to produce each byte of the outputs. Can be used for 679 * erasure coding encode and decode. Function requires pre-calculation of a 680 * 6*32*vlen byte constant array based on the six sets of input coefficients. 
681 * @requires AVX 682 * 683 * @param len Length of each vector in bytes. Must be >= 16. 684 * @param vlen Number of vector sources. 685 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 686 * based on the array of input coefficients. 687 * @param src Array of pointers to source inputs. 688 * @param dest Array of pointers to destination data buffers. 689 * @returns none 690 */ 691 692 void 693 gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 694 unsigned char **dest); 695 696 /** 697 * @brief GF(2^8) vector dot product with six outputs. 698 * 699 * Vector dot product optimized to calculate six outputs at a time. Does six 700 * GF(2^8) dot products across each byte of the input array and six constant 701 * sets of coefficients to produce each byte of the outputs. Can be used for 702 * erasure coding encode and decode. Function requires pre-calculation of a 703 * 6*32*vlen byte constant array based on the six sets of input coefficients. 704 * @requires AVX2 705 * 706 * @param len Length of each vector in bytes. Must be >= 32. 707 * @param vlen Number of vector sources. 708 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 709 * based on the array of input coefficients. 710 * @param src Array of vlen pointers to source inputs. 711 * @param dest Array of six pointers to destination data buffers. 712 * @returns none 713 */ 714 715 void 716 gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 717 unsigned char **dest); 718 719 /** 720 * @brief GF(2^8) vector multiply accumulate, arch specific version. 721 * 722 * Arch specific version of gf_vect_mad() with same parameters; dest is a single destination buffer pointer, not an array of buffer pointers. 723 * @requires SSE4.1 724 */ 725 726 void 727 gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 728 unsigned char *dest); 729 /** 730 * @brief GF(2^8) vector multiply accumulate, arch specific version.
731 * 732 * Arch specific version of gf_vect_mad() with same parameters; dest is a single destination buffer pointer, not an array of buffer pointers. 733 * @requires AVX 734 */ 735 736 void 737 gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 738 unsigned char *dest); 739 740 /** 741 * @brief GF(2^8) vector multiply accumulate, arch specific version. 742 * 743 * Arch specific version of gf_vect_mad() with same parameters; dest is a single destination buffer pointer, not an array of buffer pointers. 744 * @requires AVX2 745 */ 746 747 void 748 gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 749 unsigned char *dest); 750 751 /** 752 * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. 753 * 754 * Does a GF(2^8) multiply across each byte of input source with expanded 755 * constants and adds the products to the destination arrays. Can be used for erasure coding 756 * encode and decode update when only one source is available at a 757 * time. Function requires pre-calculation of a 32*vec byte constant array based 758 * on the input coefficients. 759 * @requires SSE4.1 760 * 761 * @param len Length of each vector in bytes. Must be >= 32. 762 * @param vec The number of vector sources or rows in the generator matrix 763 * for coding. 764 * @param vec_i The vector index corresponding to the single input source. 765 * @param gftbls Pointer to array of input tables generated from coding 766 * coefficients in ec_init_tables(). Must be of size 32*vec. NOTE(review): with 2 destinations the table presumably has to hold 2*32*vec bytes, by analogy with the 6*32*vlen table of gf_6vect_dot_prod_avx2() - confirm. 767 * @param src Pointer to source input array. 768 * @param dest Array of pointers to the destination buffers, which are both read and updated. 769 * @returns none 770 */ 771 772 void 773 gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 774 unsigned char **dest); 775 776 /** 777 * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). 778 * @requires AVX 779 */ 780 void 781 gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 782 unsigned char **dest); 783 /** 784 * @brief GF(2^8) vector multiply with 2 accumulate. 
AVX2 version of gf_2vect_mad_sse(). 785 * @requires AVX2 786 */ 787 void 788 gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 789 unsigned char **dest); 790 791 /** 792 * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. 793 * 794 * Does a GF(2^8) multiply across each byte of input source with expanded 795 * constants and adds the products to the destination arrays. Can be used for erasure coding 796 * encode and decode update when only one source is available at a 797 * time. Function requires pre-calculation of a 32*vec byte constant array based 798 * on the input coefficients. 799 * @requires SSE4.1 800 * 801 * @param len Length of each vector in bytes. Must be >= 32. 802 * @param vec The number of vector sources or rows in the generator matrix 803 * for coding. 804 * @param vec_i The vector index corresponding to the single input source. 805 * @param gftbls Pointer to array of input tables generated from coding 806 * coefficients in ec_init_tables(). Must be of size 32*vec. NOTE(review): with 3 destinations the table presumably has to hold 3*32*vec bytes - confirm. 807 * @param src Pointer to source input array. 808 * @param dest Array of pointers to the destination buffers, which are both read and updated. 809 * @returns none 810 */ 811 812 void 813 gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 814 unsigned char **dest); 815 816 /** 817 * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(). 818 * @requires AVX 819 */ 820 void 821 gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 822 unsigned char **dest); 823 824 /** 825 * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(). 826 * @requires AVX2 827 */ 828 void 829 gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 830 unsigned char **dest); 831 832 /** 833 * @brief GF(2^8) vector multiply with 4 accumulate. SSE version. 
834 * 835 * Does a GF(2^8) multiply across each byte of input source with expanded 836 * constants and adds the products to the destination arrays. Can be used for erasure coding 837 * encode and decode update when only one source is available at a 838 * time. Function requires pre-calculation of a 32*vec byte constant array based 839 * on the input coefficients. 840 * @requires SSE4.1 841 * 842 * @param len Length of each vector in bytes. Must be >= 32. 843 * @param vec The number of vector sources or rows in the generator matrix 844 * for coding. 845 * @param vec_i The vector index corresponding to the single input source. 846 * @param gftbls Pointer to array of input tables generated from coding 847 * coefficients in ec_init_tables(). Must be of size 32*vec. NOTE(review): with 4 destinations the table presumably has to hold 4*32*vec bytes - confirm. 848 * @param src Pointer to source input array. 849 * @param dest Array of pointers to the destination buffers, which are both read and updated. 850 * @returns none 851 */ 852 853 void 854 gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 855 unsigned char **dest); 856 857 /** 858 * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(). 859 * @requires AVX 860 */ 861 void 862 gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 863 unsigned char **dest); 864 /** 865 * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(). 866 * @requires AVX2 867 */ 868 void 869 gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 870 unsigned char **dest); 871 872 /** 873 * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. Takes the same parameter list as gf_4vect_mad_sse(); presumably the same contract applied to 5 destination buffers - TODO confirm. 874 * @requires SSE4.1 875 */ 876 void 877 gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 878 unsigned char **dest); 879 880 /** 881 * @brief GF(2^8) vector multiply with 5 accumulate. AVX version of gf_5vect_mad_sse(). 
882 * @requires AVX 883 */ 884 void 885 gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 886 unsigned char **dest); 887 /** 888 * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version of gf_5vect_mad_sse(). 889 * @requires AVX2 890 */ 891 void 892 gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 893 unsigned char **dest); 894 895 /** 896 * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. Takes the same parameter list as gf_4vect_mad_sse(); presumably the same contract applied to 6 destination buffers - TODO confirm. 897 * @requires SSE4.1 898 */ 899 void 900 gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 901 unsigned char **dest); 902 /** 903 * @brief GF(2^8) vector multiply with 6 accumulate. AVX version of gf_6vect_mad_sse(). 904 * @requires AVX 905 */ 906 void 907 gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 908 unsigned char **dest); 909 910 /** 911 * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version of gf_6vect_mad_sse(). 912 * @requires AVX2 913 */ 914 void 915 gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 916 unsigned char **dest); 917 918 #endif 919 920 /********************************************************************** 921 * The remaining declarations are library support functions used in GF(2^8) operations. 922 */ 923 924 /** 925 * @brief Single element GF(2^8) multiply. 926 * 927 * @param a Multiplicand a 928 * @param b Multiplicand b 929 * @returns Product of a and b in GF(2^8) 930 */ 931 932 unsigned char 933 gf_mul(unsigned char a, unsigned char b); 934 935 /** 936 * @brief Single element GF(2^8) inverse. 937 * 938 * @param a Input element. NOTE(review): 0 has no multiplicative inverse; the result for a == 0 is not documented here - confirm before relying on it. 939 * @returns Field element b such that a x b = {1} 940 */ 941 942 unsigned char 943 gf_inv(unsigned char a); 944 945 /** 946 * @brief Generate a matrix of coefficients to be used for encoding. 
947 * 948 * Vandermonde matrix example of encoding coefficients where high portion of 949 * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)} 950 * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in 951 * erasure encoding but does not guarantee that every sub-matrix is invertible. For 952 * large pairs of m and k it is possible to find cases where the decode matrix 953 * chosen from sources and parity is not invertible. Users may want to adjust 954 * for certain pairs m and k. If m and k satisfy one of the following 955 * conditions, no adjustment is required: 956 * 957 * - k <= 3 958 * - k = 4, m <= 25 959 * - k = 5, m <= 10 960 * - k <= 21, m-k = 4 961 * - m - k <= 3. 962 * 963 * @param a Caller-provided [m x k] array (m*k bytes) to hold coefficients 964 * @param m number of rows in matrix corresponding to srcs + parity. 965 * @param k number of columns in matrix corresponding to srcs. 966 * @returns none 967 */ 968 969 void 970 gf_gen_rs_matrix(unsigned char *a, int m, int k); 971 972 /** 973 * @brief Generate a Cauchy matrix of coefficients to be used for encoding. 974 * 975 * Cauchy matrix example of encoding coefficients where high portion of matrix 976 * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, 977 * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible. 978 * 979 * @param a Caller-provided [m x k] array (m*k bytes) to hold coefficients 980 * @param m number of rows in matrix corresponding to srcs + parity. 981 * @param k number of columns in matrix corresponding to srcs. 982 * @returns none 983 */ 984 985 void 986 gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); 987 988 /** 989 * @brief Invert a matrix in GF(2^8) 990 * 991 * Attempts to construct an n x n inverse of the input matrix. Returns non-zero 992 * if singular. Will always destroy input matrix in process. 
993 * 994 * @param in input matrix [n x n], destroyed by invert process 995 * @param out output matrix such that [in] x [out] = [I] - identity matrix 996 * @param n size of matrix [nxn] 997 * @returns 0 on success, non-zero if the input matrix is singular 998 */ 999 1000 int 1001 gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); 1002 1003 /*************************************************************/ 1004 1005 #ifdef __cplusplus 1006 } 1007 #endif 1008 1009 #endif //_ERASURE_CODE_H_ 1010