100c1efc1SGreg Tucker /********************************************************************** 200c1efc1SGreg Tucker Copyright(c) 2011-2015 Intel Corporation All rights reserved. 300c1efc1SGreg Tucker 400c1efc1SGreg Tucker Redistribution and use in source and binary forms, with or without 500c1efc1SGreg Tucker modification, are permitted provided that the following conditions 600c1efc1SGreg Tucker are met: 700c1efc1SGreg Tucker * Redistributions of source code must retain the above copyright 800c1efc1SGreg Tucker notice, this list of conditions and the following disclaimer. 900c1efc1SGreg Tucker * Redistributions in binary form must reproduce the above copyright 1000c1efc1SGreg Tucker notice, this list of conditions and the following disclaimer in 1100c1efc1SGreg Tucker the documentation and/or other materials provided with the 1200c1efc1SGreg Tucker distribution. 1300c1efc1SGreg Tucker * Neither the name of Intel Corporation nor the names of its 1400c1efc1SGreg Tucker contributors may be used to endorse or promote products derived 1500c1efc1SGreg Tucker from this software without specific prior written permission. 1600c1efc1SGreg Tucker 1700c1efc1SGreg Tucker THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1800c1efc1SGreg Tucker "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1900c1efc1SGreg Tucker LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2000c1efc1SGreg Tucker A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2100c1efc1SGreg Tucker OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2200c1efc1SGreg Tucker SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2300c1efc1SGreg Tucker LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2400c1efc1SGreg Tucker DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2500c1efc1SGreg Tucker THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2600c1efc1SGreg Tucker (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2700c1efc1SGreg Tucker OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2800c1efc1SGreg Tucker **********************************************************************/ 2900c1efc1SGreg Tucker 3000c1efc1SGreg Tucker #ifndef _ERASURE_CODE_H_ 3100c1efc1SGreg Tucker #define _ERASURE_CODE_H_ 3200c1efc1SGreg Tucker 3300c1efc1SGreg Tucker /** 3400c1efc1SGreg Tucker * @file erasure_code.h 3500c1efc1SGreg Tucker * @brief Interface to functions supporting erasure code encode and decode. 3600c1efc1SGreg Tucker * 3700c1efc1SGreg Tucker * This file defines the interface to optimized functions used in erasure 3800c1efc1SGreg Tucker * codes. Encode and decode of erasures in GF(2^8) are made by calculating the 3900c1efc1SGreg Tucker * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a 4000c1efc1SGreg Tucker * set of coefficients. Values for the coefficients are determined by the type 4100c1efc1SGreg Tucker * of erasure code. Using a general dot product means that any sequence of 4200c1efc1SGreg Tucker * coefficients may be used including erasure codes based on random 4300c1efc1SGreg Tucker * coefficients. 4400c1efc1SGreg Tucker * Multiple versions of dot product are supplied to calculate 1-6 output 4500c1efc1SGreg Tucker * vectors in one pass. 4600c1efc1SGreg Tucker * Base GF multiply and divide functions can be sped up by defining 4700c1efc1SGreg Tucker * GF_LARGE_TABLES at the expense of memory size. 4800c1efc1SGreg Tucker * 4900c1efc1SGreg Tucker */ 5000c1efc1SGreg Tucker 5100c1efc1SGreg Tucker #include "gf_vect_mul.h" 5200c1efc1SGreg Tucker 5300c1efc1SGreg Tucker #ifdef __cplusplus 5400c1efc1SGreg Tucker extern "C" { 5500c1efc1SGreg Tucker #endif 5600c1efc1SGreg Tucker 5700c1efc1SGreg Tucker /** 5800c1efc1SGreg Tucker * @brief Initialize tables for fast Erasure Code encode and decode. 5900c1efc1SGreg Tucker * 6000c1efc1SGreg Tucker * Generates the expanded tables needed for fast encode or decode for erasure 6100c1efc1SGreg Tucker * codes on blocks of data. 32bytes is generated for each input coefficient. 6200c1efc1SGreg Tucker * 6300c1efc1SGreg Tucker * @param k The number of vector sources or rows in the generator matrix 6400c1efc1SGreg Tucker * for coding. 6500c1efc1SGreg Tucker * @param rows The number of output vectors to concurrently encode/decode. 6600c1efc1SGreg Tucker * @param a Pointer to sets of arrays of input coefficients used to encode 6700c1efc1SGreg Tucker * or decode data. 6800c1efc1SGreg Tucker * @param gftbls Pointer to start of space for concatenated output tables 6900c1efc1SGreg Tucker * generated from input coefficients. Must be of size 32*k*rows. 7000c1efc1SGreg Tucker * @returns none 7100c1efc1SGreg Tucker */ 7200c1efc1SGreg Tucker 73*fa5b8bafSMarcel Cornu void 74*fa5b8bafSMarcel Cornu ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls); 7500c1efc1SGreg Tucker 7600c1efc1SGreg Tucker /** 77f971f023SPablo de Lara * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version. 78f971f023SPablo de Lara * 79f971f023SPablo de Lara * Baseline version of ec_encode_data() with same parameters. 80f971f023SPablo de Lara */ 81f971f023SPablo de Lara 82*fa5b8bafSMarcel Cornu void 83*fa5b8bafSMarcel Cornu ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls); 84f971f023SPablo de Lara 85f971f023SPablo de Lara /** 8600c1efc1SGreg Tucker * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. 8700c1efc1SGreg Tucker * 8800c1efc1SGreg Tucker * Given a list of source data blocks, generate one or multiple blocks of 8900c1efc1SGreg Tucker * encoded data as specified by a matrix of GF(2^8) coefficients. When given a 9000c1efc1SGreg Tucker * suitable set of coefficients, this function will perform the fast generation 9100c1efc1SGreg Tucker * or decoding of Reed-Solomon type erasure codes. 9200c1efc1SGreg Tucker * 9300c1efc1SGreg Tucker * This function determines what instruction sets are enabled and 9400c1efc1SGreg Tucker * selects the appropriate version at runtime. 9500c1efc1SGreg Tucker * 9600c1efc1SGreg Tucker * @param len Length of each block of data (vector) of source or dest data. 9700c1efc1SGreg Tucker * @param k The number of vector sources or rows in the generator matrix 9800c1efc1SGreg Tucker * for coding. 9900c1efc1SGreg Tucker * @param rows The number of output vectors to concurrently encode/decode. 10000c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding 10100c1efc1SGreg Tucker * coefficients in ec_init_tables(). Must be of size 32*k*rows 10200c1efc1SGreg Tucker * @param data Array of pointers to source input buffers. 10300c1efc1SGreg Tucker * @param coding Array of pointers to coded output buffers. 10400c1efc1SGreg Tucker * @returns none 10500c1efc1SGreg Tucker */ 10600c1efc1SGreg Tucker 107*fa5b8bafSMarcel Cornu void 108*fa5b8bafSMarcel Cornu ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 10900c1efc1SGreg Tucker unsigned char **coding); 11000c1efc1SGreg Tucker 11100c1efc1SGreg Tucker /** 11200c1efc1SGreg Tucker * @brief Generate or decode erasure codes on blocks of data, runs baseline version. 11300c1efc1SGreg Tucker * 11400c1efc1SGreg Tucker * Baseline version of ec_encode_data() with same parameters. 11500c1efc1SGreg Tucker */ 116*fa5b8bafSMarcel Cornu void 117*fa5b8bafSMarcel Cornu ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, 11800c1efc1SGreg Tucker unsigned char **dest); 11900c1efc1SGreg Tucker 12000c1efc1SGreg Tucker /** 121*fa5b8bafSMarcel Cornu * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate 122*fa5b8bafSMarcel Cornu * version. 12300c1efc1SGreg Tucker * 12400c1efc1SGreg Tucker * Given one source data block, update one or multiple blocks of encoded data as 12500c1efc1SGreg Tucker * specified by a matrix of GF(2^8) coefficients. When given a suitable set of 12600c1efc1SGreg Tucker * coefficients, this function will perform the fast generation or decoding of 12700c1efc1SGreg Tucker * Reed-Solomon type erasure codes from one input source at a time. 12800c1efc1SGreg Tucker * 12900c1efc1SGreg Tucker * This function determines what instruction sets are enabled and selects the 13000c1efc1SGreg Tucker * appropriate version at runtime. 13100c1efc1SGreg Tucker * 13200c1efc1SGreg Tucker * @param len Length of each block of data (vector) of source or dest data. 13300c1efc1SGreg Tucker * @param k The number of vector sources or rows in the generator matrix 13400c1efc1SGreg Tucker * for coding. 13500c1efc1SGreg Tucker * @param rows The number of output vectors to concurrently encode/decode. 13600c1efc1SGreg Tucker * @param vec_i The vector index corresponding to the single input source. 13700c1efc1SGreg Tucker * @param g_tbls Pointer to array of input tables generated from coding 13800c1efc1SGreg Tucker * coefficients in ec_init_tables(). Must be of size 32*k*rows 13900c1efc1SGreg Tucker * @param data Pointer to single input source used to update output parity. 14000c1efc1SGreg Tucker * @param coding Array of pointers to coded output buffers. 14100c1efc1SGreg Tucker * @returns none 14200c1efc1SGreg Tucker */ 143*fa5b8bafSMarcel Cornu void 144*fa5b8bafSMarcel Cornu ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 14500c1efc1SGreg Tucker unsigned char *data, unsigned char **coding); 14600c1efc1SGreg Tucker 14700c1efc1SGreg Tucker /** 14800c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source. 14900c1efc1SGreg Tucker * 15023937916SJohn Kariuki * Baseline version of ec_encode_data_update(). 15123937916SJohn Kariuki */ 15223937916SJohn Kariuki 153*fa5b8bafSMarcel Cornu void 154*fa5b8bafSMarcel Cornu ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, 15523937916SJohn Kariuki unsigned char *data, unsigned char **dest); 15623937916SJohn Kariuki 15723937916SJohn Kariuki /** 15823937916SJohn Kariuki * @brief GF(2^8) vector dot product, runs baseline version. 15923937916SJohn Kariuki * 16023937916SJohn Kariuki * Does a GF(2^8) dot product across each byte of the input array and a constant 16123937916SJohn Kariuki * set of coefficients to produce each byte of the output. Can be used for 16223937916SJohn Kariuki * erasure coding encode and decode. Function requires pre-calculation of a 16323937916SJohn Kariuki * 32*vlen byte constant array based on the input coefficients. 16423937916SJohn Kariuki * 16523937916SJohn Kariuki * @param len Length of each vector in bytes. Must be >= 16. 16623937916SJohn Kariuki * @param vlen Number of vector sources. 16723937916SJohn Kariuki * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 16823937916SJohn Kariuki * on the array of input coefficients. Only elements 32*CONST*j + 1 16923937916SJohn Kariuki * of this array are used, where j = (0, 1, 2...) and CONST is the 17023937916SJohn Kariuki * number of elements in the array of input coefficients. The 17123937916SJohn Kariuki * elements used correspond to the original input coefficients. 17223937916SJohn Kariuki * @param src Array of pointers to source inputs. 17323937916SJohn Kariuki * @param dest Pointer to destination data array. 17423937916SJohn Kariuki * @returns none 17523937916SJohn Kariuki */ 17623937916SJohn Kariuki 177*fa5b8bafSMarcel Cornu void 178*fa5b8bafSMarcel Cornu gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src, 179*fa5b8bafSMarcel Cornu unsigned char *dest); 18023937916SJohn Kariuki 18123937916SJohn Kariuki /** 18223937916SJohn Kariuki * @brief GF(2^8) vector dot product, runs appropriate version. 18323937916SJohn Kariuki * 18423937916SJohn Kariuki * Does a GF(2^8) dot product across each byte of the input array and a constant 18523937916SJohn Kariuki * set of coefficients to produce each byte of the output. Can be used for 18623937916SJohn Kariuki * erasure coding encode and decode. Function requires pre-calculation of a 18723937916SJohn Kariuki * 32*vlen byte constant array based on the input coefficients. 18823937916SJohn Kariuki * 18923937916SJohn Kariuki * This function determines what instruction sets are enabled and 19023937916SJohn Kariuki * selects the appropriate version at runtime. 19123937916SJohn Kariuki * 19223937916SJohn Kariuki * @param len Length of each vector in bytes. Must be >= 32. 19323937916SJohn Kariuki * @param vlen Number of vector sources. 19423937916SJohn Kariuki * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 19523937916SJohn Kariuki * on the array of input coefficients. 19623937916SJohn Kariuki * @param src Array of pointers to source inputs. 19723937916SJohn Kariuki * @param dest Pointer to destination data array. 19823937916SJohn Kariuki * @returns none 19923937916SJohn Kariuki */ 20023937916SJohn Kariuki 201*fa5b8bafSMarcel Cornu void 202*fa5b8bafSMarcel Cornu gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src, 203*fa5b8bafSMarcel Cornu unsigned char *dest); 20423937916SJohn Kariuki 20523937916SJohn Kariuki /** 20623937916SJohn Kariuki * @brief GF(2^8) vector multiply accumulate, runs appropriate version. 20723937916SJohn Kariuki * 20823937916SJohn Kariuki * Does a GF(2^8) multiply across each byte of input source with expanded 20923937916SJohn Kariuki * constant and add to destination array. Can be used for erasure coding encode 21023937916SJohn Kariuki * and decode update when only one source is available at a time. Function 21123937916SJohn Kariuki * requires pre-calculation of a 32*vec byte constant array based on the input 21223937916SJohn Kariuki * coefficients. 21323937916SJohn Kariuki * 21423937916SJohn Kariuki * This function determines what instruction sets are enabled and selects the 21523937916SJohn Kariuki * appropriate version at runtime. 21623937916SJohn Kariuki * 2174ac0e435SGreg Tucker * @param len Length of each vector in bytes. Must be >= 64. 21823937916SJohn Kariuki * @param vec The number of vector sources or rows in the generator matrix 21923937916SJohn Kariuki * for coding. 22023937916SJohn Kariuki * @param vec_i The vector index corresponding to the single input source. 22123937916SJohn Kariuki * @param gftbls Pointer to array of input tables generated from coding 22223937916SJohn Kariuki * coefficients in ec_init_tables(). Must be of size 32*vec. 22323937916SJohn Kariuki * @param src Array of pointers to source inputs. 22423937916SJohn Kariuki * @param dest Pointer to destination data array. 22523937916SJohn Kariuki * @returns none 22623937916SJohn Kariuki */ 22723937916SJohn Kariuki 228*fa5b8bafSMarcel Cornu void 229*fa5b8bafSMarcel Cornu gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 23023937916SJohn Kariuki unsigned char *dest); 23123937916SJohn Kariuki 23223937916SJohn Kariuki /** 23323937916SJohn Kariuki * @brief GF(2^8) vector multiply accumulate, baseline version. 23423937916SJohn Kariuki * 23523937916SJohn Kariuki * Baseline version of gf_vect_mad() with same parameters. 23623937916SJohn Kariuki */ 23723937916SJohn Kariuki 238*fa5b8bafSMarcel Cornu void 239*fa5b8bafSMarcel Cornu gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, 24023937916SJohn Kariuki unsigned char *dest); 24123937916SJohn Kariuki 24223937916SJohn Kariuki // x86 only 24323937916SJohn Kariuki #if defined(__i386__) || defined(__x86_64__) 24423937916SJohn Kariuki 24523937916SJohn Kariuki /** 24623937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data. 24723937916SJohn Kariuki * 24823937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters. 24923937916SJohn Kariuki * @requires SSE4.1 25023937916SJohn Kariuki */ 251*fa5b8bafSMarcel Cornu void 252*fa5b8bafSMarcel Cornu ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 25323937916SJohn Kariuki unsigned char **coding); 25423937916SJohn Kariuki 25523937916SJohn Kariuki /** 25623937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data. 25723937916SJohn Kariuki * 25823937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters. 25923937916SJohn Kariuki * @requires AVX 26023937916SJohn Kariuki */ 261*fa5b8bafSMarcel Cornu void 262*fa5b8bafSMarcel Cornu ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 26323937916SJohn Kariuki unsigned char **coding); 26423937916SJohn Kariuki 26523937916SJohn Kariuki /** 26623937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data. 26723937916SJohn Kariuki * 26823937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters. 26923937916SJohn Kariuki * @requires AVX2 27023937916SJohn Kariuki */ 271*fa5b8bafSMarcel Cornu void 272*fa5b8bafSMarcel Cornu ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, 27323937916SJohn Kariuki unsigned char **coding); 27423937916SJohn Kariuki 27523937916SJohn Kariuki /** 27623937916SJohn Kariuki * @brief Generate update for encode or decode of erasure codes from single source. 27723937916SJohn Kariuki * 27800c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters. 27900c1efc1SGreg Tucker * @requires SSE4.1 28000c1efc1SGreg Tucker */ 28100c1efc1SGreg Tucker 282*fa5b8bafSMarcel Cornu void 283*fa5b8bafSMarcel Cornu ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 28400c1efc1SGreg Tucker unsigned char *data, unsigned char **coding); 28500c1efc1SGreg Tucker 28600c1efc1SGreg Tucker /** 28700c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source. 28800c1efc1SGreg Tucker * 28900c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters. 29000c1efc1SGreg Tucker * @requires AVX 29100c1efc1SGreg Tucker */ 29200c1efc1SGreg Tucker 293*fa5b8bafSMarcel Cornu void 294*fa5b8bafSMarcel Cornu ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 29500c1efc1SGreg Tucker unsigned char *data, unsigned char **coding); 29600c1efc1SGreg Tucker 29700c1efc1SGreg Tucker /** 29800c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source. 29900c1efc1SGreg Tucker * 30000c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters. 30100c1efc1SGreg Tucker * @requires AVX2 30200c1efc1SGreg Tucker */ 30300c1efc1SGreg Tucker 304*fa5b8bafSMarcel Cornu void 305*fa5b8bafSMarcel Cornu ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, 30600c1efc1SGreg Tucker unsigned char *data, unsigned char **coding); 30700c1efc1SGreg Tucker 30800c1efc1SGreg Tucker /** 30900c1efc1SGreg Tucker * @brief GF(2^8) vector dot product. 31000c1efc1SGreg Tucker * 31100c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant 31200c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for 31300c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 31400c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients. 31500c1efc1SGreg Tucker * @requires SSE4.1 31600c1efc1SGreg Tucker * 31700c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 31800c1efc1SGreg Tucker * @param vlen Number of vector sources. 31900c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 32000c1efc1SGreg Tucker * on the array of input coefficients. 32100c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 32200c1efc1SGreg Tucker * @param dest Pointer to destination data array. 32300c1efc1SGreg Tucker * @returns none 32400c1efc1SGreg Tucker */ 32500c1efc1SGreg Tucker 326*fa5b8bafSMarcel Cornu void 327*fa5b8bafSMarcel Cornu gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 328*fa5b8bafSMarcel Cornu unsigned char *dest); 32900c1efc1SGreg Tucker 33000c1efc1SGreg Tucker /** 33100c1efc1SGreg Tucker * @brief GF(2^8) vector dot product. 33200c1efc1SGreg Tucker * 33300c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant 33400c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for 33500c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 33600c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients. 33700c1efc1SGreg Tucker * @requires AVX 33800c1efc1SGreg Tucker * 33900c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 34000c1efc1SGreg Tucker * @param vlen Number of vector sources. 34100c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 34200c1efc1SGreg Tucker * on the array of input coefficients. 34300c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 34400c1efc1SGreg Tucker * @param dest Pointer to destination data array. 34500c1efc1SGreg Tucker * @returns none 34600c1efc1SGreg Tucker */ 34700c1efc1SGreg Tucker 348*fa5b8bafSMarcel Cornu void 349*fa5b8bafSMarcel Cornu gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 350*fa5b8bafSMarcel Cornu unsigned char *dest); 35100c1efc1SGreg Tucker 35200c1efc1SGreg Tucker /** 35300c1efc1SGreg Tucker * @brief GF(2^8) vector dot product. 35400c1efc1SGreg Tucker * 35500c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant 35600c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for 35700c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 35800c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients. 35900c1efc1SGreg Tucker * @requires AVX2 36000c1efc1SGreg Tucker * 36100c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 36200c1efc1SGreg Tucker * @param vlen Number of vector sources. 36300c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based 36400c1efc1SGreg Tucker * on the array of input coefficients. 36500c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 36600c1efc1SGreg Tucker * @param dest Pointer to destination data array. 36700c1efc1SGreg Tucker * @returns none 36800c1efc1SGreg Tucker */ 36900c1efc1SGreg Tucker 370*fa5b8bafSMarcel Cornu void 371*fa5b8bafSMarcel Cornu gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 372*fa5b8bafSMarcel Cornu unsigned char *dest); 37300c1efc1SGreg Tucker 37400c1efc1SGreg Tucker /** 37500c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs. 37600c1efc1SGreg Tucker * 377eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two 37800c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant 37900c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 38000c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 38100c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients. 38200c1efc1SGreg Tucker * @requires SSE4.1 38300c1efc1SGreg Tucker * 38400c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 38500c1efc1SGreg Tucker * @param vlen Number of vector sources. 38600c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 38700c1efc1SGreg Tucker * based on the array of input coefficients. 38800c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 38900c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 39000c1efc1SGreg Tucker * @returns none 39100c1efc1SGreg Tucker */ 39200c1efc1SGreg Tucker 393*fa5b8bafSMarcel Cornu void 394*fa5b8bafSMarcel Cornu gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 395*fa5b8bafSMarcel Cornu unsigned char **dest); 39600c1efc1SGreg Tucker 39700c1efc1SGreg Tucker /** 39800c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs. 39900c1efc1SGreg Tucker * 400eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two 40100c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant 40200c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 40300c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 40400c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients. 40500c1efc1SGreg Tucker * @requires AVX 40600c1efc1SGreg Tucker * 40700c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 40800c1efc1SGreg Tucker * @param vlen Number of vector sources. 40900c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 41000c1efc1SGreg Tucker * based on the array of input coefficients. 41100c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 41200c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 41300c1efc1SGreg Tucker * @returns none 41400c1efc1SGreg Tucker */ 41500c1efc1SGreg Tucker 416*fa5b8bafSMarcel Cornu void 417*fa5b8bafSMarcel Cornu gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 418*fa5b8bafSMarcel Cornu unsigned char **dest); 41900c1efc1SGreg Tucker 42000c1efc1SGreg Tucker /** 42100c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs. 42200c1efc1SGreg Tucker * 423eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two 42400c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant 42500c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 42600c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 42700c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients. 42800c1efc1SGreg Tucker * @requires AVX2 42900c1efc1SGreg Tucker * 43000c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 43100c1efc1SGreg Tucker * @param vlen Number of vector sources. 43200c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants 43300c1efc1SGreg Tucker * based on the array of input coefficients. 43400c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 43500c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 43600c1efc1SGreg Tucker * @returns none 43700c1efc1SGreg Tucker */ 43800c1efc1SGreg Tucker 439*fa5b8bafSMarcel Cornu void 440*fa5b8bafSMarcel Cornu gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 441*fa5b8bafSMarcel Cornu unsigned char **dest); 44200c1efc1SGreg Tucker 44300c1efc1SGreg Tucker /** 44400c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs. 44500c1efc1SGreg Tucker * 446eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three 44700c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant 44800c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 44900c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 45000c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients. 45100c1efc1SGreg Tucker * @requires SSE4.1 45200c1efc1SGreg Tucker * 45300c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 45400c1efc1SGreg Tucker * @param vlen Number of vector sources. 45500c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 45600c1efc1SGreg Tucker * based on the array of input coefficients. 45700c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 45800c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 45900c1efc1SGreg Tucker * @returns none 46000c1efc1SGreg Tucker */ 46100c1efc1SGreg Tucker 462*fa5b8bafSMarcel Cornu void 463*fa5b8bafSMarcel Cornu gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 464*fa5b8bafSMarcel Cornu unsigned char **dest); 46500c1efc1SGreg Tucker 46600c1efc1SGreg Tucker /** 46700c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs. 46800c1efc1SGreg Tucker * 469eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three 47000c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant 47100c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 47200c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 47300c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients. 47400c1efc1SGreg Tucker * @requires AVX 47500c1efc1SGreg Tucker * 47600c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 47700c1efc1SGreg Tucker * @param vlen Number of vector sources. 47800c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 47900c1efc1SGreg Tucker * based on the array of input coefficients. 48000c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 48100c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 48200c1efc1SGreg Tucker * @returns none 48300c1efc1SGreg Tucker */ 48400c1efc1SGreg Tucker 485*fa5b8bafSMarcel Cornu void 486*fa5b8bafSMarcel Cornu gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 487*fa5b8bafSMarcel Cornu unsigned char **dest); 48800c1efc1SGreg Tucker 48900c1efc1SGreg Tucker /** 49000c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs. 49100c1efc1SGreg Tucker * 492eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three 49300c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant 49400c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 49500c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 49600c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients. 49700c1efc1SGreg Tucker * @requires AVX2 49800c1efc1SGreg Tucker * 49900c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 50000c1efc1SGreg Tucker * @param vlen Number of vector sources. 50100c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants 50200c1efc1SGreg Tucker * based on the array of input coefficients. 50300c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 50400c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 50500c1efc1SGreg Tucker * @returns none 50600c1efc1SGreg Tucker */ 50700c1efc1SGreg Tucker 508*fa5b8bafSMarcel Cornu void 509*fa5b8bafSMarcel Cornu gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 510*fa5b8bafSMarcel Cornu unsigned char **dest); 51100c1efc1SGreg Tucker 51200c1efc1SGreg Tucker /** 51300c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs. 51400c1efc1SGreg Tucker * 515eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four 51600c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant 51700c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 51800c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 51900c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients. 52000c1efc1SGreg Tucker * @requires SSE4.1 52100c1efc1SGreg Tucker * 52200c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 52300c1efc1SGreg Tucker * @param vlen Number of vector sources. 52400c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 52500c1efc1SGreg Tucker * based on the array of input coefficients. 52600c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 52700c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 52800c1efc1SGreg Tucker * @returns none 52900c1efc1SGreg Tucker */ 53000c1efc1SGreg Tucker 531*fa5b8bafSMarcel Cornu void 532*fa5b8bafSMarcel Cornu gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 533*fa5b8bafSMarcel Cornu unsigned char **dest); 53400c1efc1SGreg Tucker 53500c1efc1SGreg Tucker /** 53600c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs. 53700c1efc1SGreg Tucker * 538eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four 53900c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant 54000c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 54100c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 54200c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients. 54300c1efc1SGreg Tucker * @requires AVX 54400c1efc1SGreg Tucker * 54500c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 54600c1efc1SGreg Tucker * @param vlen Number of vector sources. 54700c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 54800c1efc1SGreg Tucker * based on the array of input coefficients. 54900c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 55000c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 55100c1efc1SGreg Tucker * @returns none 55200c1efc1SGreg Tucker */ 55300c1efc1SGreg Tucker 554*fa5b8bafSMarcel Cornu void 555*fa5b8bafSMarcel Cornu gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 556*fa5b8bafSMarcel Cornu unsigned char **dest); 55700c1efc1SGreg Tucker 55800c1efc1SGreg Tucker /** 55900c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs. 56000c1efc1SGreg Tucker * 561eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four 56200c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant 56300c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 56400c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 56500c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients. 56600c1efc1SGreg Tucker * @requires AVX2 56700c1efc1SGreg Tucker * 56800c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 56900c1efc1SGreg Tucker * @param vlen Number of vector sources. 57000c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants 57100c1efc1SGreg Tucker * based on the array of input coefficients. 57200c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 57300c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 57400c1efc1SGreg Tucker * @returns none 57500c1efc1SGreg Tucker */ 57600c1efc1SGreg Tucker 577*fa5b8bafSMarcel Cornu void 578*fa5b8bafSMarcel Cornu gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 579*fa5b8bafSMarcel Cornu unsigned char **dest); 58000c1efc1SGreg Tucker 58100c1efc1SGreg Tucker /** 58200c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs. 58300c1efc1SGreg Tucker * 584eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five 58500c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant 58600c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 58700c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 58800c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients. 58900c1efc1SGreg Tucker * @requires SSE4.1 59000c1efc1SGreg Tucker * 59100c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must >= 16. 59200c1efc1SGreg Tucker * @param vlen Number of vector sources. 59300c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 59400c1efc1SGreg Tucker * based on the array of input coefficients. 59500c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 59600c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 59700c1efc1SGreg Tucker * @returns none 59800c1efc1SGreg Tucker */ 59900c1efc1SGreg Tucker 600*fa5b8bafSMarcel Cornu void 601*fa5b8bafSMarcel Cornu gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 602*fa5b8bafSMarcel Cornu unsigned char **dest); 60300c1efc1SGreg Tucker 60400c1efc1SGreg Tucker /** 60500c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs. 60600c1efc1SGreg Tucker * 607eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five 60800c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant 60900c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 61000c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 61100c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients. 61200c1efc1SGreg Tucker * @requires AVX 61300c1efc1SGreg Tucker * 61400c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must >= 16. 61500c1efc1SGreg Tucker * @param vlen Number of vector sources. 61600c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 61700c1efc1SGreg Tucker * based on the array of input coefficients. 61800c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 61900c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 62000c1efc1SGreg Tucker * @returns none 62100c1efc1SGreg Tucker */ 62200c1efc1SGreg Tucker 623*fa5b8bafSMarcel Cornu void 624*fa5b8bafSMarcel Cornu gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 625*fa5b8bafSMarcel Cornu unsigned char **dest); 62600c1efc1SGreg Tucker 62700c1efc1SGreg Tucker /** 62800c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs. 62900c1efc1SGreg Tucker * 630eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five 63100c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant 63200c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 63300c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 63400c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients. 63500c1efc1SGreg Tucker * @requires AVX2 63600c1efc1SGreg Tucker * 63700c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must >= 32. 63800c1efc1SGreg Tucker * @param vlen Number of vector sources. 63900c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants 64000c1efc1SGreg Tucker * based on the array of input coefficients. 64100c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 64200c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 64300c1efc1SGreg Tucker * @returns none 64400c1efc1SGreg Tucker */ 64500c1efc1SGreg Tucker 646*fa5b8bafSMarcel Cornu void 647*fa5b8bafSMarcel Cornu gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 648*fa5b8bafSMarcel Cornu unsigned char **dest); 64900c1efc1SGreg Tucker 65000c1efc1SGreg Tucker /** 65100c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs. 65200c1efc1SGreg Tucker * 653eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six 65400c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant 65500c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 65600c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 65700c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients. 65800c1efc1SGreg Tucker * @requires SSE4.1 65900c1efc1SGreg Tucker * 66000c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 66100c1efc1SGreg Tucker * @param vlen Number of vector sources. 66200c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 66300c1efc1SGreg Tucker * based on the array of input coefficients. 66400c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 66500c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 66600c1efc1SGreg Tucker * @returns none 66700c1efc1SGreg Tucker */ 66800c1efc1SGreg Tucker 669*fa5b8bafSMarcel Cornu void 670*fa5b8bafSMarcel Cornu gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, 671*fa5b8bafSMarcel Cornu unsigned char **dest); 67200c1efc1SGreg Tucker 67300c1efc1SGreg Tucker /** 67400c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs. 67500c1efc1SGreg Tucker * 676eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six 67700c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant 67800c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 67900c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 68000c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients. 68100c1efc1SGreg Tucker * @requires AVX 68200c1efc1SGreg Tucker * 68300c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 16. 68400c1efc1SGreg Tucker * @param vlen Number of vector sources. 68500c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 68600c1efc1SGreg Tucker * based on the array of input coefficients. 68700c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 68800c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 68900c1efc1SGreg Tucker * @returns none 69000c1efc1SGreg Tucker */ 69100c1efc1SGreg Tucker 692*fa5b8bafSMarcel Cornu void 693*fa5b8bafSMarcel Cornu gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, 694*fa5b8bafSMarcel Cornu unsigned char **dest); 69500c1efc1SGreg Tucker 69600c1efc1SGreg Tucker /** 69700c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs. 69800c1efc1SGreg Tucker * 699eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six 70000c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant 70100c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for 70200c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a 70300c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients. 70400c1efc1SGreg Tucker * @requires AVX2 70500c1efc1SGreg Tucker * 70600c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 70700c1efc1SGreg Tucker * @param vlen Number of vector sources. 70800c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants 70900c1efc1SGreg Tucker * based on the array of input coefficients. 71000c1efc1SGreg Tucker * @param src Array of pointers to source inputs. 71100c1efc1SGreg Tucker * @param dest Array of pointers to destination data buffers. 71200c1efc1SGreg Tucker * @returns none 71300c1efc1SGreg Tucker */ 71400c1efc1SGreg Tucker 715*fa5b8bafSMarcel Cornu void 716*fa5b8bafSMarcel Cornu gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, 717*fa5b8bafSMarcel Cornu unsigned char **dest); 71800c1efc1SGreg Tucker 71900c1efc1SGreg Tucker /** 72000c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version. 72100c1efc1SGreg Tucker * 72200c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters. 72300c1efc1SGreg Tucker * @requires SSE4.1 72400c1efc1SGreg Tucker */ 72500c1efc1SGreg Tucker 726*fa5b8bafSMarcel Cornu void 727*fa5b8bafSMarcel Cornu gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 72800c1efc1SGreg Tucker unsigned char *dest); 72900c1efc1SGreg Tucker /** 73000c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version. 73100c1efc1SGreg Tucker * 73200c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters. 73300c1efc1SGreg Tucker * @requires AVX 73400c1efc1SGreg Tucker */ 73500c1efc1SGreg Tucker 736*fa5b8bafSMarcel Cornu void 737*fa5b8bafSMarcel Cornu gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 73800c1efc1SGreg Tucker unsigned char *dest); 73900c1efc1SGreg Tucker 74000c1efc1SGreg Tucker /** 74100c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version. 74200c1efc1SGreg Tucker * 74300c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters. 74400c1efc1SGreg Tucker * @requires AVX2 74500c1efc1SGreg Tucker */ 74600c1efc1SGreg Tucker 747*fa5b8bafSMarcel Cornu void 748*fa5b8bafSMarcel Cornu gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 74900c1efc1SGreg Tucker unsigned char *dest); 75000c1efc1SGreg Tucker 75100c1efc1SGreg Tucker /** 75200c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. 75300c1efc1SGreg Tucker * 75400c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded 75500c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding 75600c1efc1SGreg Tucker * encode and decode update when only one source is available at a 75700c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based 75800c1efc1SGreg Tucker * on the input coefficients. 75900c1efc1SGreg Tucker * @requires SSE4.1 76000c1efc1SGreg Tucker * 76100c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 76200c1efc1SGreg Tucker * @param vec The number of vector sources or rows in the generator matrix 76300c1efc1SGreg Tucker * for coding. 76400c1efc1SGreg Tucker * @param vec_i The vector index corresponding to the single input source. 76500c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding 76600c1efc1SGreg Tucker * coefficients in ec_init_tables(). Must be of size 32*vec. 76700c1efc1SGreg Tucker * @param src Pointer to source input array. 76800c1efc1SGreg Tucker * @param dest Array of pointers to destination input/outputs. 76900c1efc1SGreg Tucker * @returns none 77000c1efc1SGreg Tucker */ 77100c1efc1SGreg Tucker 772*fa5b8bafSMarcel Cornu void 773*fa5b8bafSMarcel Cornu gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 77400c1efc1SGreg Tucker unsigned char **dest); 77500c1efc1SGreg Tucker 77600c1efc1SGreg Tucker /** 77700c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). 77800c1efc1SGreg Tucker * @requires AVX 77900c1efc1SGreg Tucker */ 780*fa5b8bafSMarcel Cornu void 781*fa5b8bafSMarcel Cornu gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 78200c1efc1SGreg Tucker unsigned char **dest); 78300c1efc1SGreg Tucker /** 78400c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(). 78500c1efc1SGreg Tucker * @requires AVX2 78600c1efc1SGreg Tucker */ 787*fa5b8bafSMarcel Cornu void 788*fa5b8bafSMarcel Cornu gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 78900c1efc1SGreg Tucker unsigned char **dest); 79000c1efc1SGreg Tucker 79100c1efc1SGreg Tucker /** 79200c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. 79300c1efc1SGreg Tucker * 79400c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded 79500c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding 79600c1efc1SGreg Tucker * encode and decode update when only one source is available at a 79700c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based 79800c1efc1SGreg Tucker * on the input coefficients. 79900c1efc1SGreg Tucker * @requires SSE4.1 80000c1efc1SGreg Tucker * 80100c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 80200c1efc1SGreg Tucker * @param vec The number of vector sources or rows in the generator matrix 80300c1efc1SGreg Tucker * for coding. 80400c1efc1SGreg Tucker * @param vec_i The vector index corresponding to the single input source. 80500c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding 80600c1efc1SGreg Tucker * coefficients in ec_init_tables(). Must be of size 32*vec. 80700c1efc1SGreg Tucker * @param src Pointer to source input array. 80800c1efc1SGreg Tucker * @param dest Array of pointers to destination input/outputs. 80900c1efc1SGreg Tucker * @returns none 81000c1efc1SGreg Tucker */ 81100c1efc1SGreg Tucker 812*fa5b8bafSMarcel Cornu void 813*fa5b8bafSMarcel Cornu gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 81400c1efc1SGreg Tucker unsigned char **dest); 81500c1efc1SGreg Tucker 81600c1efc1SGreg Tucker /** 81700c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(). 81800c1efc1SGreg Tucker * @requires AVX 81900c1efc1SGreg Tucker */ 820*fa5b8bafSMarcel Cornu void 821*fa5b8bafSMarcel Cornu gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 82200c1efc1SGreg Tucker unsigned char **dest); 82300c1efc1SGreg Tucker 82400c1efc1SGreg Tucker /** 82500c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(). 82600c1efc1SGreg Tucker * @requires AVX2 82700c1efc1SGreg Tucker */ 828*fa5b8bafSMarcel Cornu void 829*fa5b8bafSMarcel Cornu gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 83000c1efc1SGreg Tucker unsigned char **dest); 83100c1efc1SGreg Tucker 83200c1efc1SGreg Tucker /** 83300c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. SSE version. 83400c1efc1SGreg Tucker * 83500c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded 83600c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding 83700c1efc1SGreg Tucker * encode and decode update when only one source is available at a 83800c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based 83900c1efc1SGreg Tucker * on the input coefficients. 84000c1efc1SGreg Tucker * @requires SSE4.1 84100c1efc1SGreg Tucker * 84200c1efc1SGreg Tucker * @param len Length of each vector in bytes. Must be >= 32. 84300c1efc1SGreg Tucker * @param vec The number of vector sources or rows in the generator matrix 84400c1efc1SGreg Tucker * for coding. 84500c1efc1SGreg Tucker * @param vec_i The vector index corresponding to the single input source. 84600c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding 84700c1efc1SGreg Tucker * coefficients in ec_init_tables(). Must be of size 32*vec. 84800c1efc1SGreg Tucker * @param src Pointer to source input array. 84900c1efc1SGreg Tucker * @param dest Array of pointers to destination input/outputs. 85000c1efc1SGreg Tucker * @returns none 85100c1efc1SGreg Tucker */ 85200c1efc1SGreg Tucker 853*fa5b8bafSMarcel Cornu void 854*fa5b8bafSMarcel Cornu gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 85500c1efc1SGreg Tucker unsigned char **dest); 85600c1efc1SGreg Tucker 85700c1efc1SGreg Tucker /** 85800c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(). 85900c1efc1SGreg Tucker * @requires AVX 86000c1efc1SGreg Tucker */ 861*fa5b8bafSMarcel Cornu void 862*fa5b8bafSMarcel Cornu gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 86300c1efc1SGreg Tucker unsigned char **dest); 86400c1efc1SGreg Tucker /** 86500c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(). 86600c1efc1SGreg Tucker * @requires AVX2 86700c1efc1SGreg Tucker */ 868*fa5b8bafSMarcel Cornu void 869*fa5b8bafSMarcel Cornu gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 87000c1efc1SGreg Tucker unsigned char **dest); 87100c1efc1SGreg Tucker 87200c1efc1SGreg Tucker /** 87300c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. 87400c1efc1SGreg Tucker * @requires SSE4.1 87500c1efc1SGreg Tucker */ 876*fa5b8bafSMarcel Cornu void 877*fa5b8bafSMarcel Cornu gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 87800c1efc1SGreg Tucker unsigned char **dest); 87900c1efc1SGreg Tucker 88000c1efc1SGreg Tucker /** 88100c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. 88200c1efc1SGreg Tucker * @requires AVX 88300c1efc1SGreg Tucker */ 884*fa5b8bafSMarcel Cornu void 885*fa5b8bafSMarcel Cornu gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 88600c1efc1SGreg Tucker unsigned char **dest); 88700c1efc1SGreg Tucker /** 88800c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. 88900c1efc1SGreg Tucker * @requires AVX2 89000c1efc1SGreg Tucker */ 891*fa5b8bafSMarcel Cornu void 892*fa5b8bafSMarcel Cornu gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 89300c1efc1SGreg Tucker unsigned char **dest); 89400c1efc1SGreg Tucker 89500c1efc1SGreg Tucker /** 89600c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. 89700c1efc1SGreg Tucker * @requires SSE4.1 89800c1efc1SGreg Tucker */ 899*fa5b8bafSMarcel Cornu void 900*fa5b8bafSMarcel Cornu gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 90100c1efc1SGreg Tucker unsigned char **dest); 90200c1efc1SGreg Tucker /** 90300c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. 90400c1efc1SGreg Tucker * @requires AVX 90500c1efc1SGreg Tucker */ 906*fa5b8bafSMarcel Cornu void 907*fa5b8bafSMarcel Cornu gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 90800c1efc1SGreg Tucker unsigned char **dest); 90900c1efc1SGreg Tucker 91000c1efc1SGreg Tucker /** 91100c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. 91200c1efc1SGreg Tucker * @requires AVX2 91300c1efc1SGreg Tucker */ 914*fa5b8bafSMarcel Cornu void 915*fa5b8bafSMarcel Cornu gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, 91600c1efc1SGreg Tucker unsigned char **dest); 91700c1efc1SGreg Tucker 91823937916SJohn Kariuki #endif 91900c1efc1SGreg Tucker 92000c1efc1SGreg Tucker /********************************************************************** 92100c1efc1SGreg Tucker * The remaining are lib support functions used in GF(2^8) operations. 92200c1efc1SGreg Tucker */ 92300c1efc1SGreg Tucker 92400c1efc1SGreg Tucker /** 92500c1efc1SGreg Tucker * @brief Single element GF(2^8) multiply. 92600c1efc1SGreg Tucker * 92700c1efc1SGreg Tucker * @param a Multiplicand a 92800c1efc1SGreg Tucker * @param b Multiplicand b 92900c1efc1SGreg Tucker * @returns Product of a and b in GF(2^8) 93000c1efc1SGreg Tucker */ 93100c1efc1SGreg Tucker 932*fa5b8bafSMarcel Cornu unsigned char 933*fa5b8bafSMarcel Cornu gf_mul(unsigned char a, unsigned char b); 93400c1efc1SGreg Tucker 93500c1efc1SGreg Tucker /** 93600c1efc1SGreg Tucker * @brief Single element GF(2^8) inverse. 93700c1efc1SGreg Tucker * 93800c1efc1SGreg Tucker * @param a Input element 93900c1efc1SGreg Tucker * @returns Field element b such that a x b = {1} 94000c1efc1SGreg Tucker */ 94100c1efc1SGreg Tucker 942*fa5b8bafSMarcel Cornu unsigned char 943*fa5b8bafSMarcel Cornu gf_inv(unsigned char a); 94400c1efc1SGreg Tucker 94500c1efc1SGreg Tucker /** 94600c1efc1SGreg Tucker * @brief Generate a matrix of coefficients to be used for encoding. 94700c1efc1SGreg Tucker * 94800c1efc1SGreg Tucker * Vandermonde matrix example of encoding coefficients where high portion of 94900c1efc1SGreg Tucker * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)} 95000c1efc1SGreg Tucker * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in 95100c1efc1SGreg Tucker * erasure encoding but does not guarantee invertable for every sub matrix. For 95282a6ac65SRoy Oursler * large pairs of m and k it is possible to find cases where the decode matrix 95382a6ac65SRoy Oursler * chosen from sources and parity is not invertable. Users may want to adjust 95482a6ac65SRoy Oursler * for certain pairs m and k. If m and k satisfy one of the following 95582a6ac65SRoy Oursler * inequalities, no adjustment is required: 95682a6ac65SRoy Oursler * 957cad5e7d4SRoy Oursler * - k <= 3 958cad5e7d4SRoy Oursler * - k = 4, m <= 25 959cad5e7d4SRoy Oursler * - k = 5, m <= 10 960cad5e7d4SRoy Oursler * - k <= 21, m-k = 4 961cad5e7d4SRoy Oursler * - m - k <= 3. 96200c1efc1SGreg Tucker * 96300c1efc1SGreg Tucker * @param a [m x k] array to hold coefficients 96400c1efc1SGreg Tucker * @param m number of rows in matrix corresponding to srcs + parity. 96500c1efc1SGreg Tucker * @param k number of columns in matrix corresponding to srcs. 96600c1efc1SGreg Tucker * @returns none 96700c1efc1SGreg Tucker */ 96800c1efc1SGreg Tucker 969*fa5b8bafSMarcel Cornu void 970*fa5b8bafSMarcel Cornu gf_gen_rs_matrix(unsigned char *a, int m, int k); 97100c1efc1SGreg Tucker 97200c1efc1SGreg Tucker /** 97300c1efc1SGreg Tucker * @brief Generate a Cauchy matrix of coefficients to be used for encoding. 97400c1efc1SGreg Tucker * 97500c1efc1SGreg Tucker * Cauchy matrix example of encoding coefficients where high portion of matrix 97600c1efc1SGreg Tucker * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, 97700c1efc1SGreg Tucker * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable. 97800c1efc1SGreg Tucker * 97900c1efc1SGreg Tucker * @param a [m x k] array to hold coefficients 98000c1efc1SGreg Tucker * @param m number of rows in matrix corresponding to srcs + parity. 98100c1efc1SGreg Tucker * @param k number of columns in matrix corresponding to srcs. 98200c1efc1SGreg Tucker * @returns none 98300c1efc1SGreg Tucker */ 98400c1efc1SGreg Tucker 985*fa5b8bafSMarcel Cornu void 986*fa5b8bafSMarcel Cornu gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); 98700c1efc1SGreg Tucker 98800c1efc1SGreg Tucker /** 98900c1efc1SGreg Tucker * @brief Invert a matrix in GF(2^8) 99000c1efc1SGreg Tucker * 991ad49e580SGreg Tucker * Attempts to construct an n x n inverse of the input matrix. Returns non-zero 992ad49e580SGreg Tucker * if singular. Will always destroy input matrix in process. 993ad49e580SGreg Tucker * 994ad49e580SGreg Tucker * @param in input matrix, destroyed by invert process 99500c1efc1SGreg Tucker * @param out output matrix such that [in] x [out] = [I] - identity matrix 99600c1efc1SGreg Tucker * @param n size of matrix [nxn] 99700c1efc1SGreg Tucker * @returns 0 successful, other fail on singular input matrix 99800c1efc1SGreg Tucker */ 99900c1efc1SGreg Tucker 1000*fa5b8bafSMarcel Cornu int 1001*fa5b8bafSMarcel Cornu gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); 100200c1efc1SGreg Tucker 100300c1efc1SGreg Tucker /*************************************************************/ 100400c1efc1SGreg Tucker 100500c1efc1SGreg Tucker #ifdef __cplusplus 100600c1efc1SGreg Tucker } 100700c1efc1SGreg Tucker #endif 100800c1efc1SGreg Tucker 100900c1efc1SGreg Tucker #endif //_ERASURE_CODE_H_ 1010