isa-l/include/erasure_code.h

00c1efc1SGreg Tucker/**********************************************************************
00c1efc1SGreg Tucker  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker  Redistribution and use in source and binary forms, with or without
00c1efc1SGreg Tucker  modification, are permitted provided that the following conditions
00c1efc1SGreg Tucker  are met:
00c1efc1SGreg Tucker    * Redistributions of source code must retain the above copyright
00c1efc1SGreg Tucker      notice, this list of conditions and the following disclaimer.
00c1efc1SGreg Tucker    * Redistributions in binary form must reproduce the above copyright
00c1efc1SGreg Tucker      notice, this list of conditions and the following disclaimer in
00c1efc1SGreg Tucker      the documentation and/or other materials provided with the
00c1efc1SGreg Tucker      distribution.
00c1efc1SGreg Tucker    * Neither the name of Intel Corporation nor the names of its
00c1efc1SGreg Tucker      contributors may be used to endorse or promote products derived
00c1efc1SGreg Tucker      from this software without specific prior written permission.
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00c1efc1SGreg Tucker  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00c1efc1SGreg Tucker  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00c1efc1SGreg Tucker  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00c1efc1SGreg Tucker  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00c1efc1SGreg Tucker  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00c1efc1SGreg Tucker  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00c1efc1SGreg Tucker  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00c1efc1SGreg Tucker  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00c1efc1SGreg Tucker  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00c1efc1SGreg Tucker  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00c1efc1SGreg Tucker**********************************************************************/
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker#ifndef _ERASURE_CODE_H_
00c1efc1SGreg Tucker#define _ERASURE_CODE_H_
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker *  @file erasure_code.h
00c1efc1SGreg Tucker *  @brief Interface to functions supporting erasure code encode and decode.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker *  This file defines the interface to optimized functions used in erasure
00c1efc1SGreg Tucker *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
00c1efc1SGreg Tucker *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
00c1efc1SGreg Tucker *  set of coefficients.  Values for the coefficients are determined by the type
00c1efc1SGreg Tucker *  of erasure code.  Using a general dot product means that any sequence of
00c1efc1SGreg Tucker *  coefficients may be used including erasure codes based on random
00c1efc1SGreg Tucker *  coefficients.
00c1efc1SGreg Tucker *  Multiple versions of dot product are supplied to calculate 1-6 output
00c1efc1SGreg Tucker *  vectors in one pass.
00c1efc1SGreg Tucker *  Base GF multiply and divide functions can be sped up by defining
00c1efc1SGreg Tucker *  GF_LARGE_TABLES at the expense of memory size.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker#include "gf_vect_mul.h"
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker#ifdef __cplusplus
00c1efc1SGreg Tuckerextern "C" {
00c1efc1SGreg Tucker#endif
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Initialize tables for fast Erasure Code encode and decode.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Generates the expanded tables needed for fast encode or decode for erasure
00c1efc1SGreg Tucker * codes on blocks of data.  32 bytes are generated for each input coefficient.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param k      The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker *               for coding.
00c1efc1SGreg Tucker * @param rows   The number of output vectors to concurrently encode/decode.
00c1efc1SGreg Tucker * @param a      Pointer to sets of arrays of input coefficients used to encode
00c1efc1SGreg Tucker *               or decode data.
00c1efc1SGreg Tucker * @param gftbls Pointer to start of space for concatenated output tables
00c1efc1SGreg Tucker *               generated from input coefficients.  Must be of size 32*k*rows.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
f971f023SPablo de Lara * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version.
f971f023SPablo de Lara *
f971f023SPablo de Lara * Baseline version of ec_init_tables() with same parameters.
f971f023SPablo de Lara */
f971f023SPablo de Lara
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls);
f971f023SPablo de Lara
f971f023SPablo de Lara/**
00c1efc1SGreg Tucker * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Given a list of source data blocks, generate one or multiple blocks of
00c1efc1SGreg Tucker * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
00c1efc1SGreg Tucker * suitable set of coefficients, this function will perform the fast generation
00c1efc1SGreg Tucker * or decoding of Reed-Solomon type erasure codes.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * This function determines what instruction sets are enabled and
00c1efc1SGreg Tucker * selects the appropriate version at runtime.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each block of data (vector) of source or dest data.
00c1efc1SGreg Tucker * @param k      The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker * 		 for coding.
00c1efc1SGreg Tucker * @param rows   The number of output vectors to concurrently encode/decode.
00c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding
00c1efc1SGreg Tucker * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
00c1efc1SGreg Tucker * @param data   Array of pointers to source input buffers.
00c1efc1SGreg Tucker * @param coding Array of pointers to coded output buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
00c1efc1SGreg Tucker               unsigned char **coding);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
00c1efc1SGreg Tucker *
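00c1efc1SGreg Tucker * Baseline version of ec_encode_data() with same parameters, in the same order
00c1efc1SGreg Tucker * (named srcs, dests, v, src and dest here instead of k, rows, gftbls, data and coding).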
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
00c1efc1SGreg Tucker                    unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
*fa5b8bafSMarcel Cornu * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate
*fa5b8bafSMarcel Cornu * version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Given one source data block, update one or multiple blocks of encoded data as
00c1efc1SGreg Tucker * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
00c1efc1SGreg Tucker * coefficients, this function will perform the fast generation or decoding of
00c1efc1SGreg Tucker * Reed-Solomon type erasure codes from one input source at a time.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * This function determines what instruction sets are enabled and selects the
00c1efc1SGreg Tucker * appropriate version at runtime.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each block of data (vector) of source or dest data.
00c1efc1SGreg Tucker * @param k      The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker * 		 for coding.
00c1efc1SGreg Tucker * @param rows   The number of output vectors to concurrently encode/decode.
00c1efc1SGreg Tucker * @param vec_i  The vector index corresponding to the single input source.
00c1efc1SGreg Tucker * @param g_tbls Pointer to array of input tables generated from coding
00c1efc1SGreg Tucker * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
00c1efc1SGreg Tucker * @param data   Pointer to single input source used to update output parity.
00c1efc1SGreg Tucker * @param coding Array of pointers to coded output buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
00c1efc1SGreg Tucker                      unsigned char *data, unsigned char **coding);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source, runs baseline
00c1efc1SGreg Tucker * version.
00c1efc1SGreg Tucker *
23937916SJohn Kariuki * Baseline version of ec_encode_data_update().
23937916SJohn Kariuki */
23937916SJohn Kariuki
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
23937916SJohn Kariuki                           unsigned char *data, unsigned char **dest);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief GF(2^8) vector dot product, runs baseline version.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Does a GF(2^8) dot product across each byte of the input array and a constant
23937916SJohn Kariuki * set of coefficients to produce each byte of the output. Can be used for
23937916SJohn Kariuki * erasure coding encode and decode. Function requires pre-calculation of a
23937916SJohn Kariuki * 32*vlen byte constant array based on the input coefficients.
23937916SJohn Kariuki *
23937916SJohn Kariuki * @param len    Length of each vector in bytes. Must be >= 16.
23937916SJohn Kariuki * @param vlen   Number of vector sources.
23937916SJohn Kariuki * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
23937916SJohn Kariuki *               on the array of input coefficients. Only elements 32*CONST*j + 1
23937916SJohn Kariuki *               of this array are used, where j = (0, 1, 2...) and CONST is the
23937916SJohn Kariuki *               number of elements in the array of input coefficients. The
23937916SJohn Kariuki *               elements used correspond to the original input coefficients.
23937916SJohn Kariuki * @param src    Array of pointers to source inputs.
23937916SJohn Kariuki * @param dest   Pointer to destination data array.
23937916SJohn Kariuki * @returns none
23937916SJohn Kariuki */
23937916SJohn Kariuki
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char *dest);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief GF(2^8) vector dot product, runs appropriate version.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Does a GF(2^8) dot product across each byte of the input array and a constant
23937916SJohn Kariuki * set of coefficients to produce each byte of the output. Can be used for
23937916SJohn Kariuki * erasure coding encode and decode. Function requires pre-calculation of a
23937916SJohn Kariuki * 32*vlen byte constant array based on the input coefficients.
23937916SJohn Kariuki *
23937916SJohn Kariuki * This function determines what instruction sets are enabled and
23937916SJohn Kariuki * selects the appropriate version at runtime.
23937916SJohn Kariuki *
23937916SJohn Kariuki * @param len    Length of each vector in bytes. Must be >= 32.
23937916SJohn Kariuki * @param vlen   Number of vector sources.
23937916SJohn Kariuki * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
23937916SJohn Kariuki *               on the array of input coefficients.
23937916SJohn Kariuki * @param src    Array of pointers to source inputs.
23937916SJohn Kariuki * @param dest   Pointer to destination data array.
23937916SJohn Kariuki * @returns none
23937916SJohn Kariuki */
23937916SJohn Kariuki
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                 unsigned char *dest);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Does a GF(2^8) multiply across each byte of input source with expanded
23937916SJohn Kariuki * constant and add to destination array. Can be used for erasure coding encode
23937916SJohn Kariuki * and decode update when only one source is available at a time. Function
23937916SJohn Kariuki * requires pre-calculation of a 32*vec byte constant array based on the input
23937916SJohn Kariuki * coefficients.
23937916SJohn Kariuki *
23937916SJohn Kariuki * This function determines what instruction sets are enabled and selects the
23937916SJohn Kariuki * appropriate version at runtime.
23937916SJohn Kariuki *
4ac0e435SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 64.
23937916SJohn Kariuki * @param vec    The number of vector sources or rows in the generator matrix
23937916SJohn Kariuki * 		 for coding.
23937916SJohn Kariuki * @param vec_i  The vector index corresponding to the single input source.
23937916SJohn Kariuki * @param gftbls Pointer to array of input tables generated from coding
23937916SJohn Kariuki * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
23937916SJohn Kariuki * @param src    Pointer to source input array.
23937916SJohn Kariuki * @param dest   Pointer to destination data array.
23937916SJohn Kariuki * @returns none
23937916SJohn Kariuki */
23937916SJohn Kariuki
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
23937916SJohn Kariuki            unsigned char *dest);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief GF(2^8) vector multiply accumulate, baseline version.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Baseline version of gf_vect_mad() with same parameters.
23937916SJohn Kariuki */
23937916SJohn Kariuki
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
23937916SJohn Kariuki                 unsigned char *dest);
23937916SJohn Kariuki
23937916SJohn Kariuki// x86 only
23937916SJohn Kariuki#if defined(__i386__) || defined(__x86_64__)
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters.
23937916SJohn Kariuki * @requires SSE4.1
23937916SJohn Kariuki */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
23937916SJohn Kariuki                   unsigned char **coding);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters.
23937916SJohn Kariuki * @requires AVX
23937916SJohn Kariuki */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
23937916SJohn Kariuki                   unsigned char **coding);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief Generate or decode erasure codes on blocks of data.
23937916SJohn Kariuki *
23937916SJohn Kariuki * Arch specific version of ec_encode_data() with same parameters.
23937916SJohn Kariuki * @requires AVX2
23937916SJohn Kariuki */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
23937916SJohn Kariuki                    unsigned char **coding);
23937916SJohn Kariuki
23937916SJohn Kariuki/**
23937916SJohn Kariuki * @brief Generate update for encode or decode of erasure codes from single source.
23937916SJohn Kariuki *
00c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
00c1efc1SGreg Tucker                          unsigned char *data, unsigned char **coding);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
00c1efc1SGreg Tucker                          unsigned char *data, unsigned char **coding);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate update for encode or decode of erasure codes from single source.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Arch specific version of ec_encode_data_update() with same parameters.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornuec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
00c1efc1SGreg Tucker                           unsigned char *data, unsigned char **coding);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant
00c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
00c1efc1SGreg Tucker *               on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Pointer to destination data array.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                     unsigned char *dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant
00c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
00c1efc1SGreg Tucker *               on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Pointer to destination data array.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                     unsigned char *dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) dot product across each byte of the input array and a constant
00c1efc1SGreg Tucker * set of coefficients to produce each byte of the output. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 32*vlen byte constant array based on the input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
00c1efc1SGreg Tucker *               on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Pointer to destination data array.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char *dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with two outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate two outputs at a time. Does two
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and two constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 2*32*vlen byte constant array based on the two sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                       unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with three outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate three outputs at a time. Does three
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and three constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 3*32*vlen byte constant array based on the three sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                       unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with four outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate four outputs at a time. Does four
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and four constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 4*32*vlen byte constant array based on the four sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                       unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with five outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate five outputs at a time. Does five
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and five constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 5*32*vlen byte constant array based on the five sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                       unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 16.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                      unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector dot product with six outputs.
00c1efc1SGreg Tucker *
eaa1c18aSGreg Tucker * Vector dot product optimized to calculate six outputs at a time. Does six
00c1efc1SGreg Tucker * GF(2^8) dot products across each byte of the input array and six constant
00c1efc1SGreg Tucker * sets of coefficients to produce each byte of the outputs. Can be used for
00c1efc1SGreg Tucker * erasure coding encode and decode. Function requires pre-calculation of a
00c1efc1SGreg Tucker * 6*32*vlen byte constant array based on the six sets of input coefficients.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vlen   Number of vector sources.
00c1efc1SGreg Tucker * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
00c1efc1SGreg Tucker *               based on the array of input coefficients.
00c1efc1SGreg Tucker * @param src    Array of pointers to source inputs.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination data buffers.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
*fa5b8bafSMarcel Cornu                       unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                unsigned char *dest);
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                unsigned char *dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply accumulate, arch specific version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Arch specific version of gf_vect_mad() with same parameters.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char *dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate.  SSE version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded
00c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding
00c1efc1SGreg Tucker * encode and decode update when only one source is available at a
00c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based
00c1efc1SGreg Tucker * on the input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vec    The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker * 		 for coding.
00c1efc1SGreg Tucker * @param vec_i  The vector index corresponding to the single input source.
00c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding
00c1efc1SGreg Tucker * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
00c1efc1SGreg Tucker * @param src    Pointer to source input array.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination input/outputs.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                  unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded
00c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding
00c1efc1SGreg Tucker * encode and decode update when only one source is available at a
00c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based
00c1efc1SGreg Tucker * on the input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vec    The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker * 		 for coding.
00c1efc1SGreg Tucker * @param vec_i  The vector index corresponding to the single input source.
00c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding
00c1efc1SGreg Tucker * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
00c1efc1SGreg Tucker * @param src    Pointer to source input array.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination input/outputs.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                  unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Does a GF(2^8) multiply across each byte of input source with expanded
00c1efc1SGreg Tucker * constants and add to destination arrays. Can be used for erasure coding
00c1efc1SGreg Tucker * encode and decode update when only one source is available at a
00c1efc1SGreg Tucker * time. Function requires pre-calculation of a 32*vec byte constant array based
00c1efc1SGreg Tucker * on the input coefficients.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param len    Length of each vector in bytes. Must be >= 32.
00c1efc1SGreg Tucker * @param vec    The number of vector sources or rows in the generator matrix
00c1efc1SGreg Tucker * 		 for coding.
00c1efc1SGreg Tucker * @param vec_i  The vector index corresponding to the single input source.
00c1efc1SGreg Tucker * @param gftbls Pointer to array of input tables generated from coding
00c1efc1SGreg Tucker * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
00c1efc1SGreg Tucker * @param src    Pointer to source input array.
00c1efc1SGreg Tucker * @param dest   Array of pointers to destination input/outputs.
00c1efc1SGreg Tucker * @returns none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                  unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                  unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
00c1efc1SGreg Tucker * @requires SSE4.1
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
00c1efc1SGreg Tucker * @requires AVX
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                 unsigned char **dest);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
00c1efc1SGreg Tucker * @requires AVX2
00c1efc1SGreg Tucker */
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
00c1efc1SGreg Tucker                  unsigned char **dest);
00c1efc1SGreg Tucker
23937916SJohn Kariuki#endif
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**********************************************************************
00c1efc1SGreg Tucker * The remaining are lib support functions used in GF(2^8) operations.
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Single element GF(2^8) multiply.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param a  Multiplicand a
00c1efc1SGreg Tucker * @param b  Multiplicand b
00c1efc1SGreg Tucker * @returns  Product of a and b in GF(2^8)
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuunsigned char
*fa5b8bafSMarcel Cornugf_mul(unsigned char a, unsigned char b);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Single element GF(2^8) inverse.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param a  Input element
00c1efc1SGreg Tucker * @returns  Field element b such that a x b = {1}
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuunsigned char
*fa5b8bafSMarcel Cornugf_inv(unsigned char a);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate a matrix of coefficients to be used for encoding.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Vandermonde matrix example of encoding coefficients where high portion of
00c1efc1SGreg Tucker * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
00c1efc1SGreg Tucker * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
00c1efc1SGreg Tucker * erasure encoding but does not guarantee invertibility of every sub-matrix. For
82a6ac65SRoy Oursler * large pairs of m and k it is possible to find cases where the decode matrix
82a6ac65SRoy Oursler * chosen from sources and parity is not invertible. Users may want to adjust
82a6ac65SRoy Oursler * for certain pairs m and k. If m and k satisfy one of the following
82a6ac65SRoy Oursler * inequalities, no adjustment is required:
82a6ac65SRoy Oursler *
cad5e7d4SRoy Oursler * - k <= 3
cad5e7d4SRoy Oursler * - k = 4, m <= 25
cad5e7d4SRoy Oursler * - k = 5, m <= 10
cad5e7d4SRoy Oursler * - k <= 21, m-k = 4
cad5e7d4SRoy Oursler * - m - k <= 3.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param a  [m x k] array to hold coefficients
00c1efc1SGreg Tucker * @param m  number of rows in matrix corresponding to srcs + parity.
00c1efc1SGreg Tucker * @param k  number of columns in matrix corresponding to srcs.
00c1efc1SGreg Tucker * @returns  none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_gen_rs_matrix(unsigned char *a, int m, int k);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * Cauchy matrix example of encoding coefficients where high portion of matrix
00c1efc1SGreg Tucker * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
00c1efc1SGreg Tucker * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertible.
00c1efc1SGreg Tucker *
00c1efc1SGreg Tucker * @param a  [m x k] array to hold coefficients
00c1efc1SGreg Tucker * @param m  number of rows in matrix corresponding to srcs + parity.
00c1efc1SGreg Tucker * @param k  number of columns in matrix corresponding to srcs.
00c1efc1SGreg Tucker * @returns  none
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuvoid
*fa5b8bafSMarcel Cornugf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/**
00c1efc1SGreg Tucker * @brief Invert a matrix in GF(2^8)
00c1efc1SGreg Tucker *
ad49e580SGreg Tucker * Attempts to construct an n x n inverse of the input matrix. Returns non-zero
ad49e580SGreg Tucker * if singular. Will always destroy input matrix in process.
ad49e580SGreg Tucker *
ad49e580SGreg Tucker * @param in  input matrix, destroyed by invert process
00c1efc1SGreg Tucker * @param out output matrix such that [in] x [out] = [I] - identity matrix
00c1efc1SGreg Tucker * @param n   size of matrix [nxn]
00c1efc1SGreg Tucker * @returns 0 on success, non-zero if the input matrix is singular
00c1efc1SGreg Tucker */
00c1efc1SGreg Tucker
*fa5b8bafSMarcel Cornuint
*fa5b8bafSMarcel Cornugf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker/*************************************************************/
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker#ifdef __cplusplus
00c1efc1SGreg Tucker}
00c1efc1SGreg Tucker#endif
00c1efc1SGreg Tucker
00c1efc1SGreg Tucker#endif //_ERASURE_CODE_H_