xref: /isa-l/examples/ec/ec_piggyback_example.c (revision 699bb5bd3fd0b25ac5f39b75d039ca40362f5051)
1aaeedf60SGreg Tucker /**********************************************************************
2aaeedf60SGreg Tucker   Copyright(c) 2011-2018 Intel Corporation All rights reserved.
3aaeedf60SGreg Tucker 
4aaeedf60SGreg Tucker   Redistribution and use in source and binary forms, with or without
5aaeedf60SGreg Tucker   modification, are permitted provided that the following conditions
6aaeedf60SGreg Tucker   are met:
7aaeedf60SGreg Tucker     * Redistributions of source code must retain the above copyright
8aaeedf60SGreg Tucker       notice, this list of conditions and the following disclaimer.
9aaeedf60SGreg Tucker     * Redistributions in binary form must reproduce the above copyright
10aaeedf60SGreg Tucker       notice, this list of conditions and the following disclaimer in
11aaeedf60SGreg Tucker       the documentation and/or other materials provided with the
12aaeedf60SGreg Tucker       distribution.
13aaeedf60SGreg Tucker     * Neither the name of Intel Corporation nor the names of its
14aaeedf60SGreg Tucker       contributors may be used to endorse or promote products derived
15aaeedf60SGreg Tucker       from this software without specific prior written permission.
16aaeedf60SGreg Tucker 
17aaeedf60SGreg Tucker   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18aaeedf60SGreg Tucker   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19aaeedf60SGreg Tucker   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20aaeedf60SGreg Tucker   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21aaeedf60SGreg Tucker   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22aaeedf60SGreg Tucker   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23aaeedf60SGreg Tucker   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24aaeedf60SGreg Tucker   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25aaeedf60SGreg Tucker   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26aaeedf60SGreg Tucker   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27aaeedf60SGreg Tucker   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28aaeedf60SGreg Tucker **********************************************************************/
29aaeedf60SGreg Tucker 
30aaeedf60SGreg Tucker #include <stdio.h>
31aaeedf60SGreg Tucker #include <stdlib.h>
32aaeedf60SGreg Tucker #include <string.h>
33aaeedf60SGreg Tucker #include <getopt.h>
34aaeedf60SGreg Tucker #include "erasure_code.h"	// use <isa-l.h> instead when linking against installed
35aaeedf60SGreg Tucker #include "test.h"
36aaeedf60SGreg Tucker 
37aaeedf60SGreg Tucker #define MMAX 255
38aaeedf60SGreg Tucker #define KMAX 255
39aaeedf60SGreg Tucker 
40aaeedf60SGreg Tucker typedef unsigned char u8;
41aaeedf60SGreg Tucker int verbose = 0;
42aaeedf60SGreg Tucker 
43aaeedf60SGreg Tucker int usage(void)
44aaeedf60SGreg Tucker {
45aaeedf60SGreg Tucker 	fprintf(stderr,
46aaeedf60SGreg Tucker 		"Usage: ec_piggyback_example [options]\n"
47aaeedf60SGreg Tucker 		"  -h        Help\n"
48aaeedf60SGreg Tucker 		"  -k <val>  Number of source fragments\n"
49aaeedf60SGreg Tucker 		"  -p <val>  Number of parity fragments\n"
50aaeedf60SGreg Tucker 		"  -l <val>  Length of fragments\n"
51aaeedf60SGreg Tucker 		"  -e <val>  Simulate erasure on frag index val. Zero based. Can be repeated.\n"
52aaeedf60SGreg Tucker 		"  -v        Verbose\n"
53aaeedf60SGreg Tucker 		"  -b        Run timed benchmark\n"
54aaeedf60SGreg Tucker 		"  -s        Toggle use of sparse matrix opt\n"
55aaeedf60SGreg Tucker 		"  -r <seed> Pick random (k, p) with seed\n");
56aaeedf60SGreg Tucker 	exit(0);
57aaeedf60SGreg Tucker }
58aaeedf60SGreg Tucker 
59aaeedf60SGreg Tucker // Cauchy-based matrix
60aaeedf60SGreg Tucker void gf_gen_full_pb_cauchy_matrix(u8 * a, int m, int k)
61aaeedf60SGreg Tucker {
62aaeedf60SGreg Tucker 	int i, j, p = m - k;
63aaeedf60SGreg Tucker 
64aaeedf60SGreg Tucker 	// Identity matrix in top k x k to indicate a symetric code
65aaeedf60SGreg Tucker 	memset(a, 0, k * m);
66aaeedf60SGreg Tucker 	for (i = 0; i < k; i++)
67aaeedf60SGreg Tucker 		a[k * i + i] = 1;
68aaeedf60SGreg Tucker 
69aaeedf60SGreg Tucker 	for (i = k; i < (k + p / 2); i++) {
70aaeedf60SGreg Tucker 		for (j = 0; j < k / 2; j++)
71aaeedf60SGreg Tucker 			a[k * i + j] = gf_inv(i ^ j);
72aaeedf60SGreg Tucker 		for (; j < k; j++)
73aaeedf60SGreg Tucker 			a[k * i + j] = 0;
74aaeedf60SGreg Tucker 	}
75aaeedf60SGreg Tucker 	for (; i < m; i++) {
76aaeedf60SGreg Tucker 		for (j = 0; j < k / 2; j++)
77aaeedf60SGreg Tucker 			a[k * i + j] = 0;
78aaeedf60SGreg Tucker 		for (; j < k; j++)
79aaeedf60SGreg Tucker 			a[k * i + j] = gf_inv((i - p / 2) ^ (j - k / 2));
80aaeedf60SGreg Tucker 	}
81aaeedf60SGreg Tucker 
82aaeedf60SGreg Tucker 	// Fill in mixture of B parity depending on a few localized A sources
83aaeedf60SGreg Tucker 	int r = 0, c = 0;
84aaeedf60SGreg Tucker 	int repeat_len = k / (p - 2);
85aaeedf60SGreg Tucker 	int parity_rows = p / 2;
86aaeedf60SGreg Tucker 
87aaeedf60SGreg Tucker 	for (i = 1 + k + parity_rows; i < m; i++, r++) {
88aaeedf60SGreg Tucker 		if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1))))
89aaeedf60SGreg Tucker 			repeat_len++;
90aaeedf60SGreg Tucker 
91aaeedf60SGreg Tucker 		for (j = 0; j < repeat_len; j++, c++)
92aaeedf60SGreg Tucker 			a[k * i + c] = gf_inv((k + 1) ^ c);
93aaeedf60SGreg Tucker 	}
94aaeedf60SGreg Tucker }
95aaeedf60SGreg Tucker 
96aaeedf60SGreg Tucker // Vandermonde based matrix - not recommended due to limits when invertable
97aaeedf60SGreg Tucker void gf_gen_full_pb_vand_matrix(u8 * a, int m, int k)
98aaeedf60SGreg Tucker {
99aaeedf60SGreg Tucker 	int i, j, p = m - k;
100aaeedf60SGreg Tucker 	unsigned char q, gen = 1;
101aaeedf60SGreg Tucker 
102aaeedf60SGreg Tucker 	// Identity matrix in top k x k to indicate a symetric code
103aaeedf60SGreg Tucker 	memset(a, 0, k * m);
104aaeedf60SGreg Tucker 	for (i = 0; i < k; i++)
105aaeedf60SGreg Tucker 		a[k * i + i] = 1;
106aaeedf60SGreg Tucker 
107aaeedf60SGreg Tucker 	for (i = k; i < (k + (p / 2)); i++) {
108aaeedf60SGreg Tucker 		q = 1;
109aaeedf60SGreg Tucker 		for (j = 0; j < k / 2; j++) {
110aaeedf60SGreg Tucker 			a[k * i + j] = q;
111aaeedf60SGreg Tucker 			q = gf_mul(q, gen);
112aaeedf60SGreg Tucker 		}
113aaeedf60SGreg Tucker 		for (; j < k; j++)
114aaeedf60SGreg Tucker 			a[k * i + j] = 0;
115aaeedf60SGreg Tucker 		gen = gf_mul(gen, 2);
116aaeedf60SGreg Tucker 	}
117aaeedf60SGreg Tucker 	gen = 1;
118aaeedf60SGreg Tucker 	for (; i < m; i++) {
119aaeedf60SGreg Tucker 		q = 1;
120aaeedf60SGreg Tucker 		for (j = 0; j < k / 2; j++) {
121aaeedf60SGreg Tucker 			a[k * i + j] = 0;
122aaeedf60SGreg Tucker 		}
123aaeedf60SGreg Tucker 		for (; j < k; j++) {
124aaeedf60SGreg Tucker 			a[k * i + j] = q;
125aaeedf60SGreg Tucker 			q = gf_mul(q, gen);
126aaeedf60SGreg Tucker 		}
127aaeedf60SGreg Tucker 		gen = gf_mul(gen, 2);
128aaeedf60SGreg Tucker 	}
129aaeedf60SGreg Tucker 
130aaeedf60SGreg Tucker 	// Fill in mixture of B parity depending on a few localized A sources
131aaeedf60SGreg Tucker 	int r = 0, c = 0;
132aaeedf60SGreg Tucker 	int repeat_len = k / (p - 2);
133aaeedf60SGreg Tucker 	int parity_rows = p / 2;
134aaeedf60SGreg Tucker 
135aaeedf60SGreg Tucker 	for (i = 1 + k + parity_rows; i < m; i++, r++) {
136aaeedf60SGreg Tucker 		if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1))))
137aaeedf60SGreg Tucker 			repeat_len++;
138aaeedf60SGreg Tucker 
139aaeedf60SGreg Tucker 		for (j = 0; j < repeat_len; j++)
140aaeedf60SGreg Tucker 			a[k * i + c++] = 1;
141aaeedf60SGreg Tucker 	}
142aaeedf60SGreg Tucker }
143aaeedf60SGreg Tucker 
144aaeedf60SGreg Tucker void print_matrix(int m, int k, unsigned char *s, const char *msg)
145aaeedf60SGreg Tucker {
146aaeedf60SGreg Tucker 	int i, j;
147aaeedf60SGreg Tucker 
148aaeedf60SGreg Tucker 	printf("%s:\n", msg);
149aaeedf60SGreg Tucker 	for (i = 0; i < m; i++) {
150aaeedf60SGreg Tucker 		printf("%3d- ", i);
151aaeedf60SGreg Tucker 		for (j = 0; j < k; j++) {
152aaeedf60SGreg Tucker 			printf(" %2x", 0xff & s[j + (i * k)]);
153aaeedf60SGreg Tucker 		}
154aaeedf60SGreg Tucker 		printf("\n");
155aaeedf60SGreg Tucker 	}
156aaeedf60SGreg Tucker 	printf("\n");
157aaeedf60SGreg Tucker }
158aaeedf60SGreg Tucker 
159aaeedf60SGreg Tucker void print_list(int n, unsigned char *s, const char *msg)
160aaeedf60SGreg Tucker {
161aaeedf60SGreg Tucker 	int i;
162aaeedf60SGreg Tucker 	if (!verbose)
163aaeedf60SGreg Tucker 		return;
164aaeedf60SGreg Tucker 
165aaeedf60SGreg Tucker 	printf("%s: ", msg);
166aaeedf60SGreg Tucker 	for (i = 0; i < n; i++)
167aaeedf60SGreg Tucker 		printf(" %d", s[i]);
168aaeedf60SGreg Tucker 	printf("\n");
169aaeedf60SGreg Tucker }
170aaeedf60SGreg Tucker 
171aaeedf60SGreg Tucker static int gf_gen_decode_matrix(u8 * encode_matrix,
172aaeedf60SGreg Tucker 				u8 * decode_matrix,
173aaeedf60SGreg Tucker 				u8 * invert_matrix,
174aaeedf60SGreg Tucker 				u8 * temp_matrix,
175aaeedf60SGreg Tucker 				u8 * decode_index,
176aaeedf60SGreg Tucker 				u8 * frag_err_list, int nerrs, int k, int m);
177aaeedf60SGreg Tucker 
178aaeedf60SGreg Tucker int main(int argc, char *argv[])
179aaeedf60SGreg Tucker {
180aaeedf60SGreg Tucker 	int i, j, m, c, e, ret;
181aaeedf60SGreg Tucker 	int k = 10, p = 4, len = 8 * 1024;	// Default params
182aaeedf60SGreg Tucker 	int nerrs = 0;
183aaeedf60SGreg Tucker 	int benchmark = 0;
184aaeedf60SGreg Tucker 	int sparse_matrix_opt = 1;
185aaeedf60SGreg Tucker 
186aaeedf60SGreg Tucker 	// Fragment buffer pointers
187aaeedf60SGreg Tucker 	u8 *frag_ptrs[MMAX];
188aaeedf60SGreg Tucker 	u8 *parity_ptrs[KMAX];
189aaeedf60SGreg Tucker 	u8 *recover_srcs[KMAX];
190aaeedf60SGreg Tucker 	u8 *recover_outp[KMAX];
191aaeedf60SGreg Tucker 	u8 frag_err_list[MMAX];
192aaeedf60SGreg Tucker 
193aaeedf60SGreg Tucker 	// Coefficient matrices
194aaeedf60SGreg Tucker 	u8 *encode_matrix, *decode_matrix;
195aaeedf60SGreg Tucker 	u8 *invert_matrix, *temp_matrix;
196aaeedf60SGreg Tucker 	u8 *g_tbls;
197aaeedf60SGreg Tucker 	u8 decode_index[MMAX];
198aaeedf60SGreg Tucker 
199aaeedf60SGreg Tucker 	if (argc == 1)
200aaeedf60SGreg Tucker 		for (i = 0; i < p; i++)
201aaeedf60SGreg Tucker 			frag_err_list[nerrs++] = rand() % (k + p);
202aaeedf60SGreg Tucker 
203aaeedf60SGreg Tucker 	while ((c = getopt(argc, argv, "k:p:l:e:r:hvbs")) != -1) {
204aaeedf60SGreg Tucker 		switch (c) {
205aaeedf60SGreg Tucker 		case 'k':
206aaeedf60SGreg Tucker 			k = atoi(optarg);
207aaeedf60SGreg Tucker 			break;
208aaeedf60SGreg Tucker 		case 'p':
209aaeedf60SGreg Tucker 			p = atoi(optarg);
210aaeedf60SGreg Tucker 			break;
211aaeedf60SGreg Tucker 		case 'l':
212aaeedf60SGreg Tucker 			len = atoi(optarg);
213aaeedf60SGreg Tucker 			if (len < 0)
214aaeedf60SGreg Tucker 				usage();
215aaeedf60SGreg Tucker 			break;
216aaeedf60SGreg Tucker 		case 'e':
217aaeedf60SGreg Tucker 			e = atoi(optarg);
218aaeedf60SGreg Tucker 			frag_err_list[nerrs++] = e;
219aaeedf60SGreg Tucker 			break;
220aaeedf60SGreg Tucker 		case 'r':
221aaeedf60SGreg Tucker 			srand(atoi(optarg));
222aaeedf60SGreg Tucker 			k = (rand() % MMAX) / 4;
223aaeedf60SGreg Tucker 			k = (k < 2) ? 2 : k;
224aaeedf60SGreg Tucker 			p = (rand() % (MMAX - k)) / 4;
225aaeedf60SGreg Tucker 			p = (p < 2) ? 2 : p;
226aaeedf60SGreg Tucker 			for (i = 0; i < k && nerrs < p; i++)
227aaeedf60SGreg Tucker 				if (rand() & 1)
228aaeedf60SGreg Tucker 					frag_err_list[nerrs++] = i;
229aaeedf60SGreg Tucker 			break;
230aaeedf60SGreg Tucker 		case 'v':
231aaeedf60SGreg Tucker 			verbose++;
232aaeedf60SGreg Tucker 			break;
233aaeedf60SGreg Tucker 		case 'b':
234aaeedf60SGreg Tucker 			benchmark = 1;
235aaeedf60SGreg Tucker 			break;
236aaeedf60SGreg Tucker 		case 's':
237aaeedf60SGreg Tucker 			sparse_matrix_opt = !sparse_matrix_opt;
238aaeedf60SGreg Tucker 			break;
239aaeedf60SGreg Tucker 		case 'h':
240aaeedf60SGreg Tucker 		default:
241aaeedf60SGreg Tucker 			usage();
242aaeedf60SGreg Tucker 			break;
243aaeedf60SGreg Tucker 		}
244aaeedf60SGreg Tucker 	}
245aaeedf60SGreg Tucker 	m = k + p;
246aaeedf60SGreg Tucker 
247aaeedf60SGreg Tucker 	// Check for valid parameters
248aaeedf60SGreg Tucker 	if (m > (MMAX / 2) || k > (KMAX / 2) || m < 0 || p < 2 || k < 1) {
249aaeedf60SGreg Tucker 		printf(" Input test parameter error m=%d, k=%d, p=%d, erasures=%d\n",
250aaeedf60SGreg Tucker 		       m, k, p, nerrs);
251aaeedf60SGreg Tucker 		usage();
252aaeedf60SGreg Tucker 	}
253aaeedf60SGreg Tucker 	if (nerrs > p) {
254aaeedf60SGreg Tucker 		printf(" Number of erasures chosen exceeds power of code erasures=%d p=%d\n",
255aaeedf60SGreg Tucker 		       nerrs, p);
256aaeedf60SGreg Tucker 	}
257aaeedf60SGreg Tucker 	for (i = 0; i < nerrs; i++) {
258aaeedf60SGreg Tucker 		if (frag_err_list[i] >= m)
259aaeedf60SGreg Tucker 			printf(" fragment %d not in range\n", frag_err_list[i]);
260aaeedf60SGreg Tucker 	}
261aaeedf60SGreg Tucker 
262aaeedf60SGreg Tucker 	printf("ec_piggyback_example:\n");
263aaeedf60SGreg Tucker 
264aaeedf60SGreg Tucker 	/*
265aaeedf60SGreg Tucker 	 * One simple way to implement piggyback codes is to keep a 2x wide matrix
266aaeedf60SGreg Tucker 	 * that covers the how each parity is related to both A and B sources.  This
267aaeedf60SGreg Tucker 	 * keeps it easy to generalize in parameters m,k and the resulting sparse
268aaeedf60SGreg Tucker 	 * matrix multiplication can be optimized by pre-removal of zero items.
269aaeedf60SGreg Tucker 	 */
270aaeedf60SGreg Tucker 
271aaeedf60SGreg Tucker 	int k2 = 2 * k;
272aaeedf60SGreg Tucker 	int p2 = 2 * p;
273aaeedf60SGreg Tucker 	int m2 = k2 + p2;
274aaeedf60SGreg Tucker 	int nerrs2 = nerrs;
275aaeedf60SGreg Tucker 
276aaeedf60SGreg Tucker 	encode_matrix = malloc(m2 * k2);
277aaeedf60SGreg Tucker 	decode_matrix = malloc(m2 * k2);
278aaeedf60SGreg Tucker 	invert_matrix = malloc(m2 * k2);
279aaeedf60SGreg Tucker 	temp_matrix = malloc(m2 * k2);
280aaeedf60SGreg Tucker 	g_tbls = malloc(k2 * p2 * 32);
281aaeedf60SGreg Tucker 
282aaeedf60SGreg Tucker 	if (encode_matrix == NULL || decode_matrix == NULL
283aaeedf60SGreg Tucker 	    || invert_matrix == NULL || temp_matrix == NULL || g_tbls == NULL) {
284aaeedf60SGreg Tucker 		printf("Test failure! Error with malloc\n");
285aaeedf60SGreg Tucker 		return -1;
286aaeedf60SGreg Tucker 	}
287aaeedf60SGreg Tucker 	// Allocate the src fragments
288aaeedf60SGreg Tucker 	for (i = 0; i < k; i++) {
289aaeedf60SGreg Tucker 		if (NULL == (frag_ptrs[i] = malloc(len))) {
290aaeedf60SGreg Tucker 			printf("alloc error: Fail\n");
291aaeedf60SGreg Tucker 			return -1;
292aaeedf60SGreg Tucker 		}
293aaeedf60SGreg Tucker 	}
294aaeedf60SGreg Tucker 	// Allocate the parity fragments
295aaeedf60SGreg Tucker 	for (i = 0; i < p2; i++) {
296aaeedf60SGreg Tucker 		if (NULL == (parity_ptrs[i] = malloc(len / 2))) {
297aaeedf60SGreg Tucker 			printf("alloc error: Fail\n");
298aaeedf60SGreg Tucker 			return -1;
299aaeedf60SGreg Tucker 		}
300aaeedf60SGreg Tucker 	}
301aaeedf60SGreg Tucker 
302aaeedf60SGreg Tucker 	// Allocate buffers for recovered data
303aaeedf60SGreg Tucker 	for (i = 0; i < p2; i++) {
304aaeedf60SGreg Tucker 		if (NULL == (recover_outp[i] = malloc(len / 2))) {
305aaeedf60SGreg Tucker 			printf("alloc error: Fail\n");
306aaeedf60SGreg Tucker 			return -1;
307aaeedf60SGreg Tucker 		}
308aaeedf60SGreg Tucker 	}
309aaeedf60SGreg Tucker 
310aaeedf60SGreg Tucker 	// Fill sources with random data
311aaeedf60SGreg Tucker 	for (i = 0; i < k; i++)
312aaeedf60SGreg Tucker 		for (j = 0; j < len; j++)
313aaeedf60SGreg Tucker 			frag_ptrs[i][j] = rand();
314aaeedf60SGreg Tucker 
315aaeedf60SGreg Tucker 	printf(" encode (m,k,p)=(%d,%d,%d) len=%d\n", m, k, p, len);
316aaeedf60SGreg Tucker 
317aaeedf60SGreg Tucker 	// Pick an encode matrix.
318aaeedf60SGreg Tucker 	gf_gen_full_pb_cauchy_matrix(encode_matrix, m2, k2);
319aaeedf60SGreg Tucker 
320aaeedf60SGreg Tucker 	if (verbose)
321aaeedf60SGreg Tucker 		print_matrix(m2, k2, encode_matrix, "encode matrix");
322aaeedf60SGreg Tucker 
323aaeedf60SGreg Tucker 	// Initialize g_tbls from encode matrix
324aaeedf60SGreg Tucker 	ec_init_tables(k2, p2, &encode_matrix[k2 * k2], g_tbls);
325aaeedf60SGreg Tucker 
326aaeedf60SGreg Tucker 	// Fold A and B into single list of fragments
327aaeedf60SGreg Tucker 	for (i = 0; i < k; i++)
328aaeedf60SGreg Tucker 		frag_ptrs[i + k] = &frag_ptrs[i][len / 2];
329aaeedf60SGreg Tucker 
330aaeedf60SGreg Tucker 	if (!sparse_matrix_opt) {
331aaeedf60SGreg Tucker 		// Standard encode using no assumptions on the encode matrix
332aaeedf60SGreg Tucker 
333aaeedf60SGreg Tucker 		// Generate EC parity blocks from sources
334aaeedf60SGreg Tucker 		ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs, parity_ptrs);
335aaeedf60SGreg Tucker 
336aaeedf60SGreg Tucker 		if (benchmark) {
337*699bb5bdSRoy Oursler 			struct perf start;
338*699bb5bdSRoy Oursler 			BENCHMARK(&start, BENCHMARK_TIME,
339aaeedf60SGreg Tucker 				  ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs,
340*699bb5bdSRoy Oursler 						 parity_ptrs));
341aaeedf60SGreg Tucker 			printf("ec_piggyback_encode_std: ");
342*699bb5bdSRoy Oursler 			perf_print(start, m2 * len / 2);
343aaeedf60SGreg Tucker 		}
344aaeedf60SGreg Tucker 	} else {
345aaeedf60SGreg Tucker 		// Sparse matrix optimization - use fact that input matrix is sparse
346aaeedf60SGreg Tucker 
347aaeedf60SGreg Tucker 		// Keep an encode matrix with some zero elements removed
348aaeedf60SGreg Tucker 		u8 *encode_matrix_faster, *g_tbls_faster;
349aaeedf60SGreg Tucker 		encode_matrix_faster = malloc(m * k);
350aaeedf60SGreg Tucker 		g_tbls_faster = malloc(k * p * 32);
351aaeedf60SGreg Tucker 		if (encode_matrix_faster == NULL || g_tbls_faster == NULL) {
352aaeedf60SGreg Tucker 			printf("Test failure! Error with malloc\n");
353aaeedf60SGreg Tucker 			return -1;
354aaeedf60SGreg Tucker 		}
355aaeedf60SGreg Tucker 
356aaeedf60SGreg Tucker 		/*
357aaeedf60SGreg Tucker 		 * Pack with only the part that we know are non-zero.  Alternatively
358aaeedf60SGreg Tucker 		 * we could search and keep track of non-zero elements but for
359aaeedf60SGreg Tucker 		 * simplicity we just skip the lower quadrant.
360aaeedf60SGreg Tucker 		 */
361aaeedf60SGreg Tucker 		for (i = k, j = k2; i < m; i++, j++)
362aaeedf60SGreg Tucker 			memcpy(&encode_matrix_faster[k * i], &encode_matrix[k2 * j], k);
363aaeedf60SGreg Tucker 
364aaeedf60SGreg Tucker 		if (verbose) {
365aaeedf60SGreg Tucker 			print_matrix(p, k, &encode_matrix_faster[k * k],
366aaeedf60SGreg Tucker 				     "encode via sparse-opt");
367aaeedf60SGreg Tucker 			print_matrix(p2 / 2, k2, &encode_matrix[(k2 + p2 / 2) * k2],
368aaeedf60SGreg Tucker 				     "encode via sparse-opt");
369aaeedf60SGreg Tucker 		}
370aaeedf60SGreg Tucker 		// Initialize g_tbls from encode matrix
371aaeedf60SGreg Tucker 		ec_init_tables(k, p, &encode_matrix_faster[k * k], g_tbls_faster);
372aaeedf60SGreg Tucker 
373aaeedf60SGreg Tucker 		// Generate EC parity blocks from sources
374aaeedf60SGreg Tucker 		ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs, parity_ptrs);
375aaeedf60SGreg Tucker 		ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32], frag_ptrs,
376aaeedf60SGreg Tucker 			       &parity_ptrs[p]);
377aaeedf60SGreg Tucker 
378aaeedf60SGreg Tucker 		if (benchmark) {
379*699bb5bdSRoy Oursler 			struct perf start;
380*699bb5bdSRoy Oursler 			BENCHMARK(&start, BENCHMARK_TIME,
381aaeedf60SGreg Tucker 				  ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs,
382aaeedf60SGreg Tucker 						 parity_ptrs);
383*699bb5bdSRoy Oursler 				  ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32],
384*699bb5bdSRoy Oursler 						 frag_ptrs, &parity_ptrs[p]));
385aaeedf60SGreg Tucker 			printf("ec_piggyback_encode_sparse: ");
386*699bb5bdSRoy Oursler 			perf_print(start, m2 * len / 2);
387aaeedf60SGreg Tucker 		}
388aaeedf60SGreg Tucker 	}
389aaeedf60SGreg Tucker 
390aaeedf60SGreg Tucker 	if (nerrs <= 0)
391aaeedf60SGreg Tucker 		return 0;
392aaeedf60SGreg Tucker 
393aaeedf60SGreg Tucker 	printf(" recover %d fragments\n", nerrs);
394aaeedf60SGreg Tucker 
395aaeedf60SGreg Tucker 	// Set frag pointers to correspond to parity
396aaeedf60SGreg Tucker 	for (i = k2; i < m2; i++)
397aaeedf60SGreg Tucker 		frag_ptrs[i] = parity_ptrs[i - k2];
398aaeedf60SGreg Tucker 
399aaeedf60SGreg Tucker 	print_list(nerrs2, frag_err_list, " frag err list");
400aaeedf60SGreg Tucker 
401aaeedf60SGreg Tucker 	// Find a decode matrix to regenerate all erasures from remaining frags
402aaeedf60SGreg Tucker 	ret = gf_gen_decode_matrix(encode_matrix, decode_matrix,
403aaeedf60SGreg Tucker 				   invert_matrix, temp_matrix, decode_index, frag_err_list,
404aaeedf60SGreg Tucker 				   nerrs2, k2, m2);
405aaeedf60SGreg Tucker 
406aaeedf60SGreg Tucker 	if (ret != 0) {
407aaeedf60SGreg Tucker 		printf("Fail on generate decode matrix\n");
408aaeedf60SGreg Tucker 		return -1;
409aaeedf60SGreg Tucker 	}
410aaeedf60SGreg Tucker 	// Pack recovery array pointers as list of valid fragments
411aaeedf60SGreg Tucker 	for (i = 0; i < k2; i++)
412aaeedf60SGreg Tucker 		if (decode_index[i] < k2)
413aaeedf60SGreg Tucker 			recover_srcs[i] = frag_ptrs[decode_index[i]];
414aaeedf60SGreg Tucker 		else
415aaeedf60SGreg Tucker 			recover_srcs[i] = parity_ptrs[decode_index[i] - k2];
416aaeedf60SGreg Tucker 
417aaeedf60SGreg Tucker 	print_list(k2, decode_index, " decode index");
418aaeedf60SGreg Tucker 
419aaeedf60SGreg Tucker 	// Recover data
420aaeedf60SGreg Tucker 	ec_init_tables(k2, nerrs2, decode_matrix, g_tbls);
421aaeedf60SGreg Tucker 	ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs, recover_outp);
422aaeedf60SGreg Tucker 
423aaeedf60SGreg Tucker 	if (benchmark) {
424*699bb5bdSRoy Oursler 		struct perf start;
425*699bb5bdSRoy Oursler 		BENCHMARK(&start, BENCHMARK_TIME,
426aaeedf60SGreg Tucker 			  ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs,
427*699bb5bdSRoy Oursler 					 recover_outp));
428aaeedf60SGreg Tucker 		printf("ec_piggyback_decode: ");
429*699bb5bdSRoy Oursler 		perf_print(start, (k2 + nerrs2) * len / 2);
430aaeedf60SGreg Tucker 	}
431aaeedf60SGreg Tucker 	// Check that recovered buffers are the same as original
432aaeedf60SGreg Tucker 	printf(" check recovery of block {");
433aaeedf60SGreg Tucker 	for (i = 0; i < nerrs2; i++) {
434aaeedf60SGreg Tucker 		printf(" %d", frag_err_list[i]);
435aaeedf60SGreg Tucker 		if (memcmp(recover_outp[i], frag_ptrs[frag_err_list[i]], len / 2)) {
436aaeedf60SGreg Tucker 			printf(" Fail erasure recovery %d, frag %d\n", i, frag_err_list[i]);
437aaeedf60SGreg Tucker 			return -1;
438aaeedf60SGreg Tucker 		}
439aaeedf60SGreg Tucker 	}
440aaeedf60SGreg Tucker 	printf(" } done all: Pass\n");
441aaeedf60SGreg Tucker 
442aaeedf60SGreg Tucker 	return 0;
443aaeedf60SGreg Tucker }
444aaeedf60SGreg Tucker 
445aaeedf60SGreg Tucker // Generate decode matrix from encode matrix and erasure list
446aaeedf60SGreg Tucker 
447aaeedf60SGreg Tucker static int gf_gen_decode_matrix(u8 * encode_matrix,
448aaeedf60SGreg Tucker 				u8 * decode_matrix,
449aaeedf60SGreg Tucker 				u8 * invert_matrix,
450aaeedf60SGreg Tucker 				u8 * temp_matrix,
451aaeedf60SGreg Tucker 				u8 * decode_index, u8 * frag_err_list, int nerrs, int k, int m)
452aaeedf60SGreg Tucker {
453aaeedf60SGreg Tucker 	int i, j, p, r;
454aaeedf60SGreg Tucker 	int nsrcerrs = 0;
455aaeedf60SGreg Tucker 	u8 s, *b = temp_matrix;
456aaeedf60SGreg Tucker 	u8 frag_in_err[MMAX];
457aaeedf60SGreg Tucker 
458aaeedf60SGreg Tucker 	memset(frag_in_err, 0, sizeof(frag_in_err));
459aaeedf60SGreg Tucker 
460aaeedf60SGreg Tucker 	// Order the fragments in erasure for easier sorting
461aaeedf60SGreg Tucker 	for (i = 0; i < nerrs; i++) {
462aaeedf60SGreg Tucker 		if (frag_err_list[i] < k)
463aaeedf60SGreg Tucker 			nsrcerrs++;
464aaeedf60SGreg Tucker 		frag_in_err[frag_err_list[i]] = 1;
465aaeedf60SGreg Tucker 	}
466aaeedf60SGreg Tucker 
467aaeedf60SGreg Tucker 	// Construct b (matrix that encoded remaining frags) by removing erased rows
468aaeedf60SGreg Tucker 	for (i = 0, r = 0; i < k; i++, r++) {
469aaeedf60SGreg Tucker 		while (frag_in_err[r])
470aaeedf60SGreg Tucker 			r++;
471aaeedf60SGreg Tucker 		for (j = 0; j < k; j++)
472aaeedf60SGreg Tucker 			b[k * i + j] = encode_matrix[k * r + j];
473aaeedf60SGreg Tucker 		decode_index[i] = r;
474aaeedf60SGreg Tucker 	}
475aaeedf60SGreg Tucker 	if (verbose > 1)
476aaeedf60SGreg Tucker 		print_matrix(k, k, b, "matrix to invert");
477aaeedf60SGreg Tucker 
478aaeedf60SGreg Tucker 	// Invert matrix to get recovery matrix
479aaeedf60SGreg Tucker 	if (gf_invert_matrix(b, invert_matrix, k) < 0)
480aaeedf60SGreg Tucker 		return -1;
481aaeedf60SGreg Tucker 
482aaeedf60SGreg Tucker 	if (verbose > 2)
483aaeedf60SGreg Tucker 		print_matrix(k, k, invert_matrix, "matrix inverted");
484aaeedf60SGreg Tucker 
485aaeedf60SGreg Tucker 	// Get decode matrix with only wanted recovery rows
486aaeedf60SGreg Tucker 	for (i = 0; i < nsrcerrs; i++) {
487aaeedf60SGreg Tucker 		for (j = 0; j < k; j++) {
488aaeedf60SGreg Tucker 			decode_matrix[k * i + j] = invert_matrix[k * frag_err_list[i] + j];
489aaeedf60SGreg Tucker 		}
490aaeedf60SGreg Tucker 	}
491aaeedf60SGreg Tucker 
492aaeedf60SGreg Tucker 	// For non-src (parity) erasures need to multiply encode matrix * invert
493aaeedf60SGreg Tucker 	for (p = nsrcerrs; p < nerrs; p++) {
494aaeedf60SGreg Tucker 		for (i = 0; i < k; i++) {
495aaeedf60SGreg Tucker 			s = 0;
496aaeedf60SGreg Tucker 			for (j = 0; j < k; j++)
497aaeedf60SGreg Tucker 				s ^= gf_mul(invert_matrix[j * k + i],
498aaeedf60SGreg Tucker 					    encode_matrix[k * frag_err_list[p] + j]);
499aaeedf60SGreg Tucker 
500aaeedf60SGreg Tucker 			decode_matrix[k * p + i] = s;
501aaeedf60SGreg Tucker 		}
502aaeedf60SGreg Tucker 	}
503aaeedf60SGreg Tucker 	if (verbose > 1)
504aaeedf60SGreg Tucker 		print_matrix(nerrs, k, decode_matrix, "decode matrix");
505aaeedf60SGreg Tucker 	return 0;
506aaeedf60SGreg Tucker }
507