1 /* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /************************************************************ 30 * 31 * nwayxor.c -- code to do N-way xors for reconstruction 32 * 33 * nWayXorN xors N input buffers into the destination buffer. 34 * adapted from danner's longword_bxor code. 35 * 36 ************************************************************/ 37 38 #include "rf_nwayxor.h" 39 #include "rf_shutdown.h" 40 41 static int callcount[10]; 42 static void rf_ShutdownNWayXor(void *); 43 44 static void 45 rf_ShutdownNWayXor(ignored) 46 void *ignored; 47 { 48 int i; 49 50 if (rf_showXorCallCounts == 0) 51 return; 52 printf("Call counts for n-way xor routines: "); 53 for (i = 0; i < 10; i++) 54 printf("%d ", callcount[i]); 55 printf("\n"); 56 } 57 58 int 59 rf_ConfigureNWayXor(listp) 60 RF_ShutdownList_t **listp; 61 { 62 int i, rc; 63 64 for (i = 0; i < 10; i++) 65 callcount[i] = 0; 66 rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); 67 return (rc); 68 } 69 70 void 71 rf_nWayXor1(src_rbs, dest_rb, len) 72 RF_ReconBuffer_t **src_rbs; 73 RF_ReconBuffer_t *dest_rb; 74 int len; 75 { 76 unsigned long *src = (unsigned long *) src_rbs[0]->buffer; 77 unsigned long *dest = (unsigned long *) dest_rb->buffer; 78 unsigned long *end = src + len; 79 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; 80 81 callcount[1]++; 82 while (len >= 4) { 83 d0 = dest[0]; 84 d1 = dest[1]; 85 d2 = dest[2]; 86 d3 = dest[3]; 87 s0 = src[0]; 88 s1 = src[1]; 89 s2 = src[2]; 90 s3 = src[3]; 91 dest[0] = d0 ^ s0; 92 dest[1] = d1 ^ s1; 93 dest[2] = d2 ^ s2; 94 dest[3] = d3 ^ s3; 95 src += 4; 96 dest += 4; 97 len -= 4; 98 } 99 while (src < end) { 100 *dest++ ^= *src++; 101 } 102 } 103 104 void 105 rf_nWayXor2(src_rbs, dest_rb, len) 106 RF_ReconBuffer_t **src_rbs; 107 RF_ReconBuffer_t *dest_rb; 108 int len; 109 { 110 unsigned long *dst = (unsigned long *) dest_rb->buffer; 111 unsigned long *a = dst; 112 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 113 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 114 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 115 116 callcount[2]++; 117 /* align dest to cache line */ 118 while ((((unsigned long) dst) & 0x1f)) { 119 *dst++ = *a++ ^ *b++ ^ *c++; 120 len--; 121 } 122 while (len > 4) { 123 a0 = a[0]; 124 len -= 4; 125 126 a1 = a[1]; 127 a2 = a[2]; 128 129 a3 = a[3]; 130 a += 4; 131 132 b0 = b[0]; 133 b1 = b[1]; 134 135 b2 = b[2]; 136 b3 = b[3]; 137 /* start dual issue */ 138 a0 ^= b0; 139 b0 = c[0]; 140 141 b += 4; 142 a1 ^= b1; 143 144 a2 ^= b2; 145 a3 ^= b3; 146 147 b1 = c[1]; 148 a0 ^= b0; 149 150 b2 = c[2]; 151 a1 ^= b1; 152 153 b3 = c[3]; 154 a2 ^= b2; 155 156 dst[0] = a0; 157 a3 ^= b3; 158 dst[1] = a1; 159 c += 4; 160 dst[2] = a2; 161 dst[3] = a3; 162 dst += 4; 163 } 164 while (len) { 165 *dst++ = *a++ ^ *b++ ^ *c++; 166 len--; 167 } 168 } 169 /* note that first arg is not incremented but 2nd arg is */ 170 #define LOAD_FIRST(_dst,_b) \ 171 a0 = _dst[0]; len -= 4; \ 172 a1 = _dst[1]; \ 173 a2 = _dst[2]; \ 174 a3 = _dst[3]; \ 175 b0 = _b[0]; \ 176 b1 = _b[1]; \ 177 b2 = _b[2]; \ 178 b3 = _b[3]; _b += 4; 179 180 /* note: arg is incremented */ 181 #define XOR_AND_LOAD_NEXT(_n) \ 182 a0 ^= b0; b0 = _n[0]; \ 183 a1 ^= b1; b1 = _n[1]; \ 184 a2 ^= b2; b2 = _n[2]; \ 185 a3 ^= b3; b3 = _n[3]; \ 186 _n += 4; 187 188 /* arg is incremented */ 189 #define XOR_AND_STORE(_dst) \ 190 a0 ^= b0; _dst[0] = a0; \ 191 a1 ^= b1; _dst[1] = a1; \ 192 a2 ^= b2; _dst[2] = a2; \ 193 a3 ^= b3; _dst[3] = a3; \ 194 _dst += 4; 195 196 197 void 198 rf_nWayXor3(src_rbs, dest_rb, len) 199 RF_ReconBuffer_t **src_rbs; 200 RF_ReconBuffer_t *dest_rb; 201 int len; 202 { 203 unsigned long *dst = (unsigned long *) dest_rb->buffer; 204 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 205 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 206 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 207 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 208 209 callcount[3]++; 210 /* align dest to cache line */ 211 while ((((unsigned long) dst) & 0x1f)) { 212 *dst++ ^= *b++ ^ *c++ ^ *d++; 213 len--; 214 } 215 while (len > 4) { 216 LOAD_FIRST(dst, b); 217 XOR_AND_LOAD_NEXT(c); 218 XOR_AND_LOAD_NEXT(d); 219 XOR_AND_STORE(dst); 220 } 221 while (len) { 222 *dst++ ^= *b++ ^ *c++ ^ *d++; 223 len--; 224 } 225 } 226 227 void 228 rf_nWayXor4(src_rbs, dest_rb, len) 229 RF_ReconBuffer_t **src_rbs; 230 RF_ReconBuffer_t *dest_rb; 231 int len; 232 { 233 unsigned long *dst = (unsigned long *) dest_rb->buffer; 234 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 235 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 236 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 237 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 238 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 239 240 callcount[4]++; 241 /* align dest to cache line */ 242 while ((((unsigned long) dst) & 0x1f)) { 243 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 244 len--; 245 } 246 while (len > 4) { 247 LOAD_FIRST(dst, b); 248 XOR_AND_LOAD_NEXT(c); 249 XOR_AND_LOAD_NEXT(d); 250 XOR_AND_LOAD_NEXT(e); 251 XOR_AND_STORE(dst); 252 } 253 while (len) { 254 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 255 len--; 256 } 257 } 258 259 void 260 rf_nWayXor5(src_rbs, dest_rb, len) 261 RF_ReconBuffer_t **src_rbs; 262 RF_ReconBuffer_t *dest_rb; 263 int len; 264 { 265 unsigned long *dst = (unsigned long *) dest_rb->buffer; 266 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 267 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 268 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 269 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 270 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 271 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 272 273 callcount[5]++; 274 /* align dest to cache line */ 275 while ((((unsigned long) dst) & 0x1f)) { 276 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 277 len--; 278 } 279 while (len > 4) { 280 LOAD_FIRST(dst, b); 281 XOR_AND_LOAD_NEXT(c); 282 XOR_AND_LOAD_NEXT(d); 283 XOR_AND_LOAD_NEXT(e); 284 XOR_AND_LOAD_NEXT(f); 285 XOR_AND_STORE(dst); 286 } 287 while (len) { 288 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 289 len--; 290 } 291 } 292 293 void 294 rf_nWayXor6(src_rbs, dest_rb, len) 295 RF_ReconBuffer_t **src_rbs; 296 RF_ReconBuffer_t *dest_rb; 297 int len; 298 { 299 unsigned long *dst = (unsigned long *) dest_rb->buffer; 300 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 301 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 302 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 303 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 304 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 305 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 306 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 307 308 callcount[6]++; 309 /* align dest to cache line */ 310 while ((((unsigned long) dst) & 0x1f)) { 311 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 312 len--; 313 } 314 while (len > 4) { 315 LOAD_FIRST(dst, b); 316 XOR_AND_LOAD_NEXT(c); 317 XOR_AND_LOAD_NEXT(d); 318 XOR_AND_LOAD_NEXT(e); 319 XOR_AND_LOAD_NEXT(f); 320 XOR_AND_LOAD_NEXT(g); 321 XOR_AND_STORE(dst); 322 } 323 while (len) { 324 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 325 len--; 326 } 327 } 328 329 void 330 rf_nWayXor7(src_rbs, dest_rb, len) 331 RF_ReconBuffer_t **src_rbs; 332 RF_ReconBuffer_t *dest_rb; 333 int len; 334 { 335 unsigned long *dst = (unsigned long *) dest_rb->buffer; 336 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 337 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 338 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 339 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 340 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 341 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 342 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 343 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 344 345 callcount[7]++; 346 /* align dest to cache line */ 347 while ((((unsigned long) dst) & 0x1f)) { 348 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 349 len--; 350 } 351 while (len > 4) { 352 LOAD_FIRST(dst, b); 353 XOR_AND_LOAD_NEXT(c); 354 XOR_AND_LOAD_NEXT(d); 355 XOR_AND_LOAD_NEXT(e); 356 XOR_AND_LOAD_NEXT(f); 357 XOR_AND_LOAD_NEXT(g); 358 XOR_AND_LOAD_NEXT(h); 359 XOR_AND_STORE(dst); 360 } 361 while (len) { 362 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 363 len--; 364 } 365 } 366 367 void 368 rf_nWayXor8(src_rbs, dest_rb, len) 369 RF_ReconBuffer_t **src_rbs; 370 RF_ReconBuffer_t *dest_rb; 371 int len; 372 { 373 unsigned long *dst = (unsigned long *) dest_rb->buffer; 374 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 375 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 376 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 377 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 378 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 379 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 380 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 381 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 382 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 383 384 callcount[8]++; 385 /* align dest to cache line */ 386 while ((((unsigned long) dst) & 0x1f)) { 387 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 388 len--; 389 } 390 while (len > 4) { 391 LOAD_FIRST(dst, b); 392 XOR_AND_LOAD_NEXT(c); 393 XOR_AND_LOAD_NEXT(d); 394 XOR_AND_LOAD_NEXT(e); 395 XOR_AND_LOAD_NEXT(f); 396 XOR_AND_LOAD_NEXT(g); 397 XOR_AND_LOAD_NEXT(h); 398 XOR_AND_LOAD_NEXT(i); 399 XOR_AND_STORE(dst); 400 } 401 while (len) { 402 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 403 len--; 404 } 405 } 406 407 408 void 409 rf_nWayXor9(src_rbs, dest_rb, len) 410 RF_ReconBuffer_t **src_rbs; 411 RF_ReconBuffer_t *dest_rb; 412 int len; 413 { 414 unsigned long *dst = (unsigned long *) dest_rb->buffer; 415 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 416 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 417 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 418 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 419 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 420 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 421 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 422 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 423 unsigned long *j = (unsigned long *) src_rbs[8]->buffer; 424 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 425 426 callcount[9]++; 427 /* align dest to cache line */ 428 while ((((unsigned long) dst) & 0x1f)) { 429 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 430 len--; 431 } 432 while (len > 4) { 433 LOAD_FIRST(dst, b); 434 XOR_AND_LOAD_NEXT(c); 435 XOR_AND_LOAD_NEXT(d); 436 XOR_AND_LOAD_NEXT(e); 437 XOR_AND_LOAD_NEXT(f); 438 XOR_AND_LOAD_NEXT(g); 439 XOR_AND_LOAD_NEXT(h); 440 XOR_AND_LOAD_NEXT(i); 441 XOR_AND_LOAD_NEXT(j); 442 XOR_AND_STORE(dst); 443 } 444 while (len) { 445 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 446 len--; 447 } 448 } 449