1 /* $NetBSD: rf_dagfuncs.c,v 1.10 2002/09/21 00:52:49 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". 
 * However,
 *    execution functions that release resources must be sure to release
 *    them even when they NOP out the function that would use them.
 *    Functions that acquire resources should go ahead and acquire them
 *    even when they NOP, so that a downstream release node will not have
 *    to check to find out whether or not the acquire was suppressed.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.10 2002/09/21 00:52:49 oster Exp $");

#include <sys/param.h>
#include <sys/ioctl.h>

#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_layout.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_diskqueue.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_dagutils.h"

#include "rf_kintf.h"

#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*
 * Indirect entry points for the standard DAG node execution and undo
 * functions.  These are filled in exactly once, by rf_ConfigureDAGFuncs(),
 * so that DAG-construction code can refer to a single name regardless of
 * which concrete implementation has been configured.
 */
int     (*rf_DiskReadFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteFunc) (RF_DagNode_t *);
int     (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
int     (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
int     (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
int     (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);

/*****************************************************************************
 * main (only) configuration routine for this module
 *****************************************************************************/
int
rf_ConfigureDAGFuncs(listp)
	RF_ShutdownList_t **listp;
{
	/* The xor helpers below convert byte counts to longword counts by
	 * shifting by RF_LONGSHIFT, so it must agree with sizeof(long). */
	RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
	rf_DiskReadFunc = rf_DiskReadFuncForThreads;
	rf_DiskReadUndoFunc = rf_DiskUndoFunc;
	rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
	rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
	rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
	rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
	rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
	rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
	rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
	return (0);
}



/*****************************************************************************
 * the execution function associated with a terminate node
 *****************************************************************************/
int
rf_TerminateFunc(node)
	RF_DagNode_t *node;
{
	/* every commit node must have fired before the DAG can terminate */
	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

int
rf_TerminateUndoFunc(node)
	RF_DagNode_t *node;
{
	/* a terminate node has nothing to undo */
	return (0);
}


/*****************************************************************************
 * execution functions associated with a mirror node
 *
 * parameters:
 *
 * 0 - physical disk address of data
 * 1 - buffer for holding read data
 * 2 - parity stripe ID
 * 3 - flags
 * 4 - physical disk address of mirror (parity)
 *
 *****************************************************************************/

int
rf_DiskReadMirrorIdleFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskIdle(node);
	return (rf_DiskReadFunc(node));
}

int
rf_DiskReadMirrorPartitionFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskPartition(node);
	return (rf_DiskReadFunc(node));
}

int
rf_DiskReadMirrorUndoFunc(node)
	RF_DagNode_t *node;
{
	/* mirror selection has no side effects that need undoing */
	return (0);
}


#if RF_INCLUDE_PARITYLOGGING > 0
/*****************************************************************************
 * the execution function associated with a parity log update node
 *
 * params[0] - physical disk address of the data being logged
 * params[1] - buffer holding the data
 *****************************************************************************/
int
rf_ParityLogUpdateFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* node->wakeFunc is handed to the log layer; presumably it
		 * is invoked once the record is committed — see
		 * rf_paritylog.c to confirm */
		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node,
		    node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: charge elapsed time to the
			 * parity-log timer and fail the node with ENOMEM */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}


/*****************************************************************************
 * the execution function associated with a parity log overwrite node
 *
 * identical in structure to rf_ParityLogUpdateFunc above, but creates the
 * log data with RF_OVERWRITE instead of RF_UPDATE
 *****************************************************************************/
int
rf_ParityLogOverwriteFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: charge elapsed time and fail
			 * the node with ENOMEM */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}

int
rf_ParityLogUpdateUndoFunc(node)
	RF_DagNode_t *node;
{
	/* parity log updates are not undone */
	return (0);
}

int
rf_ParityLogOverwriteUndoFunc(node)
	RF_DagNode_t *node;
{
	/* parity log overwrites are not undone */
	return (0);
}
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*****************************************************************************
 * the execution function associated with a NOP node
 *****************************************************************************/
int
rf_NullNodeFunc(node)
	RF_DagNode_t *node;
{
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

int
rf_NullNodeUndoFunc(node)
	RF_DagNode_t *node;
{
	node->status = rf_undone;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}


/*****************************************************************************
 * the execution function associated with a disk-read node
 *
 * params[0] - physical disk address
 * params[1] - buffer to read into
 * params[2] - parity stripe ID
 * params[3] - priority, lock/unlock flags and reconstruction-unit number
 *             packed into one word (see the RF_EXTRACT_* macros)
 *
 * If the DAG is not enabled, the request is still created and enqueued,
 * but as RF_IO_TYPE_NOP with the lock/unlock flags intact, so that queue
 * lock/unlock side effects still occur (see rule 2 in the header comment).
 *****************************************************************************/
int
rf_DiskReadFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void   *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* a single request may lock or unlock its disk queue, never both */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;

	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    node, NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
	if (!req) {
		/* could not build the request: fail the node immediately */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the wakeup path can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}
	return (0);
}


/*****************************************************************************
 * the execution function associated with a disk-write node
 *
 * same parameter layout and NOP behavior as rf_DiskReadFuncForThreads
 *****************************************************************************/
int
rf_DiskWriteFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void   *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* normal processing (rollaway or forward recovery) begins here */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node, NULL,
	    node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    flags, b_proc);

	if (!req) {
		/* could not build the request: fail the node immediately */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the wakeup path can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}

	return (0);
}
/*****************************************************************************
 * the undo function for disk nodes
 * Note: this is not a proper undo of a write node, only locks are released.
 * old data is not restored to disk!
357 ****************************************************************************************/ 358 int 359 rf_DiskUndoFunc(node) 360 RF_DagNode_t *node; 361 { 362 RF_DiskQueueData_t *req; 363 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 364 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 365 366 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 367 0L, 0, NULL, 0L, 0, 368 (int (*) (void *, int)) node->wakeFunc, 369 (void *) node, 370 NULL, node->dagHdr->tracerec, 371 (void *) (node->dagHdr->raidPtr), 372 RF_UNLOCK_DISK_QUEUE, NULL); 373 if (!req) 374 (node->wakeFunc) (node, ENOMEM); 375 else { 376 node->dagFuncData = (void *) req; 377 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 378 } 379 380 return (0); 381 } 382 /***************************************************************************************** 383 * the execution function associated with an "unlock disk queue" node 384 ****************************************************************************************/ 385 int 386 rf_DiskUnlockFuncForThreads(node) 387 RF_DagNode_t *node; 388 { 389 RF_DiskQueueData_t *req; 390 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 391 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 392 393 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 394 0L, 0, NULL, 0L, 0, 395 (int (*) (void *, int)) node->wakeFunc, 396 (void *) node, 397 NULL, node->dagHdr->tracerec, 398 (void *) (node->dagHdr->raidPtr), 399 RF_UNLOCK_DISK_QUEUE, NULL); 400 if (!req) 401 (node->wakeFunc) (node, ENOMEM); 402 else { 403 node->dagFuncData = (void *) req; 404 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 405 } 406 407 return (0); 408 } 409 /***************************************************************************************** 410 * Callback routine for DiskRead and DiskWrite nodes. 
When the disk op completes, 411 * the routine is called to set the node status and inform the execution engine that 412 * the node has fired. 413 ****************************************************************************************/ 414 int 415 rf_GenericWakeupFunc(node, status) 416 RF_DagNode_t *node; 417 int status; 418 { 419 switch (node->status) { 420 case rf_bwd1: 421 node->status = rf_bwd2; 422 if (node->dagFuncData) 423 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 424 return (rf_DiskWriteFuncForThreads(node)); 425 break; 426 case rf_fired: 427 if (status) 428 node->status = rf_bad; 429 else 430 node->status = rf_good; 431 break; 432 case rf_recover: 433 /* probably should never reach this case */ 434 if (status) 435 node->status = rf_panic; 436 else 437 node->status = rf_undone; 438 break; 439 default: 440 printf("rf_GenericWakeupFunc:"); 441 printf("node->status is %d,", node->status); 442 printf("status is %d \n", status); 443 RF_PANIC(); 444 break; 445 } 446 if (node->dagFuncData) 447 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 448 return (rf_FinishNode(node, RF_INTR_CONTEXT)); 449 } 450 451 452 /***************************************************************************************** 453 * there are three distinct types of xor nodes 454 * A "regular xor" is used in the fault-free case where the access spans a complete 455 * stripe unit. It assumes that the result buffer is one full stripe unit in size, 456 * and uses the stripe-unit-offset values that it computes from the PDAs to determine 457 * where within the stripe unit to XOR each argument buffer. 458 * 459 * A "simple xor" is used in the fault-free case where the access touches only a portion 460 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument 461 * buffers are of the same size and have the same stripe unit offset. 462 * 463 * A "recovery xor" is used in the degraded-mode case. 
 * It's similar to the regular
 * xor function except that it takes the failed PDA as an additional
 * parameter, and uses it to determine what portions of the argument buffers
 * need to be xor'd into the result buffer, and where in the result buffer
 * they should go.
 *****************************************************************************/

/* xor the params together and store the result in the result field.
 * assume the result field points to a buffer that is the size of one SU,
 * and use the pda params to determine where within the buffer to XOR
 * the input buffers.
 *
 * params come in (pda, buffer) pairs; the final param is the raid pointer.
 * NOTE(review): retcode keeps only the status of the last rf_XorIntoBuffer
 * call, so an earlier failure can be masked by a later success — confirm
 * whether callers depend on first-error semantics.
 */
int
rf_RegularXorFunc(node)
	RF_DagNode_t *node;
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int     i, retcode;

	retcode = 0;
	if (node->dagHdr->status == rf_enable) {
		/* don't do the XOR if the input is the same as the output */
		RF_ETIMER_START(timer);
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* xor the inputs into the result buffer, ignoring placement issues */
int
rf_SimpleXorFunc(node)
	RF_DagNode_t *node;
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int     i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* don't do the XOR if the input is the same as the output */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
				    (struct buf *) node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* this xor is used by the degraded-mode dag functions to recover lost data.
 * the second-to-last parameter is the PDA for the failed portion of the access.
 * the code here looks at this PDA and assumes that the xor target buffer is
 * equal in size to the number of sectors in the failed PDA.  It then uses
 * the other PDAs in the parameter list to determine where within the target
 * buffer the corresponding data should be xored.
533 */ 534 int 535 rf_RecoveryXorFunc(node) 536 RF_DagNode_t *node; 537 { 538 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 539 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 540 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 541 int i, retcode = 0; 542 RF_PhysDiskAddr_t *pda; 543 int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 544 char *srcbuf, *destbuf; 545 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 546 RF_Etimer_t timer; 547 548 if (node->dagHdr->status == rf_enable) { 549 RF_ETIMER_START(timer); 550 for (i = 0; i < node->numParams - 2; i += 2) 551 if (node->params[i + 1].p != node->results[0]) { 552 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 553 srcbuf = (char *) node->params[i + 1].p; 554 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 555 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); 556 retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); 557 } 558 RF_ETIMER_STOP(timer); 559 RF_ETIMER_EVAL(timer); 560 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 561 } 562 return (rf_GenericWakeupFunc(node, retcode)); 563 } 564 /***************************************************************************************** 565 * The next three functions are utilities used by the above xor-execution functions. 566 ****************************************************************************************/ 567 568 569 /* 570 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit 571 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the 572 * access described by pda is one SU in size (which by implication means it's SU-aligned), 573 * all that happens is (targbuf) <- (srcbuf ^ targbuf). 
When the access is less than one 574 * SU in size the XOR occurs on only the portion of targbuf identified in the pda. 575 */ 576 577 int 578 rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) 579 RF_Raid_t *raidPtr; 580 RF_PhysDiskAddr_t *pda; 581 char *srcbuf; 582 char *targbuf; 583 void *bp; 584 { 585 char *targptr; 586 int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 587 int SUOffset = pda->startSector % sectPerSU; 588 int length, retcode = 0; 589 590 RF_ASSERT(pda->numSector <= sectPerSU); 591 592 targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); 593 length = rf_RaidAddressToByte(raidPtr, pda->numSector); 594 retcode = rf_bxor(srcbuf, targptr, length, bp); 595 return (retcode); 596 } 597 /* it really should be the case that the buffer pointers (returned by malloc) 598 * are aligned to the natural word size of the machine, so this is the only 599 * case we optimize for. The length should always be a multiple of the sector 600 * size, so there should be no problem with leftover bytes at the end. 601 */ 602 int 603 rf_bxor(src, dest, len, bp) 604 char *src; 605 char *dest; 606 int len; 607 void *bp; 608 { 609 unsigned mask = sizeof(long) - 1, retcode = 0; 610 611 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { 612 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); 613 } else { 614 RF_ASSERT(0); 615 } 616 return (retcode); 617 } 618 /* map a user buffer into kernel space, if necessary */ 619 #define REMAP_VA(_bp,x,y) (y) = (x) 620 621 /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. 622 * We don't want to assume anything about which input buffers are in kernel/user 623 * space, nor about their alignment, so in each loop we compute the maximum number 624 * of bytes that we can xor without crossing any page boundaries, and do only this many 625 * bytes before the next remap. 
626 */ 627 int 628 rf_longword_bxor(src, dest, len, bp) 629 unsigned long *src; 630 unsigned long *dest; 631 int len; /* longwords */ 632 void *bp; 633 { 634 unsigned long *end = src + len; 635 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 636 unsigned long *pg_src, *pg_dest; /* per-page source/dest 637 * pointers */ 638 int longs_this_time;/* # longwords to xor in the current iteration */ 639 640 REMAP_VA(bp, src, pg_src); 641 REMAP_VA(bp, dest, pg_dest); 642 if (!pg_src || !pg_dest) 643 return (EFAULT); 644 645 while (len >= 4) { 646 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 647 src += longs_this_time; 648 dest += longs_this_time; 649 len -= longs_this_time; 650 while (longs_this_time >= 4) { 651 d0 = pg_dest[0]; 652 d1 = pg_dest[1]; 653 d2 = pg_dest[2]; 654 d3 = pg_dest[3]; 655 s0 = pg_src[0]; 656 s1 = pg_src[1]; 657 s2 = pg_src[2]; 658 s3 = pg_src[3]; 659 pg_dest[0] = d0 ^ s0; 660 pg_dest[1] = d1 ^ s1; 661 pg_dest[2] = d2 ^ s2; 662 pg_dest[3] = d3 ^ s3; 663 pg_src += 4; 664 pg_dest += 4; 665 longs_this_time -= 4; 666 } 667 while (longs_this_time > 0) { /* cannot cross any page 668 * boundaries here */ 669 *pg_dest++ ^= *pg_src++; 670 longs_this_time--; 671 } 672 673 /* either we're done, or we've reached a page boundary on one 674 * (or possibly both) of the pointers */ 675 if (len) { 676 if (RF_PAGE_ALIGNED(src)) 677 REMAP_VA(bp, src, pg_src); 678 if (RF_PAGE_ALIGNED(dest)) 679 REMAP_VA(bp, dest, pg_dest); 680 if (!pg_src || !pg_dest) 681 return (EFAULT); 682 } 683 } 684 while (src < end) { 685 *pg_dest++ ^= *pg_src++; 686 src++; 687 dest++; 688 len--; 689 if (RF_PAGE_ALIGNED(src)) 690 REMAP_VA(bp, src, pg_src); 691 if (RF_PAGE_ALIGNED(dest)) 692 REMAP_VA(bp, dest, pg_dest); 693 } 694 RF_ASSERT(len == 0); 695 return (0); 696 } 697 698 #if 0 699 /* 700 dst = a ^ b ^ c; 701 a may equal dst 702 see comment above longword_bxor 703 */ 704 int 705 rf_longword_bxor3(dst, a, b, c, 
len, bp) 706 unsigned long *dst; 707 unsigned long *a; 708 unsigned long *b; 709 unsigned long *c; 710 int len; /* length in longwords */ 711 void *bp; 712 { 713 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 714 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 715 * pointers */ 716 int longs_this_time;/* # longs to xor in the current iteration */ 717 char dst_is_a = 0; 718 719 REMAP_VA(bp, a, pg_a); 720 REMAP_VA(bp, b, pg_b); 721 REMAP_VA(bp, c, pg_c); 722 if (a == dst) { 723 pg_dst = pg_a; 724 dst_is_a = 1; 725 } else { 726 REMAP_VA(bp, dst, pg_dst); 727 } 728 729 /* align dest to cache line. Can't cross a pg boundary on dst here. */ 730 while ((((unsigned long) pg_dst) & 0x1f)) { 731 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 732 dst++; 733 a++; 734 b++; 735 c++; 736 if (RF_PAGE_ALIGNED(a)) { 737 REMAP_VA(bp, a, pg_a); 738 if (!pg_a) 739 return (EFAULT); 740 } 741 if (RF_PAGE_ALIGNED(b)) { 742 REMAP_VA(bp, a, pg_b); 743 if (!pg_b) 744 return (EFAULT); 745 } 746 if (RF_PAGE_ALIGNED(c)) { 747 REMAP_VA(bp, a, pg_c); 748 if (!pg_c) 749 return (EFAULT); 750 } 751 len--; 752 } 753 754 while (len > 4) { 755 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 756 a += longs_this_time; 757 b += longs_this_time; 758 c += longs_this_time; 759 dst += longs_this_time; 760 len -= longs_this_time; 761 while (longs_this_time >= 4) { 762 a0 = pg_a[0]; 763 longs_this_time -= 4; 764 765 a1 = pg_a[1]; 766 a2 = pg_a[2]; 767 768 a3 = pg_a[3]; 769 pg_a += 4; 770 771 b0 = pg_b[0]; 772 b1 = pg_b[1]; 773 774 b2 = pg_b[2]; 775 b3 = pg_b[3]; 776 /* start dual issue */ 777 a0 ^= b0; 778 b0 = pg_c[0]; 779 780 pg_b += 4; 781 a1 ^= b1; 782 783 a2 ^= b2; 784 a3 ^= b3; 785 786 b1 = pg_c[1]; 787 a0 ^= b0; 788 789 b2 = pg_c[2]; 790 a1 ^= b1; 791 792 b3 = pg_c[3]; 793 a2 ^= b2; 794 795 pg_dst[0] = a0; 796 a3 ^= b3; 797 pg_dst[1] = a1; 798 pg_c += 4; 799 pg_dst[2] = a2; 800 pg_dst[3] = a3; 801 pg_dst += 4; 802 } 803 
while (longs_this_time > 0) { /* cannot cross any page 804 * boundaries here */ 805 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 806 longs_this_time--; 807 } 808 809 if (len) { 810 if (RF_PAGE_ALIGNED(a)) { 811 REMAP_VA(bp, a, pg_a); 812 if (!pg_a) 813 return (EFAULT); 814 if (dst_is_a) 815 pg_dst = pg_a; 816 } 817 if (RF_PAGE_ALIGNED(b)) { 818 REMAP_VA(bp, b, pg_b); 819 if (!pg_b) 820 return (EFAULT); 821 } 822 if (RF_PAGE_ALIGNED(c)) { 823 REMAP_VA(bp, c, pg_c); 824 if (!pg_c) 825 return (EFAULT); 826 } 827 if (!dst_is_a) 828 if (RF_PAGE_ALIGNED(dst)) { 829 REMAP_VA(bp, dst, pg_dst); 830 if (!pg_dst) 831 return (EFAULT); 832 } 833 } 834 } 835 while (len) { 836 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 837 dst++; 838 a++; 839 b++; 840 c++; 841 if (RF_PAGE_ALIGNED(a)) { 842 REMAP_VA(bp, a, pg_a); 843 if (!pg_a) 844 return (EFAULT); 845 if (dst_is_a) 846 pg_dst = pg_a; 847 } 848 if (RF_PAGE_ALIGNED(b)) { 849 REMAP_VA(bp, b, pg_b); 850 if (!pg_b) 851 return (EFAULT); 852 } 853 if (RF_PAGE_ALIGNED(c)) { 854 REMAP_VA(bp, c, pg_c); 855 if (!pg_c) 856 return (EFAULT); 857 } 858 if (!dst_is_a) 859 if (RF_PAGE_ALIGNED(dst)) { 860 REMAP_VA(bp, dst, pg_dst); 861 if (!pg_dst) 862 return (EFAULT); 863 } 864 len--; 865 } 866 return (0); 867 } 868 869 int 870 rf_bxor3(dst, a, b, c, len, bp) 871 unsigned char *dst; 872 unsigned char *a; 873 unsigned char *b; 874 unsigned char *c; 875 unsigned long len; 876 void *bp; 877 { 878 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 879 880 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 881 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 882 } 883 #endif 884