1 /* $NetBSD: rf_dagfuncs.c,v 1.11 2002/11/18 23:46:28 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". 
However,
 *    execution functions that release resources must be sure to release
 *    them even when they NOP out the function that would use them.
 *    Functions that acquire resources should go ahead and acquire them
 *    even when they NOP, so that a downstream release node will not have
 *    to check to find out whether or not the acquire was suppressed.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.11 2002/11/18 23:46:28 oster Exp $");

#include <sys/param.h>
#include <sys/ioctl.h>

#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_layout.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_diskqueue.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_dagutils.h"

#include "rf_kintf.h"

#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/* Dispatch pointers for the DAG node execution/undo functions.  They are
 * filled in by rf_ConfigureDAGFuncs() below; indirecting through them keeps
 * DAG construction code independent of the I/O model compiled in. */
int     (*rf_DiskReadFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteFunc) (RF_DagNode_t *);
int     (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
int     (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
int     (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
int     (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);

/*****************************************************************************************
 * main (only) configuration routine for this module
 ****************************************************************************************/
int
rf_ConfigureDAGFuncs(listp)
	RF_ShutdownList_t **listp;
{
	/* RF_LONGSHIFT must agree with sizeof(long): the xor utilities below
	 * shift byte counts by RF_LONGSHIFT to convert them to longword
	 * counts. */
	RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
	rf_DiskReadFunc = rf_DiskReadFuncForThreads;
	rf_DiskReadUndoFunc = rf_DiskUndoFunc;
	rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
	rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
	rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
	rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
	rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
	rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
	rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
	return (0);
}



/*****************************************************************************************
 * the execution function associated with a terminate node
 ****************************************************************************************/
int
rf_TerminateFunc(node)
	RF_DagNode_t *node;
{
	/* every commit node must have fired before the DAG may terminate */
	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

int
rf_TerminateUndoFunc(node)
	RF_DagNode_t *node;
{
	/* a terminate node has nothing to undo */
	return (0);
}


/*****************************************************************************************
 * execution functions associated with a mirror node
 *
 * parameters:
 *
 * 0 - physical disk address of data
 * 1 - buffer for holding read data
 * 2 - parity stripe ID
 * 3 - flags
 * 4 - physical disk address of mirror (parity)
 *
 ****************************************************************************************/

int
rf_DiskReadMirrorIdleFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskIdle(node);
	return (rf_DiskReadFunc(node));
}

#if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0)
int
rf_DiskReadMirrorPartitionFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskPartition(node);
	return (rf_DiskReadFunc(node));
}
#endif

int
rf_DiskReadMirrorUndoFunc(node)
	RF_DagNode_t *node;
{
	/* mirror-disk selection has no side effects that need undoing */
	return (0);
}



#if RF_INCLUDE_PARITYLOGGING > 0
/*****************************************************************************************
 * the execution function associated with a parity log update node
 ****************************************************************************************/
int
rf_ParityLogUpdateFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node,
		    node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* out of memory: bill the time spent so far to the
			 * parity-log timer and fail the node with ENOMEM */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}


/*****************************************************************************************
 * the execution function associated with a parity log overwrite node
 ****************************************************************************************/
int
rf_ParityLogOverwriteFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* out of memory: bill the time spent so far to the
			 * parity-log timer and fail the node with ENOMEM */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}

int
rf_ParityLogUpdateUndoFunc(node)
	RF_DagNode_t *node;
{
	return (0);
}

int
rf_ParityLogOverwriteUndoFunc(node)
	RF_DagNode_t *node;
{
	return (0);
}
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*****************************************************************************************
 * the execution function associated with a NOP node
 ****************************************************************************************/
int
rf_NullNodeFunc(node)
	RF_DagNode_t *node;
{
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

int
rf_NullNodeUndoFunc(node)
	RF_DagNode_t *node;
{
	node->status = rf_undone;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}


/*****************************************************************************************
 * the execution function associated with a disk-read node
 ****************************************************************************************/
int
rf_DiskReadFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/* a disabled DAG still issues a NOP request so that queue locks are
	 * acquired/released as promised (see rule 2 at the top of this file) */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void   *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* a single request may lock or unlock its disk queue, never both */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;

	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    node, NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
	if (!req) {
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the completion callback can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}
	return (0);
}


/*****************************************************************************************
 * the execution function associated with a disk-write node
 ****************************************************************************************/
int
rf_DiskWriteFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/* a disabled DAG still issues a NOP request so that queue locks are
	 * acquired/released as promised (see rule 2 at the top of this file) */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void   *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* normal processing (rollaway or forward recovery) begins here */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node, NULL,
	    node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    flags, b_proc);

	if (!req) {
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the completion callback can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}

	return (0);
}
/*****************************************************************************************
 * the undo function for disk nodes
 * Note: this is not a proper undo of a write node, only locks are released.
 *       old data is not restored to disk!
359 ****************************************************************************************/ 360 int 361 rf_DiskUndoFunc(node) 362 RF_DagNode_t *node; 363 { 364 RF_DiskQueueData_t *req; 365 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 366 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 367 368 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 369 0L, 0, NULL, 0L, 0, 370 (int (*) (void *, int)) node->wakeFunc, 371 (void *) node, 372 NULL, node->dagHdr->tracerec, 373 (void *) (node->dagHdr->raidPtr), 374 RF_UNLOCK_DISK_QUEUE, NULL); 375 if (!req) 376 (node->wakeFunc) (node, ENOMEM); 377 else { 378 node->dagFuncData = (void *) req; 379 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 380 } 381 382 return (0); 383 } 384 /***************************************************************************************** 385 * the execution function associated with an "unlock disk queue" node 386 ****************************************************************************************/ 387 int 388 rf_DiskUnlockFuncForThreads(node) 389 RF_DagNode_t *node; 390 { 391 RF_DiskQueueData_t *req; 392 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 393 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 394 395 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 396 0L, 0, NULL, 0L, 0, 397 (int (*) (void *, int)) node->wakeFunc, 398 (void *) node, 399 NULL, node->dagHdr->tracerec, 400 (void *) (node->dagHdr->raidPtr), 401 RF_UNLOCK_DISK_QUEUE, NULL); 402 if (!req) 403 (node->wakeFunc) (node, ENOMEM); 404 else { 405 node->dagFuncData = (void *) req; 406 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 407 } 408 409 return (0); 410 } 411 /***************************************************************************************** 412 * Callback routine for DiskRead and DiskWrite nodes. 
When the disk op completes, 413 * the routine is called to set the node status and inform the execution engine that 414 * the node has fired. 415 ****************************************************************************************/ 416 int 417 rf_GenericWakeupFunc(node, status) 418 RF_DagNode_t *node; 419 int status; 420 { 421 switch (node->status) { 422 case rf_bwd1: 423 node->status = rf_bwd2; 424 if (node->dagFuncData) 425 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 426 return (rf_DiskWriteFuncForThreads(node)); 427 break; 428 case rf_fired: 429 if (status) 430 node->status = rf_bad; 431 else 432 node->status = rf_good; 433 break; 434 case rf_recover: 435 /* probably should never reach this case */ 436 if (status) 437 node->status = rf_panic; 438 else 439 node->status = rf_undone; 440 break; 441 default: 442 printf("rf_GenericWakeupFunc:"); 443 printf("node->status is %d,", node->status); 444 printf("status is %d \n", status); 445 RF_PANIC(); 446 break; 447 } 448 if (node->dagFuncData) 449 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 450 return (rf_FinishNode(node, RF_INTR_CONTEXT)); 451 } 452 453 454 /***************************************************************************************** 455 * there are three distinct types of xor nodes 456 * A "regular xor" is used in the fault-free case where the access spans a complete 457 * stripe unit. It assumes that the result buffer is one full stripe unit in size, 458 * and uses the stripe-unit-offset values that it computes from the PDAs to determine 459 * where within the stripe unit to XOR each argument buffer. 460 * 461 * A "simple xor" is used in the fault-free case where the access touches only a portion 462 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument 463 * buffers are of the same size and have the same stripe unit offset. 464 * 465 * A "recovery xor" is used in the degraded-mode case. 
It's similar to the regular
 * xor function except that it takes the failed PDA as an additional parameter, and
 * uses it to determine what portions of the argument buffers need to be xor'd into
 * the result buffer, and where in the result buffer they should go.
 ****************************************************************************************/

/* xor the params together and store the result in the result field.
 * assume the result field points to a buffer that is the size of one SU,
 * and use the pda params to determine where within the buffer to XOR
 * the input buffers.
 */
int
rf_RegularXorFunc(node)
	RF_DagNode_t *node;
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int     i, retcode;

	retcode = 0;
	if (node->dagHdr->status == rf_enable) {
		/* don't do the XOR if the input is the same as the output */
		RF_ETIMER_START(timer);
		/* params are (pda, buffer) pairs; the trailing param is the
		 * raid pointer, hence the "- 1" bound and "+= 2" stride */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* xor the inputs into the result buffer, ignoring placement issues */
int
rf_SimpleXorFunc(node)
	RF_DagNode_t *node;
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int     i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* don't do the XOR if the input is the same as the output */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
				    (struct buf *) node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* this xor is used by the degraded-mode dag functions to recover lost data.
 * the second-to-last parameter is the PDA for the failed portion of the access.
 * the code here looks at this PDA and assumes that the xor target buffer is
 * equal in size to the number of sectors in the failed PDA.  It then uses
 * the other PDAs in the parameter list to determine where within the target
 * buffer the corresponding data should be xored.
 */
int
rf_RecoveryXorFunc(node)
	RF_DagNode_t *node;
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int     i, retcode = 0;
	RF_PhysDiskAddr_t *pda;
	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char   *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		for (i = 0; i < node->numParams - 2; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
				srcbuf = (char *) node->params[i + 1].p;
				/* place this buffer's contribution at its
				 * offset relative to the failed PDA */
				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
				retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));
}
/*****************************************************************************************
 * The next three functions are utilities used by the above xor-execution functions.
 ****************************************************************************************/


/*
 * this is just a glorified buffer xor.  targbuf points to a buffer that is one full stripe unit
 * in size.  srcbuf points to a buffer that may be less than 1 SU, but never more.  When the
 * access described by pda is one SU in size (which by implication means it's SU-aligned),
 * all that happens is (targbuf) <- (srcbuf ^ targbuf).  When the access is less than one
 * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
 */

int
rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp)
	RF_Raid_t *raidPtr;
	RF_PhysDiskAddr_t *pda;
	char   *srcbuf;
	char   *targbuf;
	void   *bp;
{
	char   *targptr;
	int     sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	int     SUOffset = pda->startSector % sectPerSU;
	int     length, retcode = 0;

	RF_ASSERT(pda->numSector <= sectPerSU);

	targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
	length = rf_RaidAddressToByte(raidPtr, pda->numSector);
	retcode = rf_bxor(srcbuf, targptr, length, bp);
	return (retcode);
}
/* it really should be the case that the buffer pointers (returned by malloc)
 * are aligned to the natural word size of the machine, so this is the only
 * case we optimize for.  The length should always be a multiple of the sector
 * size, so there should be no problem with leftover bytes at the end.
 */
int
rf_bxor(src, dest, len, bp)
	char   *src;
	char   *dest;
	int     len;
	void   *bp;
{
	unsigned mask = sizeof(long) - 1, retcode = 0;

	/* only the fully longword-aligned case is supported; anything else
	 * trips the assertion below */
	if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) {
		retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
	} else {
		RF_ASSERT(0);
	}
	return (retcode);
}
/* map a user buffer into kernel space, if necessary */
#define REMAP_VA(_bp,x,y) (y) = (x)

/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
 * We don't want to assume anything about which input buffers are in kernel/user
 * space, nor about their alignment, so in each loop we compute the maximum number
 * of bytes that we can xor without crossing any page boundaries, and do only this many
 * bytes before the next remap.
628 */ 629 int 630 rf_longword_bxor(src, dest, len, bp) 631 unsigned long *src; 632 unsigned long *dest; 633 int len; /* longwords */ 634 void *bp; 635 { 636 unsigned long *end = src + len; 637 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 638 unsigned long *pg_src, *pg_dest; /* per-page source/dest 639 * pointers */ 640 int longs_this_time;/* # longwords to xor in the current iteration */ 641 642 REMAP_VA(bp, src, pg_src); 643 REMAP_VA(bp, dest, pg_dest); 644 if (!pg_src || !pg_dest) 645 return (EFAULT); 646 647 while (len >= 4) { 648 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 649 src += longs_this_time; 650 dest += longs_this_time; 651 len -= longs_this_time; 652 while (longs_this_time >= 4) { 653 d0 = pg_dest[0]; 654 d1 = pg_dest[1]; 655 d2 = pg_dest[2]; 656 d3 = pg_dest[3]; 657 s0 = pg_src[0]; 658 s1 = pg_src[1]; 659 s2 = pg_src[2]; 660 s3 = pg_src[3]; 661 pg_dest[0] = d0 ^ s0; 662 pg_dest[1] = d1 ^ s1; 663 pg_dest[2] = d2 ^ s2; 664 pg_dest[3] = d3 ^ s3; 665 pg_src += 4; 666 pg_dest += 4; 667 longs_this_time -= 4; 668 } 669 while (longs_this_time > 0) { /* cannot cross any page 670 * boundaries here */ 671 *pg_dest++ ^= *pg_src++; 672 longs_this_time--; 673 } 674 675 /* either we're done, or we've reached a page boundary on one 676 * (or possibly both) of the pointers */ 677 if (len) { 678 if (RF_PAGE_ALIGNED(src)) 679 REMAP_VA(bp, src, pg_src); 680 if (RF_PAGE_ALIGNED(dest)) 681 REMAP_VA(bp, dest, pg_dest); 682 if (!pg_src || !pg_dest) 683 return (EFAULT); 684 } 685 } 686 while (src < end) { 687 *pg_dest++ ^= *pg_src++; 688 src++; 689 dest++; 690 len--; 691 if (RF_PAGE_ALIGNED(src)) 692 REMAP_VA(bp, src, pg_src); 693 if (RF_PAGE_ALIGNED(dest)) 694 REMAP_VA(bp, dest, pg_dest); 695 } 696 RF_ASSERT(len == 0); 697 return (0); 698 } 699 700 #if 0 701 /* 702 dst = a ^ b ^ c; 703 a may equal dst 704 see comment above longword_bxor 705 */ 706 int 707 rf_longword_bxor3(dst, a, b, c, 
len, bp) 708 unsigned long *dst; 709 unsigned long *a; 710 unsigned long *b; 711 unsigned long *c; 712 int len; /* length in longwords */ 713 void *bp; 714 { 715 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 716 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 717 * pointers */ 718 int longs_this_time;/* # longs to xor in the current iteration */ 719 char dst_is_a = 0; 720 721 REMAP_VA(bp, a, pg_a); 722 REMAP_VA(bp, b, pg_b); 723 REMAP_VA(bp, c, pg_c); 724 if (a == dst) { 725 pg_dst = pg_a; 726 dst_is_a = 1; 727 } else { 728 REMAP_VA(bp, dst, pg_dst); 729 } 730 731 /* align dest to cache line. Can't cross a pg boundary on dst here. */ 732 while ((((unsigned long) pg_dst) & 0x1f)) { 733 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 734 dst++; 735 a++; 736 b++; 737 c++; 738 if (RF_PAGE_ALIGNED(a)) { 739 REMAP_VA(bp, a, pg_a); 740 if (!pg_a) 741 return (EFAULT); 742 } 743 if (RF_PAGE_ALIGNED(b)) { 744 REMAP_VA(bp, a, pg_b); 745 if (!pg_b) 746 return (EFAULT); 747 } 748 if (RF_PAGE_ALIGNED(c)) { 749 REMAP_VA(bp, a, pg_c); 750 if (!pg_c) 751 return (EFAULT); 752 } 753 len--; 754 } 755 756 while (len > 4) { 757 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 758 a += longs_this_time; 759 b += longs_this_time; 760 c += longs_this_time; 761 dst += longs_this_time; 762 len -= longs_this_time; 763 while (longs_this_time >= 4) { 764 a0 = pg_a[0]; 765 longs_this_time -= 4; 766 767 a1 = pg_a[1]; 768 a2 = pg_a[2]; 769 770 a3 = pg_a[3]; 771 pg_a += 4; 772 773 b0 = pg_b[0]; 774 b1 = pg_b[1]; 775 776 b2 = pg_b[2]; 777 b3 = pg_b[3]; 778 /* start dual issue */ 779 a0 ^= b0; 780 b0 = pg_c[0]; 781 782 pg_b += 4; 783 a1 ^= b1; 784 785 a2 ^= b2; 786 a3 ^= b3; 787 788 b1 = pg_c[1]; 789 a0 ^= b0; 790 791 b2 = pg_c[2]; 792 a1 ^= b1; 793 794 b3 = pg_c[3]; 795 a2 ^= b2; 796 797 pg_dst[0] = a0; 798 a3 ^= b3; 799 pg_dst[1] = a1; 800 pg_c += 4; 801 pg_dst[2] = a2; 802 pg_dst[3] = a3; 803 pg_dst += 4; 804 } 805 
while (longs_this_time > 0) { /* cannot cross any page 806 * boundaries here */ 807 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 808 longs_this_time--; 809 } 810 811 if (len) { 812 if (RF_PAGE_ALIGNED(a)) { 813 REMAP_VA(bp, a, pg_a); 814 if (!pg_a) 815 return (EFAULT); 816 if (dst_is_a) 817 pg_dst = pg_a; 818 } 819 if (RF_PAGE_ALIGNED(b)) { 820 REMAP_VA(bp, b, pg_b); 821 if (!pg_b) 822 return (EFAULT); 823 } 824 if (RF_PAGE_ALIGNED(c)) { 825 REMAP_VA(bp, c, pg_c); 826 if (!pg_c) 827 return (EFAULT); 828 } 829 if (!dst_is_a) 830 if (RF_PAGE_ALIGNED(dst)) { 831 REMAP_VA(bp, dst, pg_dst); 832 if (!pg_dst) 833 return (EFAULT); 834 } 835 } 836 } 837 while (len) { 838 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 839 dst++; 840 a++; 841 b++; 842 c++; 843 if (RF_PAGE_ALIGNED(a)) { 844 REMAP_VA(bp, a, pg_a); 845 if (!pg_a) 846 return (EFAULT); 847 if (dst_is_a) 848 pg_dst = pg_a; 849 } 850 if (RF_PAGE_ALIGNED(b)) { 851 REMAP_VA(bp, b, pg_b); 852 if (!pg_b) 853 return (EFAULT); 854 } 855 if (RF_PAGE_ALIGNED(c)) { 856 REMAP_VA(bp, c, pg_c); 857 if (!pg_c) 858 return (EFAULT); 859 } 860 if (!dst_is_a) 861 if (RF_PAGE_ALIGNED(dst)) { 862 REMAP_VA(bp, dst, pg_dst); 863 if (!pg_dst) 864 return (EFAULT); 865 } 866 len--; 867 } 868 return (0); 869 } 870 871 int 872 rf_bxor3(dst, a, b, c, len, bp) 873 unsigned char *dst; 874 unsigned char *a; 875 unsigned char *b; 876 unsigned char *c; 877 unsigned long len; 878 void *bp; 879 { 880 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 881 882 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 883 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 884 } 885 #endif 886