1 /* $NetBSD: rf_reconbuffer.c,v 1.6 2001/11/13 07:11:16 lukem Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 
27 */ 28 29 /*************************************************** 30 * 31 * rf_reconbuffer.c -- reconstruction buffer manager 32 * 33 ***************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.6 2001/11/13 07:11:16 lukem Exp $"); 37 38 #include "rf_raid.h" 39 #include "rf_reconbuffer.h" 40 #include "rf_acctrace.h" 41 #include "rf_etimer.h" 42 #include "rf_general.h" 43 #include "rf_debugprint.h" 44 #include "rf_revent.h" 45 #include "rf_reconutil.h" 46 #include "rf_nwayxor.h" 47 48 #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) 49 #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) 50 #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) 51 #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) 52 #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) 53 54 /***************************************************************************** 55 * 56 * Submit a reconstruction buffer to the manager for XOR. We can only 57 * submit a buffer if (1) we can xor into an existing buffer, which 58 * means we don't have to acquire a new one, (2) we can acquire a 59 * floating recon buffer, or (3) the caller has indicated that we are 60 * allowed to keep the submitted buffer. 61 * 62 * Returns non-zero if and only if we were not able to submit. 63 * In this case, we append the current disk ID to the wait list on the 64 * indicated RU, so that it will be re-enabled when we acquire a buffer 65 * for this RU. 66 * 67 ****************************************************************************/ 68 69 /* 70 * nWayXorFuncs[i] is a pointer to a function that will xor "i" 71 * bufs into the accumulating sum. 
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};

/*
 * Layout-dispatching wrapper: forwards the submission to the
 * SubmitReconBuffer method of this array's layout switch (typically
 * rf_SubmitReconBufferBasic, below).
 *
 * Returns non-zero iff the buffer could not be submitted.
 */
int
rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_LayoutSW_t *lp;
	int     rc;

	lp = rbuf->raidPtr->Layout.map;
	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
	return (rc);
}

/*
 * Basic (layout-independent) recon buffer submission.
 *
 * Tries, in order: (1) xor the submitted buffer into the accumulating
 * destination buffer for its parity stripe, (2) keep the submitted
 * buffer itself (keep_it), or (3) swap its data into a committed or
 * floating recon buffer.  If none of these is possible, the submitting
 * disk is appended to the buffer wait list and 1 is returned;
 * otherwise 0.
 *
 * Locking: acquires the PSS mutex for this parity stripe and the recon
 * ctrl rb_mutex; both are released on every return path.
 */
int
rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	caddr_t ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR.  If
	 * so, there's no need to acquire a floating rbuf.  Before we can do
	 * any XORing, we must have acquired a destination buffer.  If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe.  Otherwise, we have to go acquire a
	 * floating rbuf. */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			/* buffer goes back to the submitter, so charge the
			 * XOR time to the submitting disk's trace record */
			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			/* caller surrendered the buffer but we xor'd in place
			 * and don't need it: free it and return */
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
		/* if every surviving disk is now blocked waiting for a buffer
		 * and there is no full buffer to drain, nothing can make
		 * progress: report the deadlock and panic */
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {	/* might want to maintain head/tail pointers
			 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer */
	if (t != rbuf) {
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		/* hand the submitted data over to "t" by exchanging the data
		 * pointers rather than copying the sector data */
		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer.  subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
											 * G=2 */

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}

/*
 * XOR all buffers accumulated in pssPtr->rbufsForXor into the
 * destination buffer (pssPtr->rbuf), then release every source buffer
 * except the last (which belongs to the disk whose submission
 * triggered this XOR).  Resets xorBufCount to 0 and bumps the
 * destination buffer's count by the number of bufs folded in.
 *
 * Always returns 0.
 */
int
rf_MultiWayReconXor(raidPtr, pssPtr)
	RF_Raid_t *raidPtr;
	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
						 * parity stripe */
{
	int     i, numBufs = pssPtr->xorBufCount;
	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#ifndef __NetBSD__
	thread_block();		/* yield the processor before doing a big XOR */
#endif
#endif				/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs?  nWayXorFuncs only has entries for
	 * 1..9 source buffers.
	 */
	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));

	/* release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}
/* removes one full buffer from one of the full-buffer lists and returns it.
 *
 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
 */
RF_ReconBuffer_t *
rf_GetFullReconBuffer(reconCtrlPtr)
	RF_ReconCtrl_t *reconCtrlPtr;
{
	RF_ReconBuffer_t *p;

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	/* the priority list is drained before the regular full-buffer list */
	if ((p = reconCtrlPtr->priorityList) != NULL) {
		reconCtrlPtr->priorityList = p->next;
		p->next = NULL;
		goto out;
	}
	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
		reconCtrlPtr->fullBufferList = p->next;
		p->next = NULL;
		goto out;
	}
	/* both lists empty: p is NULL here and we return NULL */
out:
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (p);
}


/* if the reconstruction buffer is full, move it to the full list,
 * which is maintained sorted by failed disk sector offset
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
 *
 * Always returns 0.
 */
int
rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
	RF_Raid_t *raidPtr;
	RF_ReconCtrl_t *reconCtrl;
	RF_ReconParityStripeStatus_t *pssPtr;
	int     numDataCol;
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			/* walk to the insertion point that keeps the list
			 * sorted by failedDiskSectorOffset */
			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
#if 0
		pssPtr->writeRbuf = pssPtr->rbuf;	/* DEBUG ONLY:  we like
							 * to be able to find
							 * this rbuf while it's
							 * awaiting write */
#else
		rbuf->pssPtr = pssPtr;
#endif
		pssPtr->rbuf = NULL;
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
	}
	return (0);
}


/* release a floating recon buffer for someone else to use.
 * assumes the rb_mutex is LOCKED at entry
 */
void
rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
	RF_Raid_t *raidPtr;
	RF_RowCol_t row;
	RF_ReconBuffer_t *rbuf;
{
	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
	RF_CallbackDesc_t *cb;

	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
	    (long) rbuf->parityStripeID, rbuf->which_ru);

	/* if anyone is waiting on buffers, wake one of them up.  They will
	 * subsequently wake up anyone else waiting on their RU */
	if (rcPtr->bufferWaitList) {
		/* the released buffer is committed to the first waiter */
		rbuf->next = rcPtr->committedRbufs;
		rcPtr->committedRbufs = rbuf;
		cb = rcPtr->bufferWaitList;
		rcPtr->bufferWaitList = cb->next;
		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
												 * committed a buffer */
		rf_FreeCallbackDesc(cb);
		raidPtr->procsInBufWait--;
	} else {
		/* nobody is waiting: put it back on the floating list */
		rbuf->next = rcPtr->floatingRbufs;
		rcPtr->floatingRbufs = rbuf;
	}
}
/* release any disk that is waiting on a buffer for the indicated RU.
417 * assumes the rb_mutex is LOCKED at entry 418 */ 419 void 420 rf_ReleaseBufferWaiters(raidPtr, pssPtr) 421 RF_Raid_t *raidPtr; 422 RF_ReconParityStripeStatus_t *pssPtr; 423 { 424 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; 425 426 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", 427 (long) pssPtr->parityStripeID, pssPtr->which_ru); 428 pssPtr->flags &= ~RF_PSS_BUFFERWAIT; 429 while (cb) { 430 cb1 = cb->next; 431 cb->next = NULL; 432 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't 433 * committed a buffer */ 434 rf_FreeCallbackDesc(cb); 435 cb = cb1; 436 } 437 pssPtr->bufWaitList = NULL; 438 } 439 /* when reconstruction is forced on an RU, there may be some disks waiting to 440 * acquire a buffer for that RU. Since we allocate a new buffer as part of 441 * the forced-reconstruction process, we no longer have to wait for any 442 * buffers, so we wakeup any waiter that we find in the bufferWaitList 443 * 444 * assumes the rb_mutex is LOCKED at entry 445 */ 446 void 447 rf_ReleaseBufferWaiter(rcPtr, rbuf) 448 RF_ReconCtrl_t *rcPtr; 449 RF_ReconBuffer_t *rbuf; 450 { 451 RF_CallbackDesc_t *cb, *cbt; 452 453 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { 454 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { 455 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); 456 if (cbt) 457 cbt->next = cb->next; 458 else 459 rcPtr->bufferWaitList = cb->next; 460 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no 461 * committed buffer */ 462 rf_FreeCallbackDesc(cb); 463 return; 464 } 465 } 466 } 467