1 /* $NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 
27 */ 28 29 /*************************************************** 30 * 31 * rf_reconbuffer.c -- reconstruction buffer manager 32 * 33 ***************************************************/ 34 35 #include "rf_raid.h" 36 #include "rf_reconbuffer.h" 37 #include "rf_acctrace.h" 38 #include "rf_etimer.h" 39 #include "rf_general.h" 40 #include "rf_debugprint.h" 41 #include "rf_revent.h" 42 #include "rf_reconutil.h" 43 #include "rf_nwayxor.h" 44 45 #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) 46 #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) 47 #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) 48 #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) 49 #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) 50 51 /***************************************************************************************** 52 * 53 * Submit a reconstruction buffer to the manager for XOR. 54 * We can only submit a buffer if (1) we can xor into an existing buffer, which means 55 * we don't have to acquire a new one, (2) we can acquire a floating 56 * recon buffer, or (3) the caller has indicated that we are allowed to keep the 57 * submitted buffer. 58 * 59 * Returns non-zero if and only if we were not able to submit. 60 * In this case, we append the current disk ID to the wait list on the indicated 61 * RU, so that it will be re-enabled when we acquire a buffer for this RU. 62 * 63 ****************************************************************************************/ 64 65 /* just to make the code below more readable */ 66 #define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_) \ 67 _cb_ = rf_AllocCallbackDesc(); \ 68 (_cb_)->row = (_row_); (_cb_)->col = (_col_); (_cb_)->next = (_pssPtr_)->bufWaitList; (_pssPtr_)->bufWaitList = (_cb_); 69 70 /* 71 * nWayXorFuncs[i] is a pointer to a function that will xor "i" 72 * bufs into the accumulating sum. 
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};

/*
 * Dispatch a recon buffer submission to the layout-specific handler
 * (e.g. rf_SubmitReconBufferBasic below).  Returns whatever the
 * handler returns: non-zero iff the buffer could not be submitted.
 */
int
rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_LayoutSW_t *lp;
	int     rc;

	lp = rbuf->raidPtr->Layout.map;
	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
	return (rc);
}

/*
 * Basic (default-layout) recon buffer submission.
 *
 * Acquires the PSS mutex for this parity stripe and the global
 * rb_mutex, and holds both for the duration (the early keep_it return
 * and the "out" label both release them).
 *
 * Returns 0 on successful submission, 1 if no buffer could be
 * acquired (in which case the caller's disk is queued on the recon
 * ctrl's bufferWaitList and will be re-enabled via RF_REVENT_BUFCLEAR).
 */
int
rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	caddr_t ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR. If
	 * so, there's no need to acquire a floating rbuf. Before we can do
	 * any XORing, we must have acquired a destination buffer. If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe. Otherwise, we have to go acquire a
	 * floating rbuf. */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			/* charge the XOR time to the submitting disk's trace
			 * record and restart its timer */
			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			/* caller gave us the buffer outright; free it and
			 * return early (both locks released here) */
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
		/* if every surviving disk is blocked waiting for a buffer
		 * and no full buffers exist to drain, nothing can make
		 * progress: panic rather than hang */
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected.  Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {		/* might want to maintain head/tail pointers
				 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer.  Note the data-pointer swap below: the
	 * acquired buffer "t" takes over the submitted data, and the
	 * submitted rbuf gets t's (empty) data area back */
	if (t != rbuf) {
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer. subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
											 * G=2 */

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}

/*
 * XOR all accumulated source buffers for this parity stripe into the
 * destination buffer (pssPtr->rbuf), release the consumed sources, and
 * reset the accumulation count.  Always returns 0.
 */
int
rf_MultiWayReconXor(raidPtr, pssPtr)
	RF_Raid_t *raidPtr;
	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
						 * parity stripe */
{
	int     i, numBufs = pssPtr->xorBufCount;
	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#ifndef __NetBSD__
	thread_block();		/* yield the processor before doing a big XOR */
#endif
#endif				/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs?
	 *
	 * NOTE(review): nWayXorFuncs has entries for 1..9 sources only; if
	 * RF_PS_MAX_BUFS allows numBufs > 9 this indexes past the table.
	 * Presumably rf_numBufsToAccumulate keeps numBufs <= 9 in practice
	 * -- confirm against the table sizing before changing either limit.
	 */
	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));

	/* release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}
/* removes one full buffer from one of the full-buffer lists and returns it.
315 * 316 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 317 */ 318 RF_ReconBuffer_t * 319 rf_GetFullReconBuffer(reconCtrlPtr) 320 RF_ReconCtrl_t *reconCtrlPtr; 321 { 322 RF_ReconBuffer_t *p; 323 324 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 325 326 if ((p = reconCtrlPtr->priorityList) != NULL) { 327 reconCtrlPtr->priorityList = p->next; 328 p->next = NULL; 329 goto out; 330 } 331 if ((p = reconCtrlPtr->fullBufferList) != NULL) { 332 reconCtrlPtr->fullBufferList = p->next; 333 p->next = NULL; 334 goto out; 335 } 336 out: 337 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 338 return (p); 339 } 340 341 342 /* if the reconstruction buffer is full, move it to the full list, which is maintained 343 * sorted by failed disk sector offset 344 * 345 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. 346 */ 347 int 348 rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) 349 RF_Raid_t *raidPtr; 350 RF_ReconCtrl_t *reconCtrl; 351 RF_ReconParityStripeStatus_t *pssPtr; 352 int numDataCol; 353 { 354 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 355 356 if (rbuf->count == numDataCol) { 357 raidPtr->numFullReconBuffers++; 358 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", 359 (long) rbuf->parityStripeID, rbuf->which_ru); 360 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { 361 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", 362 (long) rbuf->parityStripeID, rbuf->which_ru); 363 rbuf->next = reconCtrl->fullBufferList; 364 reconCtrl->fullBufferList = rbuf; 365 } else { 366 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); 367 rbuf->next = p; 368 pt->next = rbuf; 369 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", 370 (long) rbuf->parityStripeID, rbuf->which_ru); 371 } 372 #if 0 373 pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like 374 * to be able to find 375 * this rbuf while it's 
376 * awaiting write */ 377 #else 378 rbuf->pssPtr = pssPtr; 379 #endif 380 pssPtr->rbuf = NULL; 381 rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); 382 } 383 return (0); 384 } 385 386 387 /* release a floating recon buffer for someone else to use. 388 * assumes the rb_mutex is LOCKED at entry 389 */ 390 void 391 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) 392 RF_Raid_t *raidPtr; 393 RF_RowCol_t row; 394 RF_ReconBuffer_t *rbuf; 395 { 396 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; 397 RF_CallbackDesc_t *cb; 398 399 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", 400 (long) rbuf->parityStripeID, rbuf->which_ru); 401 402 /* if anyone is waiting on buffers, wake one of them up. They will 403 * subsequently wake up anyone else waiting on their RU */ 404 if (rcPtr->bufferWaitList) { 405 rbuf->next = rcPtr->committedRbufs; 406 rcPtr->committedRbufs = rbuf; 407 cb = rcPtr->bufferWaitList; 408 rcPtr->bufferWaitList = cb->next; 409 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've 410 * committed a buffer */ 411 rf_FreeCallbackDesc(cb); 412 raidPtr->procsInBufWait--; 413 } else { 414 rbuf->next = rcPtr->floatingRbufs; 415 rcPtr->floatingRbufs = rbuf; 416 } 417 } 418 /* release any disk that is waiting on a buffer for the indicated RU. 
419 * assumes the rb_mutex is LOCKED at entry 420 */ 421 void 422 rf_ReleaseBufferWaiters(raidPtr, pssPtr) 423 RF_Raid_t *raidPtr; 424 RF_ReconParityStripeStatus_t *pssPtr; 425 { 426 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; 427 428 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", 429 (long) pssPtr->parityStripeID, pssPtr->which_ru); 430 pssPtr->flags &= ~RF_PSS_BUFFERWAIT; 431 while (cb) { 432 cb1 = cb->next; 433 cb->next = NULL; 434 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't 435 * committed a buffer */ 436 rf_FreeCallbackDesc(cb); 437 cb = cb1; 438 } 439 pssPtr->bufWaitList = NULL; 440 } 441 /* when reconstruction is forced on an RU, there may be some disks waiting to 442 * acquire a buffer for that RU. Since we allocate a new buffer as part of 443 * the forced-reconstruction process, we no longer have to wait for any 444 * buffers, so we wakeup any waiter that we find in the bufferWaitList 445 * 446 * assumes the rb_mutex is LOCKED at entry 447 */ 448 void 449 rf_ReleaseBufferWaiter(rcPtr, rbuf) 450 RF_ReconCtrl_t *rcPtr; 451 RF_ReconBuffer_t *rbuf; 452 { 453 RF_CallbackDesc_t *cb, *cbt; 454 455 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { 456 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { 457 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); 458 if (cbt) 459 cbt->next = cb->next; 460 else 461 rcPtr->bufferWaitList = cb->next; 462 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no 463 * committed buffer */ 464 rf_FreeCallbackDesc(cb); 465 return; 466 } 467 } 468 } 469