xref: /netbsd-src/sys/dev/raidframe/rf_reconbuffer.c (revision e4d7c2e329d54c97e0c0bd3016bbe74f550c3d5e)
1 /*	$NetBSD: rf_reconbuffer.c,v 1.3 1999/02/05 00:06:16 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /***************************************************
30  *
31  * rf_reconbuffer.c -- reconstruction buffer manager
32  *
33  ***************************************************/
34 
35 #include "rf_raid.h"
36 #include "rf_reconbuffer.h"
37 #include "rf_acctrace.h"
38 #include "rf_etimer.h"
39 #include "rf_general.h"
40 #include "rf_debugprint.h"
41 #include "rf_revent.h"
42 #include "rf_reconutil.h"
43 #include "rf_nwayxor.h"
44 
/*
 * Debug tracing helpers.  DprintfN forwards a format string and N arguments
 * to printf() only when rf_reconbufferDebug is non-zero.
 *
 * Each expansion is wrapped in do { } while (0) so that "DprintfN(...);" is
 * exactly one statement; a bare "if" expansion would capture the "else" of
 * an enclosing unbraced if/else.
 */
#define Dprintf1(s,a) do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)
50 
51 /*****************************************************************************************
52  *
53  * Submit a reconstruction buffer to the manager for XOR.
54  * We can only submit a buffer if (1) we can xor into an existing buffer, which means
55  * we don't have to acquire a new one, (2) we can acquire a floating
56  * recon buffer, or (3) the caller has indicated that we are allowed to keep the
57  * submitted buffer.
58  *
59  * Returns non-zero if and only if we were not able to submit.
60  * In this case, we append the current disk ID to the wait list on the indicated
61  * RU, so that it will be re-enabled when we acquire a buffer for this RU.
62  *
63  ****************************************************************************************/
64 
/*
 * Just to make the code below more readable.
 *
 * Allocates a callback descriptor into _cb_, records (_row_, _col_) in it
 * and links it at the head of (_pssPtr_)->bufWaitList.  Wrapped in
 * do { } while (0) so the whole sequence behaves as a single statement;
 * invoke it with a trailing semicolon.
 */
#define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_) \
  do {                                                \
    (_cb_) = rf_AllocCallbackDesc();                  \
    (_cb_)->row = (_row_);                            \
    (_cb_)->col = (_col_);                            \
    (_cb_)->next = (_pssPtr_)->bufWaitList;           \
    (_pssPtr_)->bufWaitList = (_cb_);                 \
  } while (0)
69 
70 /*
71  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
72  * bufs into the accumulating sum.
73  */
74 static RF_VoidFuncPtr nWayXorFuncs[] = {
75 	NULL,
76 	(RF_VoidFuncPtr) rf_nWayXor1,
77 	(RF_VoidFuncPtr) rf_nWayXor2,
78 	(RF_VoidFuncPtr) rf_nWayXor3,
79 	(RF_VoidFuncPtr) rf_nWayXor4,
80 	(RF_VoidFuncPtr) rf_nWayXor5,
81 	(RF_VoidFuncPtr) rf_nWayXor6,
82 	(RF_VoidFuncPtr) rf_nWayXor7,
83 	(RF_VoidFuncPtr) rf_nWayXor8,
84 	(RF_VoidFuncPtr) rf_nWayXor9
85 };
86 
87 int
88 rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
89 	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
90 	int     keep_it;	/* whether we can keep this buffer or we have
91 				 * to return it */
92 	int     use_committed;	/* whether to use a committed or an available
93 				 * recon buffer */
94 {
95 	RF_LayoutSW_t *lp;
96 	int     rc;
97 
98 	lp = rbuf->raidPtr->Layout.map;
99 	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
100 	return (rc);
101 }
102 
103 int
104 rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
105 	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
106 	int     keep_it;	/* whether we can keep this buffer or we have
107 				 * to return it */
108 	int     use_committed;	/* whether to use a committed or an available
109 				 * recon buffer */
110 {
111 	RF_Raid_t *raidPtr = rbuf->raidPtr;
112 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
113 	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
114 	RF_ReconParityStripeStatus_t *pssPtr;
115 	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
116 							 * pointers */
117 	caddr_t ta;		/* temporary data buffer pointer */
118 	RF_CallbackDesc_t *cb, *p;
119 	int     retcode = 0, created = 0;
120 
121 	RF_Etimer_t timer;
122 
123 	/* makes no sense to have a submission from the failed disk */
124 	RF_ASSERT(rbuf);
125 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
126 
127 	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
128 	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
129 
130 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
131 
132 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
133 
134 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
135 	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
136 				 * an rbuf for it */
137 
138 	/* check to see if enough buffers have accumulated to do an XOR.  If
139 	 * so, there's no need to acquire a floating rbuf.  Before we can do
140 	 * any XORing, we must have acquired a destination buffer.  If we
141 	 * have, then we can go ahead and do the XOR if (1) including this
142 	 * buffer, enough bufs have accumulated, or (2) this is the last
143 	 * submission for this stripe. Otherwise, we have to go acquire a
144 	 * floating rbuf. */
145 
146 	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
147 	if ((targetRbuf != NULL) &&
148 	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
149 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
150 		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
151 		RF_ETIMER_START(timer);
152 		rf_MultiWayReconXor(raidPtr, pssPtr);
153 		RF_ETIMER_STOP(timer);
154 		RF_ETIMER_EVAL(timer);
155 		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
156 		if (!keep_it) {
157 			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
158 			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
159 			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
160 			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
161 			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
162 			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
163 
164 			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
165 		}
166 		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
167 
168 		/* if use_committed is on, we _must_ consume a buffer off the
169 		 * committed list. */
170 		if (use_committed) {
171 			t = reconCtrlPtr->committedRbufs;
172 			RF_ASSERT(t);
173 			reconCtrlPtr->committedRbufs = t->next;
174 			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
175 		}
176 		if (keep_it) {
177 			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
178 			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
179 			rf_FreeReconBuffer(rbuf);
180 			return (retcode);
181 		}
182 		goto out;
183 	}
184 	/* set the value of "t", which we'll use as the rbuf from here on */
185 	if (keep_it) {
186 		t = rbuf;
187 	} else {
188 		if (use_committed) {	/* if a buffer has been committed to
189 					 * us, use it */
190 			t = reconCtrlPtr->committedRbufs;
191 			RF_ASSERT(t);
192 			reconCtrlPtr->committedRbufs = t->next;
193 			t->next = NULL;
194 		} else
195 			if (reconCtrlPtr->floatingRbufs) {
196 				t = reconCtrlPtr->floatingRbufs;
197 				reconCtrlPtr->floatingRbufs = t->next;
198 				t->next = NULL;
199 			}
200 	}
201 
202 	/* If we weren't able to acquire a buffer, append to the end of the
203 	 * buf list in the recon ctrl struct. */
204 	if (!t) {
205 		RF_ASSERT(!keep_it && !use_committed);
206 		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);
207 
208 		raidPtr->procsInBufWait++;
209 		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
210 			printf("Buffer wait deadlock detected.  Exiting.\n");
211 			rf_PrintPSStatusTable(raidPtr, rbuf->row);
212 			RF_PANIC();
213 		}
214 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
215 		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
216 						 * recon ctrl structure */
217 		cb->row = rbuf->row;
218 		cb->col = rbuf->col;
219 		cb->callbackArg.v = rbuf->parityStripeID;
220 		cb->callbackArg2.v = rbuf->which_ru;
221 		cb->next = NULL;
222 		if (!reconCtrlPtr->bufferWaitList)
223 			reconCtrlPtr->bufferWaitList = cb;
224 		else {		/* might want to maintain head/tail pointers
225 				 * here rather than search for end of list */
226 			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
227 			p->next = cb;
228 		}
229 		retcode = 1;
230 		goto out;
231 	}
232 	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
233 	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
234 	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
235 	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
236 	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
237 	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
238 
239 	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
240 
241 	/* initialize the buffer */
242 	if (t != rbuf) {
243 		t->row = rbuf->row;
244 		t->col = reconCtrlPtr->fcol;
245 		t->parityStripeID = rbuf->parityStripeID;
246 		t->which_ru = rbuf->which_ru;
247 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
248 		t->spRow = rbuf->spRow;
249 		t->spCol = rbuf->spCol;
250 		t->spOffset = rbuf->spOffset;
251 
252 		ta = t->buffer;
253 		t->buffer = rbuf->buffer;
254 		rbuf->buffer = ta;	/* swap buffers */
255 	}
256 	/* the first installation always gets installed as the destination
257 	 * buffer. subsequent installations get stacked up to allow for
258 	 * multi-way XOR */
259 	if (!pssPtr->rbuf) {
260 		pssPtr->rbuf = t;
261 		t->count = 1;
262 	} else
263 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */
264 
265 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
266 											 * G=2 */
267 
268 out:
269 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
270 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
271 	return (retcode);
272 }
273 
274 int
275 rf_MultiWayReconXor(raidPtr, pssPtr)
276 	RF_Raid_t *raidPtr;
277 	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
278 						 * parity stripe */
279 {
280 	int     i, numBufs = pssPtr->xorBufCount;
281 	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
282 	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
283 	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
284 
285 	RF_ASSERT(pssPtr->rbuf != NULL);
286 	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
287 #ifdef _KERNEL
288 #ifndef __NetBSD__
289 	thread_block();		/* yield the processor before doing a big XOR */
290 #endif
291 #endif				/* _KERNEL */
292 	/*
293          * XXX
294          *
295          * What if more than 9 bufs?
296          */
297 	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
298 
299 	/* release all the reconstruction buffers except the last one, which
300 	 * belongs to the the disk who's submission caused this XOR to take
301 	 * place */
302 	for (i = 0; i < numBufs - 1; i++) {
303 		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
304 			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
305 		else
306 			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
307 				rf_FreeReconBuffer(rbufs[i]);
308 			else
309 				RF_ASSERT(0);
310 	}
311 	targetRbuf->count += pssPtr->xorBufCount;
312 	pssPtr->xorBufCount = 0;
313 	return (0);
314 }
315 /* removes one full buffer from one of the full-buffer lists and returns it.
316  *
317  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
318  */
319 RF_ReconBuffer_t *
320 rf_GetFullReconBuffer(reconCtrlPtr)
321 	RF_ReconCtrl_t *reconCtrlPtr;
322 {
323 	RF_ReconBuffer_t *p;
324 
325 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
326 
327 	if ((p = reconCtrlPtr->priorityList) != NULL) {
328 		reconCtrlPtr->priorityList = p->next;
329 		p->next = NULL;
330 		goto out;
331 	}
332 	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
333 		reconCtrlPtr->fullBufferList = p->next;
334 		p->next = NULL;
335 		goto out;
336 	}
337 out:
338 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
339 	return (p);
340 }
341 
342 
343 /* if the reconstruction buffer is full, move it to the full list, which is maintained
344  * sorted by failed disk sector offset
345  *
346  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
347  */
348 int
349 rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
350 	RF_Raid_t *raidPtr;
351 	RF_ReconCtrl_t *reconCtrl;
352 	RF_ReconParityStripeStatus_t *pssPtr;
353 	int     numDataCol;
354 {
355 	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
356 
357 	if (rbuf->count == numDataCol) {
358 		raidPtr->numFullReconBuffers++;
359 		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
360 		    (long) rbuf->parityStripeID, rbuf->which_ru);
361 		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
362 			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
363 			    (long) rbuf->parityStripeID, rbuf->which_ru);
364 			rbuf->next = reconCtrl->fullBufferList;
365 			reconCtrl->fullBufferList = rbuf;
366 		} else {
367 			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
368 			rbuf->next = p;
369 			pt->next = rbuf;
370 			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
371 			    (long) rbuf->parityStripeID, rbuf->which_ru);
372 		}
373 #if 0
374 		pssPtr->writeRbuf = pssPtr->rbuf;	/* DEBUG ONLY:  we like
375 							 * to be able to find
376 							 * this rbuf while it's
377 							 * awaiting write */
378 #else
379 		rbuf->pssPtr = pssPtr;
380 #endif
381 		pssPtr->rbuf = NULL;
382 		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
383 	}
384 	return (0);
385 }
386 
387 
388 /* release a floating recon buffer for someone else to use.
389  * assumes the rb_mutex is LOCKED at entry
390  */
391 void
392 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
393 	RF_Raid_t *raidPtr;
394 	RF_RowCol_t row;
395 	RF_ReconBuffer_t *rbuf;
396 {
397 	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
398 	RF_CallbackDesc_t *cb;
399 
400 	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
401 	    (long) rbuf->parityStripeID, rbuf->which_ru);
402 
403 	/* if anyone is waiting on buffers, wake one of them up.  They will
404 	 * subsequently wake up anyone else waiting on their RU */
405 	if (rcPtr->bufferWaitList) {
406 		rbuf->next = rcPtr->committedRbufs;
407 		rcPtr->committedRbufs = rbuf;
408 		cb = rcPtr->bufferWaitList;
409 		rcPtr->bufferWaitList = cb->next;
410 		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
411 												 * committed a buffer */
412 		rf_FreeCallbackDesc(cb);
413 		raidPtr->procsInBufWait--;
414 	} else {
415 		rbuf->next = rcPtr->floatingRbufs;
416 		rcPtr->floatingRbufs = rbuf;
417 	}
418 }
419 /* release any disk that is waiting on a buffer for the indicated RU.
420  * assumes the rb_mutex is LOCKED at entry
421  */
422 void
423 rf_ReleaseBufferWaiters(raidPtr, pssPtr)
424 	RF_Raid_t *raidPtr;
425 	RF_ReconParityStripeStatus_t *pssPtr;
426 {
427 	RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
428 
429 	Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
430 	    (long) pssPtr->parityStripeID, pssPtr->which_ru);
431 	pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
432 	while (cb) {
433 		cb1 = cb->next;
434 		cb->next = NULL;
435 		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR);	/* arg==0 => we haven't
436 												 * committed a buffer */
437 		rf_FreeCallbackDesc(cb);
438 		cb = cb1;
439 	}
440 	pssPtr->bufWaitList = NULL;
441 }
442 /* when reconstruction is forced on an RU, there may be some disks waiting to
443  * acquire a buffer for that RU.  Since we allocate a new buffer as part of
444  * the forced-reconstruction process, we no longer have to wait for any
445  * buffers, so we wakeup any waiter that we find in the bufferWaitList
446  *
447  * assumes the rb_mutex is LOCKED at entry
448  */
449 void
450 rf_ReleaseBufferWaiter(rcPtr, rbuf)
451 	RF_ReconCtrl_t *rcPtr;
452 	RF_ReconBuffer_t *rbuf;
453 {
454 	RF_CallbackDesc_t *cb, *cbt;
455 
456 	for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
457 		if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
458 			Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
459 			if (cbt)
460 				cbt->next = cb->next;
461 			else
462 				rcPtr->bufferWaitList = cb->next;
463 			rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);	/* arg==0 => no
464 																 * committed buffer */
465 			rf_FreeCallbackDesc(cb);
466 			return;
467 		}
468 	}
469 }
470