xref: /netbsd-src/sys/dev/raidframe/rf_reconbuffer.c (revision d20841bb642898112fe68f0ad3f7b26dddf56f07)
1 /*	$NetBSD: rf_reconbuffer.c,v 1.17 2004/01/02 11:52:15 hannken Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /***************************************************
30  *
31  * rf_reconbuffer.c -- reconstruction buffer manager
32  *
33  ***************************************************/
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.17 2004/01/02 11:52:15 hannken Exp $");
37 
38 #include "rf_raid.h"
39 #include "rf_reconbuffer.h"
40 #include "rf_acctrace.h"
41 #include "rf_etimer.h"
42 #include "rf_general.h"
43 #include "rf_revent.h"
44 #include "rf_reconutil.h"
45 #include "rf_nwayxor.h"
46 
#ifdef DEBUG

/*
 * Debug tracing helpers.  DprintfN takes a format string plus N
 * arguments and prints them only when rf_reconbufferDebug is non-zero.
 *
 * Every variant is wrapped in do { } while (0) so that it expands to
 * exactly one statement: "if (x) Dprintf1(...); else ..." then parses
 * the way it reads, with no dangling-else or stray-semicolon surprise.
 */
#define Dprintf1(s,a) do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

/* Without DEBUG the helpers compile to nothing, but stay one statement. */
#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif
64 
65 /*****************************************************************************
66  *
67  * Submit a reconstruction buffer to the manager for XOR.  We can only
68  * submit a buffer if (1) we can xor into an existing buffer, which
69  * means we don't have to acquire a new one, (2) we can acquire a
70  * floating recon buffer, or (3) the caller has indicated that we are
71  * allowed to keep the submitted buffer.
72  *
73  * Returns non-zero if and only if we were not able to submit.
74  * In this case, we append the current disk ID to the wait list on the
75  * indicated RU, so that it will be re-enabled when we acquire a buffer
76  * for this RU.
77  *
78  ****************************************************************************/
79 
80 /*
81  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
82  * bufs into the accumulating sum.
83  */
84 static const RF_VoidFuncPtr nWayXorFuncs[] = {
85 	NULL,
86 	(RF_VoidFuncPtr) rf_nWayXor1,
87 	(RF_VoidFuncPtr) rf_nWayXor2,
88 	(RF_VoidFuncPtr) rf_nWayXor3,
89 	(RF_VoidFuncPtr) rf_nWayXor4,
90 	(RF_VoidFuncPtr) rf_nWayXor5,
91 	(RF_VoidFuncPtr) rf_nWayXor6,
92 	(RF_VoidFuncPtr) rf_nWayXor7,
93 	(RF_VoidFuncPtr) rf_nWayXor8,
94 	(RF_VoidFuncPtr) rf_nWayXor9
95 };
96 
97 /*
98  * rbuf          - the recon buffer to submit
99  * keep_it       - whether we can keep this buffer or we have to return it
100  * use_committed - whether to use a committed or an available recon buffer
101  */
102 int
103 rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed)
104 {
105 	const RF_LayoutSW_t *lp;
106 	int     rc;
107 
108 	lp = rbuf->raidPtr->Layout.map;
109 	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
110 	return (rc);
111 }
112 
113 /*
114  * rbuf          - the recon buffer to submit
115  * keep_it       - whether we can keep this buffer or we have to return it
116  * use_committed - whether to use a committed or an available recon buffer
117  */
118 int
119 rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it,
120 			  int use_committed)
121 {
122 	RF_Raid_t *raidPtr = rbuf->raidPtr;
123 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
124 	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl;
125 	RF_ReconParityStripeStatus_t *pssPtr;
126 	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
127 							 * pointers */
128 	caddr_t ta;		/* temporary data buffer pointer */
129 	RF_CallbackDesc_t *cb, *p;
130 	int     retcode = 0, created = 0;
131 
132 	RF_Etimer_t timer;
133 
134 	/* makes no sense to have a submission from the failed disk */
135 	RF_ASSERT(rbuf);
136 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
137 
138 	Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n",
139 	    rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
140 
141 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
142 
143 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
144 
145 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
146 	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
147 				 * an rbuf for it */
148 
149 	/* check to see if enough buffers have accumulated to do an XOR.  If
150 	 * so, there's no need to acquire a floating rbuf.  Before we can do
151 	 * any XORing, we must have acquired a destination buffer.  If we
152 	 * have, then we can go ahead and do the XOR if (1) including this
153 	 * buffer, enough bufs have accumulated, or (2) this is the last
154 	 * submission for this stripe. Otherwise, we have to go acquire a
155 	 * floating rbuf. */
156 
157 	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
158 	if ((targetRbuf != NULL) &&
159 	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
160 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
161 		Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount);
162 		RF_ETIMER_START(timer);
163 		rf_MultiWayReconXor(raidPtr, pssPtr);
164 		RF_ETIMER_STOP(timer);
165 		RF_ETIMER_EVAL(timer);
166 		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
167 		if (!keep_it) {
168 			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
169 			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
170 			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
171 			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
172 			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
173 			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
174 
175 			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
176 		}
177 		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
178 
179 		/* if use_committed is on, we _must_ consume a buffer off the
180 		 * committed list. */
181 		if (use_committed) {
182 			t = reconCtrlPtr->committedRbufs;
183 			RF_ASSERT(t);
184 			reconCtrlPtr->committedRbufs = t->next;
185 			rf_ReleaseFloatingReconBuffer(raidPtr, t);
186 		}
187 		if (keep_it) {
188 			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
189 			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
190 			rf_FreeReconBuffer(rbuf);
191 			return (retcode);
192 		}
193 		goto out;
194 	}
195 	/* set the value of "t", which we'll use as the rbuf from here on */
196 	if (keep_it) {
197 		t = rbuf;
198 	} else {
199 		if (use_committed) {	/* if a buffer has been committed to
200 					 * us, use it */
201 			t = reconCtrlPtr->committedRbufs;
202 			RF_ASSERT(t);
203 			reconCtrlPtr->committedRbufs = t->next;
204 			t->next = NULL;
205 		} else
206 			if (reconCtrlPtr->floatingRbufs) {
207 				t = reconCtrlPtr->floatingRbufs;
208 				reconCtrlPtr->floatingRbufs = t->next;
209 				t->next = NULL;
210 			}
211 	}
212 
213 	/* If we weren't able to acquire a buffer, append to the end of the
214 	 * buf list in the recon ctrl struct. */
215 	if (!t) {
216 		RF_ASSERT(!keep_it && !use_committed);
217 		Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col);
218 
219 		raidPtr->procsInBufWait++;
220 		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
221 			printf("Buffer wait deadlock detected.  Exiting.\n");
222 			rf_PrintPSStatusTable(raidPtr);
223 			RF_PANIC();
224 		}
225 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
226 		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
227 						 * recon ctrl structure */
228 		cb->col = rbuf->col;
229 		cb->callbackArg.v = rbuf->parityStripeID;
230 		cb->callbackArg2.v = rbuf->which_ru;
231 		cb->next = NULL;
232 		if (!reconCtrlPtr->bufferWaitList)
233 			reconCtrlPtr->bufferWaitList = cb;
234 		else {		/* might want to maintain head/tail pointers
235 				 * here rather than search for end of list */
236 			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
237 			p->next = cb;
238 		}
239 		retcode = 1;
240 		goto out;
241 	}
242 	Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col);
243 	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
244 	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
245 	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
246 	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
247 	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
248 
249 	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
250 
251 	/* initialize the buffer */
252 	if (t != rbuf) {
253 		t->col = reconCtrlPtr->fcol;
254 		t->parityStripeID = rbuf->parityStripeID;
255 		t->which_ru = rbuf->which_ru;
256 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
257 		t->spCol = rbuf->spCol;
258 		t->spOffset = rbuf->spOffset;
259 
260 		ta = t->buffer;
261 		t->buffer = rbuf->buffer;
262 		rbuf->buffer = ta;	/* swap buffers */
263 	}
264 	/* the first installation always gets installed as the destination
265 	 * buffer. subsequent installations get stacked up to allow for
266 	 * multi-way XOR */
267 	if (!pssPtr->rbuf) {
268 		pssPtr->rbuf = t;
269 		t->count = 1;
270 	} else
271 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */
272 
273 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
274 											 * G=2 */
275 
276 out:
277 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
278 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
279 	return (retcode);
280 }
281 /* pssPtr - the pss descriptor for this parity stripe */
282 int
283 rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr)
284 {
285 	int     i, numBufs = pssPtr->xorBufCount;
286 	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
287 	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
288 	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
289 
290 	RF_ASSERT(pssPtr->rbuf != NULL);
291 	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
292 #ifdef _KERNEL
293 #ifndef __NetBSD__
294 	thread_block();		/* yield the processor before doing a big XOR */
295 #endif
296 #endif				/* _KERNEL */
297 	/*
298          * XXX
299          *
300          * What if more than 9 bufs?
301          */
302 	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
303 
304 	/* release all the reconstruction buffers except the last one, which
305 	 * belongs to the disk whose submission caused this XOR to take place */
306 	for (i = 0; i < numBufs - 1; i++) {
307 		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
308 			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]);
309 		else
310 			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
311 				rf_FreeReconBuffer(rbufs[i]);
312 			else
313 				RF_ASSERT(0);
314 	}
315 	targetRbuf->count += pssPtr->xorBufCount;
316 	pssPtr->xorBufCount = 0;
317 	return (0);
318 }
319 /* removes one full buffer from one of the full-buffer lists and returns it.
320  *
321  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
322  */
323 RF_ReconBuffer_t *
324 rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
325 {
326 	RF_ReconBuffer_t *p;
327 
328 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
329 
330 	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
331 		reconCtrlPtr->fullBufferList = p->next;
332 		p->next = NULL;
333 	}
334 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
335 	return (p);
336 }
337 
338 
339 /* if the reconstruction buffer is full, move it to the full list,
340  * which is maintained sorted by failed disk sector offset
341  *
342  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
343 int
344 rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
345 		    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
346 {
347 	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
348 
349 	if (rbuf->count == numDataCol) {
350 		raidPtr->numFullReconBuffers++;
351 		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
352 		    (long) rbuf->parityStripeID, rbuf->which_ru);
353 		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
354 			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
355 			    (long) rbuf->parityStripeID, rbuf->which_ru);
356 			rbuf->next = reconCtrl->fullBufferList;
357 			reconCtrl->fullBufferList = rbuf;
358 		} else {
359 			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
360 			rbuf->next = p;
361 			pt->next = rbuf;
362 			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
363 			    (long) rbuf->parityStripeID, rbuf->which_ru);
364 		}
365 		rbuf->pssPtr = pssPtr;
366 		pssPtr->rbuf = NULL;
367 		rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY);
368 	}
369 	return (0);
370 }
371 
372 
373 /* release a floating recon buffer for someone else to use.
374  * assumes the rb_mutex is LOCKED at entry
375  */
376 void
377 rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf)
378 {
379 	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl;
380 	RF_CallbackDesc_t *cb;
381 
382 	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
383 	    (long) rbuf->parityStripeID, rbuf->which_ru);
384 
385 	/* if anyone is waiting on buffers, wake one of them up.  They will
386 	 * subsequently wake up anyone else waiting on their RU */
387 	if (rcPtr->bufferWaitList) {
388 		rbuf->next = rcPtr->committedRbufs;
389 		rcPtr->committedRbufs = rbuf;
390 		cb = rcPtr->bufferWaitList;
391 		rcPtr->bufferWaitList = cb->next;
392 		rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
393 												 * committed a buffer */
394 		rf_FreeCallbackDesc(cb);
395 		raidPtr->procsInBufWait--;
396 	} else {
397 		rbuf->next = rcPtr->floatingRbufs;
398 		rcPtr->floatingRbufs = rbuf;
399 	}
400 }
401