xref: /openbsd-src/sys/dev/softraid_raid1.c (revision d874cce4b1d9fe6b41c9e4f2117a77d8a4a37b92)
1 /* $OpenBSD: softraid_raid1.c,v 1.5 2008/02/05 16:49:25 marco Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/proc.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/disklabel.h>
33 #include <sys/mount.h>
34 #include <sys/sensors.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 #include <dev/rndvar.h>
45 
46 /* RAID 1 functions */
47 int
48 sr_raid1_alloc_resources(struct sr_discipline *sd)
49 {
50 	int			rv = EINVAL;
51 
52 	if (!sd)
53 		return (rv);
54 
55 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n",
56 	    DEVNAME(sd->sd_sc));
57 
58 	if (sr_alloc_wu(sd))
59 		goto bad;
60 	if (sr_alloc_ccb(sd))
61 		goto bad;
62 
63 	rv = 0;
64 bad:
65 	return (rv);
66 }
67 
68 int
69 sr_raid1_free_resources(struct sr_discipline *sd)
70 {
71 	int			rv = EINVAL;
72 
73 	if (!sd)
74 		return (rv);
75 
76 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n",
77 	    DEVNAME(sd->sd_sc));
78 
79 	sr_free_wu(sd);
80 	sr_free_ccb(sd);
81 
82 	if (sd->sd_meta)
83 		free(sd->sd_meta, M_DEVBUF);
84 
85 	rv = 0;
86 	return (rv);
87 }
88 
89 void
90 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
91 {
92 	int			old_state, s;
93 
94 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
95 	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
96 	    sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state);
97 
98 	/* ok to go to splbio since this only happens in error path */
99 	s = splbio();
100 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
101 
102 	/* multiple IOs to the same chunk that fail will come through here */
103 	if (old_state == new_state)
104 		goto done;
105 
106 	switch (old_state) {
107 	case BIOC_SDONLINE:
108 		switch (new_state) {
109 		case BIOC_SDOFFLINE:
110 			break;
111 		case BIOC_SDSCRUB:
112 			break;
113 		default:
114 			goto die;
115 		}
116 		break;
117 
118 	case BIOC_SDOFFLINE:
119 		if (new_state == BIOC_SDREBUILD) {
120 			;
121 		} else
122 			goto die;
123 		break;
124 
125 	case BIOC_SDSCRUB:
126 		if (new_state == BIOC_SDONLINE) {
127 			;
128 		} else
129 			goto die;
130 		break;
131 
132 	case BIOC_SDREBUILD:
133 		if (new_state == BIOC_SDONLINE) {
134 			;
135 		} else
136 			goto die;
137 		break;
138 
139 	case BIOC_SDHOTSPARE:
140 		if (new_state == BIOC_SDREBUILD) {
141 			;
142 		} else
143 			goto die;
144 		break;
145 
146 	default:
147 die:
148 		splx(s); /* XXX */
149 		panic("%s: %s: %s: invalid chunk state transition "
150 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
151 		    sd->sd_vol.sv_meta.svm_devname,
152 		    sd->sd_vol.sv_chunks[c]->src_meta.scm_devname,
153 		    old_state, new_state);
154 		/* NOTREACHED */
155 	}
156 
157 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
158 	sd->sd_set_vol_state(sd);
159 
160 	sd->sd_must_flush = 1;
161 	workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL);
162 done:
163 	splx(s);
164 }
165 
166 void
167 sr_raid1_set_vol_state(struct sr_discipline *sd)
168 {
169 	int			states[SR_MAX_STATES];
170 	int			new_state, i, s, nd;
171 	int			old_state = sd->sd_vol.sv_meta.svm_status;
172 
173 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
174 	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
175 
176 	nd = sd->sd_vol.sv_meta.svm_no_chunk;
177 
178 	for (i = 0; i < SR_MAX_STATES; i++)
179 		states[i] = 0;
180 
181 	for (i = 0; i < nd; i++) {
182 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
183 		if (s > SR_MAX_STATES)
184 			panic("%s: %s: %s: invalid chunk state",
185 			    DEVNAME(sd->sd_sc),
186 			    sd->sd_vol.sv_meta.svm_devname,
187 			    sd->sd_vol.sv_chunks[i]->src_meta.scm_devname);
188 		states[s]++;
189 	}
190 
191 	if (states[BIOC_SDONLINE] == nd)
192 		new_state = BIOC_SVONLINE;
193 	else if (states[BIOC_SDONLINE] == 0)
194 		new_state = BIOC_SVOFFLINE;
195 	else if (states[BIOC_SDSCRUB] != 0)
196 		new_state = BIOC_SVSCRUB;
197 	else if (states[BIOC_SDREBUILD] != 0)
198 		new_state = BIOC_SVREBUILD;
199 	else if (states[BIOC_SDOFFLINE] != 0)
200 		new_state = BIOC_SVDEGRADED;
201 	else {
202 		printf("old_state = %d, ", old_state);
203 		for (i = 0; i < nd; i++)
204 			printf("%d = %d, ", i,
205 			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
206 		panic("invalid new_state");
207 	}
208 
209 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
210 	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
211 	    old_state, new_state);
212 
213 	switch (old_state) {
214 	case BIOC_SVONLINE:
215 		switch (new_state) {
216 		case BIOC_SVOFFLINE:
217 		case BIOC_SVDEGRADED:
218 			break;
219 		default:
220 			goto die;
221 		}
222 		break;
223 
224 	case BIOC_SVOFFLINE:
225 		/* XXX this might be a little too much */
226 		goto die;
227 
228 	case BIOC_SVSCRUB:
229 		switch (new_state) {
230 		case BIOC_SVONLINE:
231 		case BIOC_SVOFFLINE:
232 		case BIOC_SVDEGRADED:
233 		case BIOC_SVSCRUB: /* can go to same state */
234 			break;
235 		default:
236 			goto die;
237 		}
238 		break;
239 
240 	case BIOC_SVBUILDING:
241 		switch (new_state) {
242 		case BIOC_SVONLINE:
243 		case BIOC_SVOFFLINE:
244 		case BIOC_SVBUILDING: /* can go to the same state */
245 			break;
246 		default:
247 			goto die;
248 		}
249 		break;
250 
251 	case BIOC_SVREBUILD:
252 		switch (new_state) {
253 		case BIOC_SVONLINE:
254 		case BIOC_SVOFFLINE:
255 		case BIOC_SVREBUILD: /* can go to the same state */
256 			break;
257 		default:
258 			goto die;
259 		}
260 		break;
261 
262 	case BIOC_SVDEGRADED:
263 		switch (new_state) {
264 		case BIOC_SVOFFLINE:
265 		case BIOC_SVREBUILD:
266 		case BIOC_SVDEGRADED: /* can go to the same state */
267 			break;
268 		default:
269 			goto die;
270 		}
271 		break;
272 
273 	default:
274 die:
275 		panic("%s: %s: invalid volume state transition "
276 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
277 		    sd->sd_vol.sv_meta.svm_devname,
278 		    old_state, new_state);
279 		/* NOTREACHED */
280 	}
281 
282 	sd->sd_vol.sv_meta.svm_status = new_state;
283 }
284 
285 int
286 sr_raid1_rw(struct sr_workunit *wu)
287 {
288 	struct sr_discipline	*sd = wu->swu_dis;
289 	struct scsi_xfer	*xs = wu->swu_xs;
290 	struct sr_ccb		*ccb;
291 	struct sr_chunk		*scp;
292 	int			ios, x, i, s, rt;
293 	daddr64_t		blk;
294 
295 	/* blk and scsi error will be handled by sr_validate_io */
296 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
297 		goto bad;
298 
299 	/* calculate physical block */
300 	blk += SR_META_SIZE + SR_META_OFFSET;
301 
302 	if (xs->flags & SCSI_DATA_IN)
303 		ios = 1;
304 	else
305 		ios = sd->sd_vol.sv_meta.svm_no_chunk;
306 	wu->swu_io_count = ios;
307 
308 	for (i = 0; i < ios; i++) {
309 		ccb = sr_get_ccb(sd);
310 		if (!ccb) {
311 			/* should never happen but handle more gracefully */
312 			printf("%s: %s: too many ccbs queued\n",
313 			    DEVNAME(sd->sd_sc),
314 			    sd->sd_vol.sv_meta.svm_devname);
315 			goto bad;
316 		}
317 
318 		if (xs->flags & SCSI_POLL) {
319 			ccb->ccb_buf.b_flags = 0;
320 			ccb->ccb_buf.b_iodone = NULL;
321 		} else {
322 			ccb->ccb_buf.b_flags = B_CALL;
323 			ccb->ccb_buf.b_iodone = sr_raid1_intr;
324 		}
325 
326 		ccb->ccb_buf.b_blkno = blk;
327 		ccb->ccb_buf.b_bcount = xs->datalen;
328 		ccb->ccb_buf.b_bufsize = xs->datalen;
329 		ccb->ccb_buf.b_resid = xs->datalen;
330 		ccb->ccb_buf.b_data = xs->data;
331 		ccb->ccb_buf.b_error = 0;
332 		ccb->ccb_buf.b_proc = curproc;
333 		ccb->ccb_wu = wu;
334 
335 		if (xs->flags & SCSI_DATA_IN) {
336 			rt = 0;
337 ragain:
338 			/* interleave reads */
339 			x = sd->mds.mdd_raid1.sr1_counter++ %
340 			    sd->sd_vol.sv_meta.svm_no_chunk;
341 			scp = sd->sd_vol.sv_chunks[x];
342 			switch (scp->src_meta.scm_status) {
343 			case BIOC_SDONLINE:
344 			case BIOC_SDSCRUB:
345 				ccb->ccb_buf.b_flags |= B_READ;
346 				break;
347 
348 			case BIOC_SDOFFLINE:
349 			case BIOC_SDREBUILD:
350 			case BIOC_SDHOTSPARE:
351 				if (rt++ < sd->sd_vol.sv_meta.svm_no_chunk)
352 					goto ragain;
353 
354 				/* FALLTHROUGH */
355 			default:
356 				/* volume offline */
357 				printf("%s: is offline, can't read\n",
358 				    DEVNAME(sd->sd_sc));
359 				sr_put_ccb(ccb);
360 				goto bad;
361 			}
362 		} else {
363 			/* writes go on all working disks */
364 			x = i;
365 			scp = sd->sd_vol.sv_chunks[x];
366 			switch (scp->src_meta.scm_status) {
367 			case BIOC_SDONLINE:
368 			case BIOC_SDSCRUB:
369 			case BIOC_SDREBUILD:
370 				ccb->ccb_buf.b_flags |= B_WRITE;
371 				break;
372 
373 			case BIOC_SDHOTSPARE: /* should never happen */
374 			case BIOC_SDOFFLINE:
375 				wu->swu_io_count--;
376 				sr_put_ccb(ccb);
377 				continue;
378 
379 			default:
380 				goto bad;
381 			}
382 
383 		}
384 		ccb->ccb_target = x;
385 		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm;
386 		ccb->ccb_buf.b_vp = NULL;
387 
388 		LIST_INIT(&ccb->ccb_buf.b_dep);
389 
390 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
391 
392 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
393 		    "b_blkno: %x b_flags 0x%0x b_data %p\n",
394 		    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
395 		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
396 		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
397 	}
398 
399 	s = splbio();
400 
401 	/* current io failed, restart */
402 	if (wu->swu_state == SR_WU_RESTART)
403 		goto start;
404 
405 	/* deferred io failed, don't restart */
406 	if (wu->swu_state == SR_WU_REQUEUE)
407 		goto queued;
408 
409 	if (sr_check_io_collision(wu))
410 		goto queued;
411 
412 start:
413 	sr_raid_startwu(wu);
414 queued:
415 	splx(s);
416 	return (0);
417 bad:
418 	/* wu is unwound by sr_put_wu */
419 	return (1);
420 }
421 
422 void
423 sr_raid1_intr(struct buf *bp)
424 {
425 	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
426 	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
427 	struct sr_discipline	*sd = wu->swu_dis;
428 	struct scsi_xfer	*xs = wu->swu_xs;
429 	struct sr_softc		*sc = sd->sd_sc;
430 	int			s, pend;
431 
432 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
433 	    DEVNAME(sc), bp, xs);
434 
435 	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
436 	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
437 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
438 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
439 
440 	s = splbio();
441 
442 	if (ccb->ccb_buf.b_flags & B_ERROR) {
443 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
444 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
445 		wu->swu_ios_failed++;
446 		ccb->ccb_state = SR_CCB_FAILED;
447 		if (ccb->ccb_target != -1)
448 			sd->sd_set_chunk_state(sd, ccb->ccb_target,
449 			    BIOC_SDOFFLINE);
450 		else
451 			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
452 	} else {
453 		ccb->ccb_state = SR_CCB_OK;
454 		wu->swu_ios_succeeded++;
455 	}
456 	wu->swu_ios_complete++;
457 
458 	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
459 	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
460 	    wu->swu_ios_failed);
461 
462 	if (wu->swu_ios_complete >= wu->swu_io_count) {
463 		/* if all ios failed, retry reads and give up on writes */
464 		if (wu->swu_ios_failed == wu->swu_ios_complete) {
465 			if (xs->flags & SCSI_DATA_IN) {
466 				printf("%s: retrying read on block %lld\n",
467 				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
468 				sr_put_ccb(ccb);
469 				TAILQ_INIT(&wu->swu_ccb);
470 				wu->swu_state = SR_WU_RESTART;
471 				if (sd->sd_scsi_rw(wu))
472 					goto bad;
473 				else
474 					goto retry;
475 			} else {
476 				printf("%s: permanently fail write on block "
477 				    "%lld\n", DEVNAME(sc),
478 				    ccb->ccb_buf.b_blkno);
479 				xs->error = XS_DRIVER_STUFFUP;
480 				goto bad;
481 			}
482 		}
483 
484 		xs->error = XS_NOERROR;
485 		xs->resid = 0;
486 		xs->flags |= ITSDONE;
487 
488 		pend = 0;
489 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
490 			if (wu == wup) {
491 				/* wu on pendq, remove */
492 				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
493 				pend = 1;
494 
495 				if (wu->swu_collider) {
496 					if (wu->swu_ios_failed)
497 						/* toss all ccbs and recreate */
498 						sr_raid1_recreate_wu(wu->swu_collider);
499 
500 					/* restart deferred wu */
501 					wu->swu_collider->swu_state =
502 					    SR_WU_INPROGRESS;
503 					TAILQ_REMOVE(&sd->sd_wu_defq,
504 					    wu->swu_collider, swu_link);
505 					sr_raid_startwu(wu->swu_collider);
506 				}
507 				break;
508 			}
509 		}
510 
511 		if (!pend)
512 			printf("%s: wu: %p not on pending queue\n",
513 			    DEVNAME(sc), wu);
514 
515 		/* do not change the order of these 2 functions */
516 		sr_put_wu(wu);
517 		scsi_done(xs);
518 
519 		if (sd->sd_sync && sd->sd_wu_pending == 0)
520 			wakeup(sd);
521 	}
522 
523 retry:
524 	splx(s);
525 	return;
526 bad:
527 	xs->error = XS_DRIVER_STUFFUP;
528 	xs->flags |= ITSDONE;
529 	sr_put_wu(wu);
530 	scsi_done(xs);
531 	splx(s);
532 }
533 
534 void
535 sr_raid1_recreate_wu(struct sr_workunit *wu)
536 {
537 	struct sr_discipline	*sd = wu->swu_dis;
538 	struct sr_workunit	*wup = wu;
539 	struct sr_ccb		*ccb;
540 
541 	do {
542 		DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup);
543 
544 		/* toss all ccbs */
545 		while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) {
546 			TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link);
547 			sr_put_ccb(ccb);
548 		}
549 		TAILQ_INIT(&wup->swu_ccb);
550 
551 		/* recreate ccbs */
552 		wup->swu_state = SR_WU_REQUEUE;
553 		if (sd->sd_scsi_rw(wup))
554 			panic("could not requeue io");
555 
556 		wup = wup->swu_collider;
557 	} while (wup);
558 }
559