xref: /openbsd-src/sys/dev/softraid_raid1.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /* $OpenBSD: softraid_raid1.c,v 1.8 2009/04/28 02:54:53 marco Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/proc.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/disklabel.h>
33 #include <sys/mount.h>
34 #include <sys/sensors.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 #include <dev/rndvar.h>
45 
46 /* RAID 1 functions */
47 int
48 sr_raid1_alloc_resources(struct sr_discipline *sd)
49 {
50 	int			rv = EINVAL;
51 
52 	if (!sd)
53 		return (rv);
54 
55 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n",
56 	    DEVNAME(sd->sd_sc));
57 
58 	if (sr_wu_alloc(sd))
59 		goto bad;
60 	if (sr_ccb_alloc(sd))
61 		goto bad;
62 
63 	rv = 0;
64 bad:
65 	return (rv);
66 }
67 
68 int
69 sr_raid1_free_resources(struct sr_discipline *sd)
70 {
71 	int			rv = EINVAL;
72 
73 	if (!sd)
74 		return (rv);
75 
76 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n",
77 	    DEVNAME(sd->sd_sc));
78 
79 	sr_wu_free(sd);
80 	sr_ccb_free(sd);
81 
82 	rv = 0;
83 	return (rv);
84 }
85 
86 void
87 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
88 {
89 	int			old_state, s;
90 
91 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
92 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
93 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
94 
95 	/* ok to go to splbio since this only happens in error path */
96 	s = splbio();
97 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
98 
99 	/* multiple IOs to the same chunk that fail will come through here */
100 	if (old_state == new_state)
101 		goto done;
102 
103 	switch (old_state) {
104 	case BIOC_SDONLINE:
105 		switch (new_state) {
106 		case BIOC_SDOFFLINE:
107 			break;
108 		case BIOC_SDSCRUB:
109 			break;
110 		default:
111 			goto die;
112 		}
113 		break;
114 
115 	case BIOC_SDOFFLINE:
116 		if (new_state == BIOC_SDREBUILD) {
117 			;
118 		} else
119 			goto die;
120 		break;
121 
122 	case BIOC_SDSCRUB:
123 		if (new_state == BIOC_SDONLINE) {
124 			;
125 		} else
126 			goto die;
127 		break;
128 
129 	case BIOC_SDREBUILD:
130 		if (new_state == BIOC_SDONLINE) {
131 			;
132 		} else
133 			goto die;
134 		break;
135 
136 	case BIOC_SDHOTSPARE:
137 		if (new_state == BIOC_SDREBUILD) {
138 			;
139 		} else
140 			goto die;
141 		break;
142 
143 	default:
144 die:
145 		splx(s); /* XXX */
146 		panic("%s: %s: %s: invalid chunk state transition "
147 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
148 		    sd->sd_meta->ssd_devname,
149 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
150 		    old_state, new_state);
151 		/* NOTREACHED */
152 	}
153 
154 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
155 	sd->sd_set_vol_state(sd);
156 
157 	sd->sd_must_flush = 1;
158 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
159 done:
160 	splx(s);
161 }
162 
163 void
164 sr_raid1_set_vol_state(struct sr_discipline *sd)
165 {
166 	int			states[SR_MAX_STATES];
167 	int			new_state, i, s, nd;
168 	int			old_state = sd->sd_vol_status;
169 
170 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
171 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
172 
173 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
174 
175 	for (i = 0; i < SR_MAX_STATES; i++)
176 		states[i] = 0;
177 
178 	for (i = 0; i < nd; i++) {
179 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
180 		if (s > SR_MAX_STATES)
181 			panic("%s: %s: %s: invalid chunk state",
182 			    DEVNAME(sd->sd_sc),
183 			    sd->sd_meta->ssd_devname,
184 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
185 		states[s]++;
186 	}
187 
188 	if (states[BIOC_SDONLINE] == nd)
189 		new_state = BIOC_SVONLINE;
190 	else if (states[BIOC_SDONLINE] == 0)
191 		new_state = BIOC_SVOFFLINE;
192 	else if (states[BIOC_SDSCRUB] != 0)
193 		new_state = BIOC_SVSCRUB;
194 	else if (states[BIOC_SDREBUILD] != 0)
195 		new_state = BIOC_SVREBUILD;
196 	else if (states[BIOC_SDOFFLINE] != 0)
197 		new_state = BIOC_SVDEGRADED;
198 	else {
199 		printf("old_state = %d, ", old_state);
200 		for (i = 0; i < nd; i++)
201 			printf("%d = %d, ", i,
202 			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
203 		panic("invalid new_state");
204 	}
205 
206 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
207 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
208 	    old_state, new_state);
209 
210 	switch (old_state) {
211 	case BIOC_SVONLINE:
212 		switch (new_state) {
213 		case BIOC_SVONLINE: /* can go to same state */
214 		case BIOC_SVOFFLINE:
215 		case BIOC_SVDEGRADED:
216 			break;
217 		default:
218 			goto die;
219 		}
220 		break;
221 
222 	case BIOC_SVOFFLINE:
223 		/* XXX this might be a little too much */
224 		goto die;
225 
226 	case BIOC_SVSCRUB:
227 		switch (new_state) {
228 		case BIOC_SVONLINE:
229 		case BIOC_SVOFFLINE:
230 		case BIOC_SVDEGRADED:
231 		case BIOC_SVSCRUB: /* can go to same state */
232 			break;
233 		default:
234 			goto die;
235 		}
236 		break;
237 
238 	case BIOC_SVBUILDING:
239 		switch (new_state) {
240 		case BIOC_SVONLINE:
241 		case BIOC_SVOFFLINE:
242 		case BIOC_SVBUILDING: /* can go to the same state */
243 			break;
244 		default:
245 			goto die;
246 		}
247 		break;
248 
249 	case BIOC_SVREBUILD:
250 		switch (new_state) {
251 		case BIOC_SVONLINE:
252 		case BIOC_SVOFFLINE:
253 		case BIOC_SVREBUILD: /* can go to the same state */
254 			break;
255 		default:
256 			goto die;
257 		}
258 		break;
259 
260 	case BIOC_SVDEGRADED:
261 		switch (new_state) {
262 		case BIOC_SVOFFLINE:
263 		case BIOC_SVREBUILD:
264 		case BIOC_SVDEGRADED: /* can go to the same state */
265 			break;
266 		default:
267 			goto die;
268 		}
269 		break;
270 
271 	default:
272 die:
273 		panic("%s: %s: invalid volume state transition "
274 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
275 		    sd->sd_meta->ssd_devname,
276 		    old_state, new_state);
277 		/* NOTREACHED */
278 	}
279 
280 	sd->sd_vol_status = new_state;
281 }
282 
283 int
284 sr_raid1_rw(struct sr_workunit *wu)
285 {
286 	struct sr_discipline	*sd = wu->swu_dis;
287 	struct scsi_xfer	*xs = wu->swu_xs;
288 	struct sr_ccb		*ccb;
289 	struct sr_chunk		*scp;
290 	int			ios, x, i, s, rt;
291 	daddr64_t		blk;
292 
293 	/* blk and scsi error will be handled by sr_validate_io */
294 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
295 		goto bad;
296 
297 	/* calculate physical block */
298 	blk += SR_META_SIZE + SR_META_OFFSET;
299 
300 	if (xs->flags & SCSI_DATA_IN)
301 		ios = 1;
302 	else
303 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
304 	wu->swu_io_count = ios;
305 
306 	for (i = 0; i < ios; i++) {
307 		ccb = sr_ccb_get(sd);
308 		if (!ccb) {
309 			/* should never happen but handle more gracefully */
310 			printf("%s: %s: too many ccbs queued\n",
311 			    DEVNAME(sd->sd_sc),
312 			    sd->sd_meta->ssd_devname);
313 			goto bad;
314 		}
315 
316 		if (xs->flags & SCSI_POLL) {
317 			ccb->ccb_buf.b_flags = 0;
318 			ccb->ccb_buf.b_iodone = NULL;
319 		} else {
320 			ccb->ccb_buf.b_flags = B_CALL;
321 			ccb->ccb_buf.b_iodone = sr_raid1_intr;
322 		}
323 
324 		ccb->ccb_buf.b_blkno = blk;
325 		ccb->ccb_buf.b_bcount = xs->datalen;
326 		ccb->ccb_buf.b_bufsize = xs->datalen;
327 		ccb->ccb_buf.b_resid = xs->datalen;
328 		ccb->ccb_buf.b_data = xs->data;
329 		ccb->ccb_buf.b_error = 0;
330 		ccb->ccb_buf.b_proc = curproc;
331 		ccb->ccb_wu = wu;
332 
333 		if (xs->flags & SCSI_DATA_IN) {
334 			rt = 0;
335 ragain:
336 			/* interleave reads */
337 			x = sd->mds.mdd_raid1.sr1_counter++ %
338 			    sd->sd_meta->ssdi.ssd_chunk_no;
339 			scp = sd->sd_vol.sv_chunks[x];
340 			switch (scp->src_meta.scm_status) {
341 			case BIOC_SDONLINE:
342 			case BIOC_SDSCRUB:
343 				ccb->ccb_buf.b_flags |= B_READ;
344 				break;
345 
346 			case BIOC_SDOFFLINE:
347 			case BIOC_SDREBUILD:
348 			case BIOC_SDHOTSPARE:
349 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
350 					goto ragain;
351 
352 				/* FALLTHROUGH */
353 			default:
354 				/* volume offline */
355 				printf("%s: is offline, can't read\n",
356 				    DEVNAME(sd->sd_sc));
357 				sr_ccb_put(ccb);
358 				goto bad;
359 			}
360 		} else {
361 			/* writes go on all working disks */
362 			x = i;
363 			scp = sd->sd_vol.sv_chunks[x];
364 			switch (scp->src_meta.scm_status) {
365 			case BIOC_SDONLINE:
366 			case BIOC_SDSCRUB:
367 			case BIOC_SDREBUILD:
368 				ccb->ccb_buf.b_flags |= B_WRITE;
369 				break;
370 
371 			case BIOC_SDHOTSPARE: /* should never happen */
372 			case BIOC_SDOFFLINE:
373 				wu->swu_io_count--;
374 				sr_ccb_put(ccb);
375 				continue;
376 
377 			default:
378 				goto bad;
379 			}
380 
381 		}
382 		ccb->ccb_target = x;
383 		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm;
384 		ccb->ccb_buf.b_vp = NULL;
385 
386 		LIST_INIT(&ccb->ccb_buf.b_dep);
387 
388 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
389 
390 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
391 		    "b_blkno: %x b_flags 0x%0x b_data %p\n",
392 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
393 		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
394 		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
395 	}
396 
397 	s = splbio();
398 
399 	/* current io failed, restart */
400 	if (wu->swu_state == SR_WU_RESTART)
401 		goto start;
402 
403 	/* deferred io failed, don't restart */
404 	if (wu->swu_state == SR_WU_REQUEUE)
405 		goto queued;
406 
407 	if (sr_check_io_collision(wu))
408 		goto queued;
409 
410 start:
411 	sr_raid_startwu(wu);
412 queued:
413 	splx(s);
414 	return (0);
415 bad:
416 	/* wu is unwound by sr_wu_put */
417 	return (1);
418 }
419 
420 void
421 sr_raid1_intr(struct buf *bp)
422 {
423 	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
424 	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
425 	struct sr_discipline	*sd = wu->swu_dis;
426 	struct scsi_xfer	*xs = wu->swu_xs;
427 	struct sr_softc		*sc = sd->sd_sc;
428 	int			s, pend;
429 
430 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
431 	    DEVNAME(sc), bp, xs);
432 
433 	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
434 	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
435 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
436 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
437 
438 	s = splbio();
439 
440 	if (ccb->ccb_buf.b_flags & B_ERROR) {
441 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
442 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
443 		wu->swu_ios_failed++;
444 		ccb->ccb_state = SR_CCB_FAILED;
445 		if (ccb->ccb_target != -1)
446 			sd->sd_set_chunk_state(sd, ccb->ccb_target,
447 			    BIOC_SDOFFLINE);
448 		else
449 			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
450 	} else {
451 		ccb->ccb_state = SR_CCB_OK;
452 		wu->swu_ios_succeeded++;
453 	}
454 	wu->swu_ios_complete++;
455 
456 	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
457 	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
458 	    wu->swu_ios_failed);
459 
460 	if (wu->swu_ios_complete >= wu->swu_io_count) {
461 		/* if all ios failed, retry reads and give up on writes */
462 		if (wu->swu_ios_failed == wu->swu_ios_complete) {
463 			if (xs->flags & SCSI_DATA_IN) {
464 				printf("%s: retrying read on block %lld\n",
465 				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
466 				sr_ccb_put(ccb);
467 				TAILQ_INIT(&wu->swu_ccb);
468 				wu->swu_state = SR_WU_RESTART;
469 				if (sd->sd_scsi_rw(wu))
470 					goto bad;
471 				else
472 					goto retry;
473 			} else {
474 				printf("%s: permanently fail write on block "
475 				    "%lld\n", DEVNAME(sc),
476 				    ccb->ccb_buf.b_blkno);
477 				xs->error = XS_DRIVER_STUFFUP;
478 				goto bad;
479 			}
480 		}
481 
482 		xs->error = XS_NOERROR;
483 		xs->resid = 0;
484 		xs->flags |= ITSDONE;
485 
486 		pend = 0;
487 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
488 			if (wu == wup) {
489 				/* wu on pendq, remove */
490 				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
491 				pend = 1;
492 
493 				if (wu->swu_collider) {
494 					if (wu->swu_ios_failed)
495 						/* toss all ccbs and recreate */
496 						sr_raid1_recreate_wu(wu->swu_collider);
497 
498 					/* restart deferred wu */
499 					wu->swu_collider->swu_state =
500 					    SR_WU_INPROGRESS;
501 					TAILQ_REMOVE(&sd->sd_wu_defq,
502 					    wu->swu_collider, swu_link);
503 					sr_raid_startwu(wu->swu_collider);
504 				}
505 				break;
506 			}
507 		}
508 
509 		if (!pend)
510 			printf("%s: wu: %p not on pending queue\n",
511 			    DEVNAME(sc), wu);
512 
513 		/* do not change the order of these 2 functions */
514 		sr_wu_put(wu);
515 		sr_scsi_done(sd, xs);
516 
517 		if (sd->sd_sync && sd->sd_wu_pending == 0)
518 			wakeup(sd);
519 	}
520 
521 retry:
522 	splx(s);
523 	return;
524 bad:
525 	xs->error = XS_DRIVER_STUFFUP;
526 	xs->flags |= ITSDONE;
527 	sr_wu_put(wu);
528 	sr_scsi_done(sd, xs);
529 	splx(s);
530 }
531 
532 void
533 sr_raid1_recreate_wu(struct sr_workunit *wu)
534 {
535 	struct sr_discipline	*sd = wu->swu_dis;
536 	struct sr_workunit	*wup = wu;
537 	struct sr_ccb		*ccb;
538 
539 	do {
540 		DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup);
541 
542 		/* toss all ccbs */
543 		while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) {
544 			TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link);
545 			sr_ccb_put(ccb);
546 		}
547 		TAILQ_INIT(&wup->swu_ccb);
548 
549 		/* recreate ccbs */
550 		wup->swu_state = SR_WU_REQUEUE;
551 		if (sd->sd_scsi_rw(wup))
552 			panic("could not requeue io");
553 
554 		wup = wup->swu_collider;
555 	} while (wup);
556 }
557