xref: /openbsd-src/sys/dev/softraid_raid1.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /* $OpenBSD: softraid_raid1.c,v 1.7 2008/11/25 23:05:17 marco Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/proc.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/disklabel.h>
33 #include <sys/mount.h>
34 #include <sys/sensors.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 #include <dev/rndvar.h>
45 
46 /* RAID 1 functions */
47 int
48 sr_raid1_alloc_resources(struct sr_discipline *sd)
49 {
50 	int			rv = EINVAL;
51 
52 	if (!sd)
53 		return (rv);
54 
55 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n",
56 	    DEVNAME(sd->sd_sc));
57 
58 	if (sr_wu_alloc(sd))
59 		goto bad;
60 	if (sr_ccb_alloc(sd))
61 		goto bad;
62 
63 	rv = 0;
64 bad:
65 	return (rv);
66 }
67 
68 int
69 sr_raid1_free_resources(struct sr_discipline *sd)
70 {
71 	int			rv = EINVAL;
72 
73 	if (!sd)
74 		return (rv);
75 
76 	DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n",
77 	    DEVNAME(sd->sd_sc));
78 
79 	sr_wu_free(sd);
80 	sr_ccb_free(sd);
81 
82 	rv = 0;
83 	return (rv);
84 }
85 
86 void
87 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
88 {
89 	int			old_state, s;
90 
91 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
92 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
93 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
94 
95 	/* ok to go to splbio since this only happens in error path */
96 	s = splbio();
97 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
98 
99 	/* multiple IOs to the same chunk that fail will come through here */
100 	if (old_state == new_state)
101 		goto done;
102 
103 	switch (old_state) {
104 	case BIOC_SDONLINE:
105 		switch (new_state) {
106 		case BIOC_SDOFFLINE:
107 			break;
108 		case BIOC_SDSCRUB:
109 			break;
110 		default:
111 			goto die;
112 		}
113 		break;
114 
115 	case BIOC_SDOFFLINE:
116 		if (new_state == BIOC_SDREBUILD) {
117 			;
118 		} else
119 			goto die;
120 		break;
121 
122 	case BIOC_SDSCRUB:
123 		if (new_state == BIOC_SDONLINE) {
124 			;
125 		} else
126 			goto die;
127 		break;
128 
129 	case BIOC_SDREBUILD:
130 		if (new_state == BIOC_SDONLINE) {
131 			;
132 		} else
133 			goto die;
134 		break;
135 
136 	case BIOC_SDHOTSPARE:
137 		if (new_state == BIOC_SDREBUILD) {
138 			;
139 		} else
140 			goto die;
141 		break;
142 
143 	default:
144 die:
145 		splx(s); /* XXX */
146 		panic("%s: %s: %s: invalid chunk state transition "
147 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
148 		    sd->sd_meta->ssd_devname,
149 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
150 		    old_state, new_state);
151 		/* NOTREACHED */
152 	}
153 
154 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
155 	sd->sd_set_vol_state(sd);
156 
157 	sd->sd_must_flush = 1;
158 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
159 done:
160 	splx(s);
161 }
162 
163 void
164 sr_raid1_set_vol_state(struct sr_discipline *sd)
165 {
166 	int			states[SR_MAX_STATES];
167 	int			new_state, i, s, nd;
168 	int			old_state = sd->sd_vol_status;
169 
170 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
171 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
172 
173 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
174 
175 	for (i = 0; i < SR_MAX_STATES; i++)
176 		states[i] = 0;
177 
178 	for (i = 0; i < nd; i++) {
179 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
180 		if (s > SR_MAX_STATES)
181 			panic("%s: %s: %s: invalid chunk state",
182 			    DEVNAME(sd->sd_sc),
183 			    sd->sd_meta->ssd_devname,
184 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
185 		states[s]++;
186 	}
187 
188 	if (states[BIOC_SDONLINE] == nd)
189 		new_state = BIOC_SVONLINE;
190 	else if (states[BIOC_SDONLINE] == 0)
191 		new_state = BIOC_SVOFFLINE;
192 	else if (states[BIOC_SDSCRUB] != 0)
193 		new_state = BIOC_SVSCRUB;
194 	else if (states[BIOC_SDREBUILD] != 0)
195 		new_state = BIOC_SVREBUILD;
196 	else if (states[BIOC_SDOFFLINE] != 0)
197 		new_state = BIOC_SVDEGRADED;
198 	else {
199 		printf("old_state = %d, ", old_state);
200 		for (i = 0; i < nd; i++)
201 			printf("%d = %d, ", i,
202 			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
203 		panic("invalid new_state");
204 	}
205 
206 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
207 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
208 	    old_state, new_state);
209 
210 	switch (old_state) {
211 	case BIOC_SVONLINE:
212 		switch (new_state) {
213 		case BIOC_SVOFFLINE:
214 		case BIOC_SVDEGRADED:
215 			break;
216 		default:
217 			goto die;
218 		}
219 		break;
220 
221 	case BIOC_SVOFFLINE:
222 		/* XXX this might be a little too much */
223 		goto die;
224 
225 	case BIOC_SVSCRUB:
226 		switch (new_state) {
227 		case BIOC_SVONLINE:
228 		case BIOC_SVOFFLINE:
229 		case BIOC_SVDEGRADED:
230 		case BIOC_SVSCRUB: /* can go to same state */
231 			break;
232 		default:
233 			goto die;
234 		}
235 		break;
236 
237 	case BIOC_SVBUILDING:
238 		switch (new_state) {
239 		case BIOC_SVONLINE:
240 		case BIOC_SVOFFLINE:
241 		case BIOC_SVBUILDING: /* can go to the same state */
242 			break;
243 		default:
244 			goto die;
245 		}
246 		break;
247 
248 	case BIOC_SVREBUILD:
249 		switch (new_state) {
250 		case BIOC_SVONLINE:
251 		case BIOC_SVOFFLINE:
252 		case BIOC_SVREBUILD: /* can go to the same state */
253 			break;
254 		default:
255 			goto die;
256 		}
257 		break;
258 
259 	case BIOC_SVDEGRADED:
260 		switch (new_state) {
261 		case BIOC_SVOFFLINE:
262 		case BIOC_SVREBUILD:
263 		case BIOC_SVDEGRADED: /* can go to the same state */
264 			break;
265 		default:
266 			goto die;
267 		}
268 		break;
269 
270 	default:
271 die:
272 		panic("%s: %s: invalid volume state transition "
273 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
274 		    sd->sd_meta->ssd_devname,
275 		    old_state, new_state);
276 		/* NOTREACHED */
277 	}
278 
279 	sd->sd_vol_status = new_state;
280 }
281 
282 int
283 sr_raid1_rw(struct sr_workunit *wu)
284 {
285 	struct sr_discipline	*sd = wu->swu_dis;
286 	struct scsi_xfer	*xs = wu->swu_xs;
287 	struct sr_ccb		*ccb;
288 	struct sr_chunk		*scp;
289 	int			ios, x, i, s, rt;
290 	daddr64_t		blk;
291 
292 	/* blk and scsi error will be handled by sr_validate_io */
293 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
294 		goto bad;
295 
296 	/* calculate physical block */
297 	blk += SR_META_SIZE + SR_META_OFFSET;
298 
299 	if (xs->flags & SCSI_DATA_IN)
300 		ios = 1;
301 	else
302 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
303 	wu->swu_io_count = ios;
304 
305 	for (i = 0; i < ios; i++) {
306 		ccb = sr_ccb_get(sd);
307 		if (!ccb) {
308 			/* should never happen but handle more gracefully */
309 			printf("%s: %s: too many ccbs queued\n",
310 			    DEVNAME(sd->sd_sc),
311 			    sd->sd_meta->ssd_devname);
312 			goto bad;
313 		}
314 
315 		if (xs->flags & SCSI_POLL) {
316 			ccb->ccb_buf.b_flags = 0;
317 			ccb->ccb_buf.b_iodone = NULL;
318 		} else {
319 			ccb->ccb_buf.b_flags = B_CALL;
320 			ccb->ccb_buf.b_iodone = sr_raid1_intr;
321 		}
322 
323 		ccb->ccb_buf.b_blkno = blk;
324 		ccb->ccb_buf.b_bcount = xs->datalen;
325 		ccb->ccb_buf.b_bufsize = xs->datalen;
326 		ccb->ccb_buf.b_resid = xs->datalen;
327 		ccb->ccb_buf.b_data = xs->data;
328 		ccb->ccb_buf.b_error = 0;
329 		ccb->ccb_buf.b_proc = curproc;
330 		ccb->ccb_wu = wu;
331 
332 		if (xs->flags & SCSI_DATA_IN) {
333 			rt = 0;
334 ragain:
335 			/* interleave reads */
336 			x = sd->mds.mdd_raid1.sr1_counter++ %
337 			    sd->sd_meta->ssdi.ssd_chunk_no;
338 			scp = sd->sd_vol.sv_chunks[x];
339 			switch (scp->src_meta.scm_status) {
340 			case BIOC_SDONLINE:
341 			case BIOC_SDSCRUB:
342 				ccb->ccb_buf.b_flags |= B_READ;
343 				break;
344 
345 			case BIOC_SDOFFLINE:
346 			case BIOC_SDREBUILD:
347 			case BIOC_SDHOTSPARE:
348 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
349 					goto ragain;
350 
351 				/* FALLTHROUGH */
352 			default:
353 				/* volume offline */
354 				printf("%s: is offline, can't read\n",
355 				    DEVNAME(sd->sd_sc));
356 				sr_ccb_put(ccb);
357 				goto bad;
358 			}
359 		} else {
360 			/* writes go on all working disks */
361 			x = i;
362 			scp = sd->sd_vol.sv_chunks[x];
363 			switch (scp->src_meta.scm_status) {
364 			case BIOC_SDONLINE:
365 			case BIOC_SDSCRUB:
366 			case BIOC_SDREBUILD:
367 				ccb->ccb_buf.b_flags |= B_WRITE;
368 				break;
369 
370 			case BIOC_SDHOTSPARE: /* should never happen */
371 			case BIOC_SDOFFLINE:
372 				wu->swu_io_count--;
373 				sr_ccb_put(ccb);
374 				continue;
375 
376 			default:
377 				goto bad;
378 			}
379 
380 		}
381 		ccb->ccb_target = x;
382 		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm;
383 		ccb->ccb_buf.b_vp = NULL;
384 
385 		LIST_INIT(&ccb->ccb_buf.b_dep);
386 
387 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
388 
389 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
390 		    "b_blkno: %x b_flags 0x%0x b_data %p\n",
391 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
392 		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
393 		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
394 	}
395 
396 	s = splbio();
397 
398 	/* current io failed, restart */
399 	if (wu->swu_state == SR_WU_RESTART)
400 		goto start;
401 
402 	/* deferred io failed, don't restart */
403 	if (wu->swu_state == SR_WU_REQUEUE)
404 		goto queued;
405 
406 	if (sr_check_io_collision(wu))
407 		goto queued;
408 
409 start:
410 	sr_raid_startwu(wu);
411 queued:
412 	splx(s);
413 	return (0);
414 bad:
415 	/* wu is unwound by sr_wu_put */
416 	return (1);
417 }
418 
419 void
420 sr_raid1_intr(struct buf *bp)
421 {
422 	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
423 	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
424 	struct sr_discipline	*sd = wu->swu_dis;
425 	struct scsi_xfer	*xs = wu->swu_xs;
426 	struct sr_softc		*sc = sd->sd_sc;
427 	int			s, pend;
428 
429 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
430 	    DEVNAME(sc), bp, xs);
431 
432 	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
433 	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
434 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
435 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
436 
437 	s = splbio();
438 
439 	if (ccb->ccb_buf.b_flags & B_ERROR) {
440 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
441 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
442 		wu->swu_ios_failed++;
443 		ccb->ccb_state = SR_CCB_FAILED;
444 		if (ccb->ccb_target != -1)
445 			sd->sd_set_chunk_state(sd, ccb->ccb_target,
446 			    BIOC_SDOFFLINE);
447 		else
448 			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
449 	} else {
450 		ccb->ccb_state = SR_CCB_OK;
451 		wu->swu_ios_succeeded++;
452 	}
453 	wu->swu_ios_complete++;
454 
455 	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
456 	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
457 	    wu->swu_ios_failed);
458 
459 	if (wu->swu_ios_complete >= wu->swu_io_count) {
460 		/* if all ios failed, retry reads and give up on writes */
461 		if (wu->swu_ios_failed == wu->swu_ios_complete) {
462 			if (xs->flags & SCSI_DATA_IN) {
463 				printf("%s: retrying read on block %lld\n",
464 				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
465 				sr_ccb_put(ccb);
466 				TAILQ_INIT(&wu->swu_ccb);
467 				wu->swu_state = SR_WU_RESTART;
468 				if (sd->sd_scsi_rw(wu))
469 					goto bad;
470 				else
471 					goto retry;
472 			} else {
473 				printf("%s: permanently fail write on block "
474 				    "%lld\n", DEVNAME(sc),
475 				    ccb->ccb_buf.b_blkno);
476 				xs->error = XS_DRIVER_STUFFUP;
477 				goto bad;
478 			}
479 		}
480 
481 		xs->error = XS_NOERROR;
482 		xs->resid = 0;
483 		xs->flags |= ITSDONE;
484 
485 		pend = 0;
486 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
487 			if (wu == wup) {
488 				/* wu on pendq, remove */
489 				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
490 				pend = 1;
491 
492 				if (wu->swu_collider) {
493 					if (wu->swu_ios_failed)
494 						/* toss all ccbs and recreate */
495 						sr_raid1_recreate_wu(wu->swu_collider);
496 
497 					/* restart deferred wu */
498 					wu->swu_collider->swu_state =
499 					    SR_WU_INPROGRESS;
500 					TAILQ_REMOVE(&sd->sd_wu_defq,
501 					    wu->swu_collider, swu_link);
502 					sr_raid_startwu(wu->swu_collider);
503 				}
504 				break;
505 			}
506 		}
507 
508 		if (!pend)
509 			printf("%s: wu: %p not on pending queue\n",
510 			    DEVNAME(sc), wu);
511 
512 		/* do not change the order of these 2 functions */
513 		sr_wu_put(wu);
514 		sr_scsi_done(sd, xs);
515 
516 		if (sd->sd_sync && sd->sd_wu_pending == 0)
517 			wakeup(sd);
518 	}
519 
520 retry:
521 	splx(s);
522 	return;
523 bad:
524 	xs->error = XS_DRIVER_STUFFUP;
525 	xs->flags |= ITSDONE;
526 	sr_wu_put(wu);
527 	sr_scsi_done(sd, xs);
528 	splx(s);
529 }
530 
531 void
532 sr_raid1_recreate_wu(struct sr_workunit *wu)
533 {
534 	struct sr_discipline	*sd = wu->swu_dis;
535 	struct sr_workunit	*wup = wu;
536 	struct sr_ccb		*ccb;
537 
538 	do {
539 		DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup);
540 
541 		/* toss all ccbs */
542 		while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) {
543 			TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link);
544 			sr_ccb_put(ccb);
545 		}
546 		TAILQ_INIT(&wup->swu_ccb);
547 
548 		/* recreate ccbs */
549 		wup->swu_state = SR_WU_REQUEUE;
550 		if (sd->sd_scsi_rw(wup))
551 			panic("could not requeue io");
552 
553 		wup = wup->swu_collider;
554 	} while (wup);
555 }
556