/* $OpenBSD: softraid_raid1.c,v 1.42 2013/03/25 16:01:49 jsing Exp $ */
/*
 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

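/*
 * RAID 1 discipline: data is mirrored across two or more chunks.
 * Reads are interleaved round-robin over the usable chunks, while
 * writes are issued to every chunk that can accept them.
 */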
/* RAID 1 functions. */
int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid1_alloc_resources(struct sr_discipline *);
int	sr_raid1_free_resources(struct sr_discipline *);
int	sr_raid1_rw(struct sr_workunit *);
void	sr_raid1_intr(struct buf *);
void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid1_set_vol_state(struct sr_discipline *);

/* Discipline initialisation. */
void
sr_raid1_discipline_init(struct sr_discipline *sd)
{
	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID1;
	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID1_NOWU;
	/* Set up discipline-specific function pointers. */
	sd->sd_alloc_resources = sr_raid1_alloc_resources;
	sd->sd_assemble = sr_raid1_assemble;
	sd->sd_create = sr_raid1_create;
	sd->sd_free_resources = sr_raid1_free_resources;
	sd->sd_scsi_rw = sr_raid1_rw;
	sd->sd_scsi_intr = sr_raid1_intr;
	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
	sd->sd_set_vol_state = sr_raid1_set_vol_state;
}

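/*
 * Create a new RAID 1 volume: require at least two chunks and use the
 * coerced size supplied by the caller as the volume size, since every
 * chunk holds a full copy of the data.
 */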
int
sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{

	if (no_chunk < 2) {
		sr_error(sd->sd_sc, "RAID 1 requires two or more chunks");
		return EINVAL;
	}

	sd->sd_meta->ssdi.ssd_size = coerced_size;

	sd->sd_max_ccb_per_wu = no_chunk;

	return 0;
}

int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{

	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;

	return 0;
}

int
sr_raid1_alloc_resources(struct sr_discipline *sd)
{
	int			rv = EINVAL;

	DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n",
	    DEVNAME(sd->sd_sc));

	if (sr_wu_alloc(sd))
		goto bad;
	if (sr_ccb_alloc(sd))
		goto bad;

	rv = 0;
bad:
	return (rv);
}

int
sr_raid1_free_resources(struct sr_discipline *sd)
{
	int			rv = EINVAL;

	DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n",
	    DEVNAME(sd->sd_sc));

	sr_wu_free(sd);
	sr_ccb_free(sd);

	rv = 0;
	return (rv);
}

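/*
 * Transition a chunk to a new state, enforcing the legal transitions
 * (e.g. online -> offline/scrub, offline -> rebuild/hotspare).  Any
 * other transition is a bug and causes a panic.  A successful change
 * also recomputes the volume state and schedules a metadata flush.
 */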
void
sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid1_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		switch (new_state) {
		case BIOC_SDREBUILD:
		case BIOC_SDHOTSPARE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDSCRUB:
		if (new_state == BIOC_SDONLINE) {
			;
		} else
			goto die;
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
			break;
		case BIOC_SDOFFLINE:
			/* Abort rebuild since the rebuild chunk disappeared. */
			sd->sd_reb_abort = 1;
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDHOTSPARE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDREBUILD:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
done:
	splx(s);
}

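/*
 * Derive the volume state from the states of the individual chunks
 * and validate the transition from the previous volume state.
 */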
void
sr_raid1_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

#ifdef SR_DEBUG
	for (i = 0; i < nd; i++)
		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
		    DEVNAME(sd->sd_sc), i,
		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
#endif

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

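	/*
	 * Pick the new volume state by priority: all chunks online means
	 * the volume is online, no chunks online means offline; otherwise
	 * a scrubbing or rebuilding chunk dictates the state, and any
	 * remaining offline chunk leaves the volume degraded.
	 */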
	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] == 0)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDOFFLINE] != 0)
		new_state = BIOC_SVDEGRADED;
	else {
		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
		panic("invalid volume state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;

	/* If we have just become degraded, look for a hotspare. */
	if (new_state == BIOC_SVDEGRADED)
		workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL);
}

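/*
 * Convert a SCSI read/write into per-chunk I/O: a read is sent to a
 * single chunk chosen round-robin, a write is mirrored to all chunks
 * that are in a writable state.
 */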
int
sr_raid1_rw(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_ccb		*ccb;
	struct sr_chunk		*scp;
	int			ios, chunk, i, s, rt;
	daddr64_t		blk;

	/* blk and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
		goto bad;

	/* calculate physical block */
	blk += sd->sd_meta->ssd_data_offset;

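	/* A read needs a single I/O; a write needs one I/O per chunk. */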
	if (xs->flags & SCSI_DATA_IN)
		ios = 1;
	else
		ios = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < ios; i++) {
		if (xs->flags & SCSI_DATA_IN) {
			rt = 0;
ragain:
			/* interleave reads */
			chunk = sd->mds.mdd_raid1.sr1_counter++ %
			    sd->sd_meta->ssdi.ssd_chunk_no;
			scp = sd->sd_vol.sv_chunks[chunk];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDONLINE:
			case BIOC_SDSCRUB:
				break;

			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
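				/*
				 * Chunk is not readable; pick another one
				 * until every chunk has been tried.
				 */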
				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
					goto ragain;

				/* FALLTHROUGH */
			default:
				/* volume offline */
				printf("%s: volume offline, cannot read\n",
				    DEVNAME(sd->sd_sc));
				goto bad;
			}
		} else {
			/* writes go on all working disks */
			chunk = i;
			scp = sd->sd_vol.sv_chunks[chunk];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDONLINE:
			case BIOC_SDSCRUB:
			case BIOC_SDREBUILD:
				break;

			case BIOC_SDHOTSPARE: /* should never happen */
			case BIOC_SDOFFLINE:
				continue;

			default:
				goto bad;
			}
		}

		ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data,
		    xs->flags, 0);
		if (!ccb) {
			/* Should never happen, but handle it gracefully. */
			printf("%s: %s: too many ccbs queued\n",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname);
			goto bad;
		}
		sr_wu_enqueue_ccb(wu, ccb);
	}

	s = splbio();

	/* rebuild io, let rebuild routine deal with it */
	if (wu->swu_flags & SR_WUF_REBUILD)
		goto queued;

	/* current io failed, restart */
	if (wu->swu_state == SR_WU_RESTART)
		goto start;

	/* deferred io failed, don't restart */
	if (wu->swu_state == SR_WU_REQUEUE)
		goto queued;

	if (sr_check_io_collision(wu))
		goto queued;

start:
	sr_raid_startwu(wu);
queued:
	splx(s);
	return (0);
bad:
	/* wu is unwound by sr_wu_put */
	return (1);
}

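/*
 * I/O completion: once every ccb of the work unit has completed, a
 * fully failed read is retried via a restarted work unit, while a
 * write that failed on every chunk is reported as a permanent error.
 */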
void
sr_raid1_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_softc		*sc = sd->sd_sc;
	int			s, pend;

	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
	    DEVNAME(sc), bp, xs);

	s = splbio();

	sr_ccb_done(ccb);

	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
	    wu->swu_ios_failed);

	if (wu->swu_ios_complete >= wu->swu_io_count) {
		/* if all ios failed, retry reads and give up on writes */
		if (wu->swu_ios_failed == wu->swu_ios_complete) {
			if (xs->flags & SCSI_DATA_IN) {
				printf("%s: retrying read on block %lld\n",
				    DEVNAME(sc), ccb->ccb_buf.b_blkno);
				sr_ccb_put(ccb);
				if (wu->swu_cb_active == 1)
					panic("%s: sr_raid1_intr_cb",
					    DEVNAME(sd->sd_sc));
				TAILQ_INIT(&wu->swu_ccb);
				wu->swu_state = SR_WU_RESTART;
				if (sd->sd_scsi_rw(wu))
					goto bad;
				else
					goto retry;
			} else {
				printf("%s: permanently fail write on block "
				    "%lld\n", DEVNAME(sc),
				    ccb->ccb_buf.b_blkno);
				xs->error = XS_DRIVER_STUFFUP;
				goto bad;
			}
		}

		xs->error = XS_NOERROR;

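		/*
		 * Remove the completed work unit from the pending queue
		 * and restart any deferred work unit that collided with it.
		 */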
		pend = 0;
		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
			if (wu == wup) {
				/* wu on pendq, remove */
				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
				pend = 1;

				if (wu->swu_collider) {
					if (wu->swu_ios_failed)
						/* toss all ccbs and recreate */
						sr_raid_recreate_wu(wu->swu_collider);

					/* restart deferred wu */
					wu->swu_collider->swu_state =
					    SR_WU_INPROGRESS;
					TAILQ_REMOVE(&sd->sd_wu_defq,
					    wu->swu_collider, swu_link);
					sr_raid_startwu(wu->swu_collider);
				}
				break;
			}
		}

		if (!pend)
			printf("%s: wu: %p not on pending queue\n",
			    DEVNAME(sc), wu);

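		/*
		 * A rebuild write signals completion to the waiting rebuild
		 * code; regular I/O is completed back to the SCSI layer.
		 */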
		if (wu->swu_flags & SR_WUF_REBUILD) {
			if (wu->swu_xs->flags & SCSI_DATA_OUT) {
				wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
				wakeup(wu);
			}
		} else {
			sr_scsi_done(sd, xs);
		}

		if (sd->sd_sync && sd->sd_wu_pending == 0)
			wakeup(sd);
	}

retry:
	splx(s);
	return;
bad:
	xs->error = XS_DRIVER_STUFFUP;
	if (wu->swu_flags & SR_WUF_REBUILD) {
		wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
		wakeup(wu);
	} else {
		sr_scsi_done(sd, xs);
	}

	splx(s);
}