xref: /openbsd-src/sys/dev/softraid_raid1.c (revision fc627a5c7c1ac06c524fde49b452d96cf7c96cf0)
1 /* $OpenBSD: softraid_raid1.c,v 1.47 2013/03/31 11:12:06 jsing Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/proc.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/disklabel.h>
33 #include <sys/mount.h>
34 #include <sys/sensors.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 #include <dev/rndvar.h>
45 
/* Handlers implemented by the RAID 1 discipline (defined below). */
int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid1_set_vol_state(struct sr_discipline *);
int	sr_raid1_rw(struct sr_workunit *);
void	sr_raid1_intr(struct buf *);
55 
56 /* Discipline initialisation. */
57 void
58 sr_raid1_discipline_init(struct sr_discipline *sd)
59 {
60 	/* Fill out discipline members. */
61 	sd->sd_type = SR_MD_RAID1;
62 	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
63 	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
64 	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
65 	sd->sd_max_wu = SR_RAID1_NOWU;
66 
67 	/* Setup discipline specific function pointers. */
68 	sd->sd_assemble = sr_raid1_assemble;
69 	sd->sd_create = sr_raid1_create;
70 	sd->sd_scsi_rw = sr_raid1_rw;
71 	sd->sd_scsi_intr = sr_raid1_intr;
72 	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
73 	sd->sd_set_vol_state = sr_raid1_set_vol_state;
74 }
75 
76 int
77 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
78     int no_chunk, int64_t coerced_size)
79 {
80 
81 	if (no_chunk < 2) {
82 		sr_error(sd->sd_sc, "RAID 1 requires two or more chunks");
83 		return EINVAL;
84 	}
85 
86 	sd->sd_meta->ssdi.ssd_size = coerced_size;
87 
88 	sd->sd_max_ccb_per_wu = no_chunk;
89 
90 	return 0;
91 }
92 
93 int
94 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
95     int no_chunk, void *data)
96 {
97 
98 	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
99 
100 	return 0;
101 }
102 
103 void
104 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
105 {
106 	int			old_state, s;
107 
108 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
109 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
110 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
111 
112 	/* ok to go to splbio since this only happens in error path */
113 	s = splbio();
114 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
115 
116 	/* multiple IOs to the same chunk that fail will come through here */
117 	if (old_state == new_state)
118 		goto done;
119 
120 	switch (old_state) {
121 	case BIOC_SDONLINE:
122 		switch (new_state) {
123 		case BIOC_SDOFFLINE:
124 		case BIOC_SDSCRUB:
125 			break;
126 		default:
127 			goto die;
128 		}
129 		break;
130 
131 	case BIOC_SDOFFLINE:
132 		switch (new_state) {
133 		case BIOC_SDREBUILD:
134 		case BIOC_SDHOTSPARE:
135 			break;
136 		default:
137 			goto die;
138 		}
139 		break;
140 
141 	case BIOC_SDSCRUB:
142 		if (new_state == BIOC_SDONLINE) {
143 			;
144 		} else
145 			goto die;
146 		break;
147 
148 	case BIOC_SDREBUILD:
149 		switch (new_state) {
150 		case BIOC_SDONLINE:
151 			break;
152 		case BIOC_SDOFFLINE:
153 			/* Abort rebuild since the rebuild chunk disappeared. */
154 			sd->sd_reb_abort = 1;
155 			break;
156 		default:
157 			goto die;
158 		}
159 		break;
160 
161 	case BIOC_SDHOTSPARE:
162 		switch (new_state) {
163 		case BIOC_SDOFFLINE:
164 		case BIOC_SDREBUILD:
165 			break;
166 		default:
167 			goto die;
168 		}
169 		break;
170 
171 	default:
172 die:
173 		splx(s); /* XXX */
174 		panic("%s: %s: %s: invalid chunk state transition "
175 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
176 		    sd->sd_meta->ssd_devname,
177 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
178 		    old_state, new_state);
179 		/* NOTREACHED */
180 	}
181 
182 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
183 	sd->sd_set_vol_state(sd);
184 
185 	sd->sd_must_flush = 1;
186 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
187 done:
188 	splx(s);
189 }
190 
191 void
192 sr_raid1_set_vol_state(struct sr_discipline *sd)
193 {
194 	int			states[SR_MAX_STATES];
195 	int			new_state, i, s, nd;
196 	int			old_state = sd->sd_vol_status;
197 
198 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
199 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
200 
201 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
202 
203 #ifdef SR_DEBUG
204 	for (i = 0; i < nd; i++)
205 		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
206 		    DEVNAME(sd->sd_sc), i,
207 		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
208 #endif
209 
210 	for (i = 0; i < SR_MAX_STATES; i++)
211 		states[i] = 0;
212 
213 	for (i = 0; i < nd; i++) {
214 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
215 		if (s >= SR_MAX_STATES)
216 			panic("%s: %s: %s: invalid chunk state",
217 			    DEVNAME(sd->sd_sc),
218 			    sd->sd_meta->ssd_devname,
219 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
220 		states[s]++;
221 	}
222 
223 	if (states[BIOC_SDONLINE] == nd)
224 		new_state = BIOC_SVONLINE;
225 	else if (states[BIOC_SDONLINE] == 0)
226 		new_state = BIOC_SVOFFLINE;
227 	else if (states[BIOC_SDSCRUB] != 0)
228 		new_state = BIOC_SVSCRUB;
229 	else if (states[BIOC_SDREBUILD] != 0)
230 		new_state = BIOC_SVREBUILD;
231 	else if (states[BIOC_SDOFFLINE] != 0)
232 		new_state = BIOC_SVDEGRADED;
233 	else {
234 		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
235 		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
236 		panic("invalid volume state");
237 	}
238 
239 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
240 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
241 	    old_state, new_state);
242 
243 	switch (old_state) {
244 	case BIOC_SVONLINE:
245 		switch (new_state) {
246 		case BIOC_SVONLINE: /* can go to same state */
247 		case BIOC_SVOFFLINE:
248 		case BIOC_SVDEGRADED:
249 		case BIOC_SVREBUILD: /* happens on boot */
250 			break;
251 		default:
252 			goto die;
253 		}
254 		break;
255 
256 	case BIOC_SVOFFLINE:
257 		/* XXX this might be a little too much */
258 		goto die;
259 
260 	case BIOC_SVSCRUB:
261 		switch (new_state) {
262 		case BIOC_SVONLINE:
263 		case BIOC_SVOFFLINE:
264 		case BIOC_SVDEGRADED:
265 		case BIOC_SVSCRUB: /* can go to same state */
266 			break;
267 		default:
268 			goto die;
269 		}
270 		break;
271 
272 	case BIOC_SVBUILDING:
273 		switch (new_state) {
274 		case BIOC_SVONLINE:
275 		case BIOC_SVOFFLINE:
276 		case BIOC_SVBUILDING: /* can go to the same state */
277 			break;
278 		default:
279 			goto die;
280 		}
281 		break;
282 
283 	case BIOC_SVREBUILD:
284 		switch (new_state) {
285 		case BIOC_SVONLINE:
286 		case BIOC_SVOFFLINE:
287 		case BIOC_SVDEGRADED:
288 		case BIOC_SVREBUILD: /* can go to the same state */
289 			break;
290 		default:
291 			goto die;
292 		}
293 		break;
294 
295 	case BIOC_SVDEGRADED:
296 		switch (new_state) {
297 		case BIOC_SVOFFLINE:
298 		case BIOC_SVREBUILD:
299 		case BIOC_SVDEGRADED: /* can go to the same state */
300 			break;
301 		default:
302 			goto die;
303 		}
304 		break;
305 
306 	default:
307 die:
308 		panic("%s: %s: invalid volume state transition "
309 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
310 		    sd->sd_meta->ssd_devname,
311 		    old_state, new_state);
312 		/* NOTREACHED */
313 	}
314 
315 	sd->sd_vol_status = new_state;
316 
317 	/* If we have just become degraded, look for a hotspare. */
318 	if (new_state == BIOC_SVDEGRADED)
319 		workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL);
320 }
321 
322 int
323 sr_raid1_rw(struct sr_workunit *wu)
324 {
325 	struct sr_discipline	*sd = wu->swu_dis;
326 	struct scsi_xfer	*xs = wu->swu_xs;
327 	struct sr_ccb		*ccb;
328 	struct sr_chunk		*scp;
329 	int			ios, chunk, i, s, rt;
330 	daddr64_t		blk;
331 
332 	/* blk and scsi error will be handled by sr_validate_io */
333 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
334 		goto bad;
335 
336 	/* calculate physical block */
337 	blk += sd->sd_meta->ssd_data_offset;
338 
339 	if (xs->flags & SCSI_DATA_IN)
340 		ios = 1;
341 	else
342 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
343 
344 	for (i = 0; i < ios; i++) {
345 		if (xs->flags & SCSI_DATA_IN) {
346 			rt = 0;
347 ragain:
348 			/* interleave reads */
349 			chunk = sd->mds.mdd_raid1.sr1_counter++ %
350 			    sd->sd_meta->ssdi.ssd_chunk_no;
351 			scp = sd->sd_vol.sv_chunks[chunk];
352 			switch (scp->src_meta.scm_status) {
353 			case BIOC_SDONLINE:
354 			case BIOC_SDSCRUB:
355 				break;
356 
357 			case BIOC_SDOFFLINE:
358 			case BIOC_SDREBUILD:
359 			case BIOC_SDHOTSPARE:
360 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
361 					goto ragain;
362 
363 				/* FALLTHROUGH */
364 			default:
365 				/* volume offline */
366 				printf("%s: is offline, cannot read\n",
367 				    DEVNAME(sd->sd_sc));
368 				goto bad;
369 			}
370 		} else {
371 			/* writes go on all working disks */
372 			chunk = i;
373 			scp = sd->sd_vol.sv_chunks[chunk];
374 			switch (scp->src_meta.scm_status) {
375 			case BIOC_SDONLINE:
376 			case BIOC_SDSCRUB:
377 			case BIOC_SDREBUILD:
378 				break;
379 
380 			case BIOC_SDHOTSPARE: /* should never happen */
381 			case BIOC_SDOFFLINE:
382 				continue;
383 
384 			default:
385 				goto bad;
386 			}
387 		}
388 
389 		ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data,
390 		    xs->flags, 0);
391 		if (!ccb) {
392 			/* should never happen but handle more gracefully */
393 			printf("%s: %s: too many ccbs queued\n",
394 			    DEVNAME(sd->sd_sc),
395 			    sd->sd_meta->ssd_devname);
396 			goto bad;
397 		}
398 		sr_wu_enqueue_ccb(wu, ccb);
399 	}
400 
401 	s = splbio();
402 
403 	/* rebuild io, let rebuild routine deal with it */
404 	if (wu->swu_flags & SR_WUF_REBUILD)
405 		goto queued;
406 
407 	/* current io failed, restart */
408 	if (wu->swu_state == SR_WU_RESTART)
409 		goto start;
410 
411 	/* deferred io failed, don't restart */
412 	if (wu->swu_state == SR_WU_REQUEUE)
413 		goto queued;
414 
415 	if (sr_check_io_collision(wu))
416 		goto queued;
417 
418 start:
419 	sr_raid_startwu(wu);
420 queued:
421 	splx(s);
422 	return (0);
423 bad:
424 	/* wu is unwound by sr_wu_put */
425 	return (1);
426 }
427 
428 void
429 sr_raid1_intr(struct buf *bp)
430 {
431 	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
432 	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
433 	struct sr_discipline	*sd = wu->swu_dis;
434 	struct scsi_xfer	*xs = wu->swu_xs;
435 	struct sr_softc		*sc = sd->sd_sc;
436 	int			s;
437 
438 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
439 	    DEVNAME(sc), bp, xs);
440 
441 	s = splbio();
442 
443 	sr_ccb_done(ccb);
444 
445 	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
446 	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
447 	    wu->swu_ios_failed);
448 
449 	if (wu->swu_ios_complete < wu->swu_io_count)
450 		goto done;
451 
452 	xs->error = XS_NOERROR;
453 
454 	/* if all ios failed, retry reads and give up on writes */
455 	if (wu->swu_ios_failed == wu->swu_ios_complete) {
456 		if (xs->flags & SCSI_DATA_IN) {
457 			printf("%s: retrying read on block %lld\n",
458 			    DEVNAME(sc), ccb->ccb_buf.b_blkno);
459 			if (wu->swu_cb_active == 1)
460 				panic("%s: sr_raid1_intr_cb",
461 				    DEVNAME(sd->sd_sc));
462 			sr_wu_release_ccbs(wu);
463 			wu->swu_state = SR_WU_RESTART;
464 			if (sd->sd_scsi_rw(wu) == 0)
465 				goto done;
466 			xs->error = XS_DRIVER_STUFFUP;
467 		} else {
468 			printf("%s: permanently failing write on block %lld\n",
469 			    DEVNAME(sc), ccb->ccb_buf.b_blkno);
470 			xs->error = XS_DRIVER_STUFFUP;
471 		}
472 	}
473 
474 	TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link)
475 		if (wu == wup)
476 			break;
477 
478 	if (wup == NULL)
479 		panic("%s: wu %p not on pending queue",
480 		    DEVNAME(sd->sd_sc), wu);
481 
482 	/* wu on pendq, remove */
483 	TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
484 
485 	if (wu->swu_collider) {
486 		if (wu->swu_ios_failed)
487 			sr_raid_recreate_wu(wu->swu_collider);
488 
489 		/* XXX Should the collider be failed if this xs failed? */
490 		/* restart deferred wu */
491 		wu->swu_collider->swu_state = SR_WU_INPROGRESS;
492 		TAILQ_REMOVE(&sd->sd_wu_defq, wu->swu_collider, swu_link);
493 		sr_raid_startwu(wu->swu_collider);
494 	}
495 
496 	if (wu->swu_flags & SR_WUF_REBUILD)
497 		wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
498 	if (wu->swu_flags & SR_WUF_WAKEUP)
499 		wakeup(wu);
500 	if (!(wu->swu_flags & SR_WUF_REBUILD))
501 		sr_scsi_done(sd, xs);
502 
503 done:
504 	splx(s);
505 }
506