xref: /openbsd-src/sys/dev/softraid_raid1.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /* $OpenBSD: softraid_raid1.c,v 1.58 2014/09/14 14:17:24 jsg Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/malloc.h>
26 #include <sys/kernel.h>
27 #include <sys/disk.h>
28 #include <sys/rwlock.h>
29 #include <sys/queue.h>
30 #include <sys/fcntl.h>
31 #include <sys/disklabel.h>
32 #include <sys/mount.h>
33 #include <sys/sensors.h>
34 #include <sys/stat.h>
35 #include <sys/task.h>
36 #include <sys/workq.h>
37 #include <sys/conf.h>
38 #include <sys/uio.h>
39 
40 #include <scsi/scsi_all.h>
41 #include <scsi/scsiconf.h>
42 #include <scsi/scsi_disk.h>
43 
44 #include <dev/softraidvar.h>
45 #include <dev/rndvar.h>
46 
47 /* RAID 1 functions. */
48 int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *,
49 	    int, int64_t);
50 int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *,
51 	    int, void *);
52 int	sr_raid1_init(struct sr_discipline *sd);
53 int	sr_raid1_rw(struct sr_workunit *);
54 int	sr_raid1_wu_done(struct sr_workunit *);
55 void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
56 void	sr_raid1_set_vol_state(struct sr_discipline *);
57 
58 /* Discipline initialisation. */
59 void
60 sr_raid1_discipline_init(struct sr_discipline *sd)
61 {
62 	/* Fill out discipline members. */
63 	sd->sd_type = SR_MD_RAID1;
64 	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
65 	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
66 	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
67 	sd->sd_max_wu = SR_RAID1_NOWU;
68 
69 	/* Setup discipline specific function pointers. */
70 	sd->sd_assemble = sr_raid1_assemble;
71 	sd->sd_create = sr_raid1_create;
72 	sd->sd_scsi_rw = sr_raid1_rw;
73 	sd->sd_scsi_wu_done = sr_raid1_wu_done;
74 	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
75 	sd->sd_set_vol_state = sr_raid1_set_vol_state;
76 }
77 
78 int
79 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
80     int no_chunk, int64_t coerced_size)
81 {
82 	if (no_chunk < 2) {
83 		sr_error(sd->sd_sc, "%s requires two or more chunks",
84 		    sd->sd_name);
85 		return EINVAL;
86 	}
87 
88 	sd->sd_meta->ssdi.ssd_size = coerced_size;
89 
90 	return sr_raid1_init(sd);
91 }
92 
/*
 * Assemble an existing RAID 1 volume.  Nothing beyond the common
 * initialisation is done; bc, no_chunk and data are not used.
 * Returns whatever sr_raid1_init() returns.
 */
int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int			rv;

	rv = sr_raid1_init(sd);

	return (rv);
}
99 
100 int
101 sr_raid1_init(struct sr_discipline *sd)
102 {
103 	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
104 
105 	return 0;
106 }
107 
108 void
109 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
110 {
111 	int			old_state, s;
112 
113 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
114 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
115 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
116 
117 	/* ok to go to splbio since this only happens in error path */
118 	s = splbio();
119 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
120 
121 	/* multiple IOs to the same chunk that fail will come through here */
122 	if (old_state == new_state)
123 		goto done;
124 
125 	switch (old_state) {
126 	case BIOC_SDONLINE:
127 		switch (new_state) {
128 		case BIOC_SDOFFLINE:
129 		case BIOC_SDSCRUB:
130 			break;
131 		default:
132 			goto die;
133 		}
134 		break;
135 
136 	case BIOC_SDOFFLINE:
137 		switch (new_state) {
138 		case BIOC_SDREBUILD:
139 		case BIOC_SDHOTSPARE:
140 			break;
141 		default:
142 			goto die;
143 		}
144 		break;
145 
146 	case BIOC_SDSCRUB:
147 		if (new_state == BIOC_SDONLINE) {
148 			;
149 		} else
150 			goto die;
151 		break;
152 
153 	case BIOC_SDREBUILD:
154 		switch (new_state) {
155 		case BIOC_SDONLINE:
156 			break;
157 		case BIOC_SDOFFLINE:
158 			/* Abort rebuild since the rebuild chunk disappeared. */
159 			sd->sd_reb_abort = 1;
160 			break;
161 		default:
162 			goto die;
163 		}
164 		break;
165 
166 	case BIOC_SDHOTSPARE:
167 		switch (new_state) {
168 		case BIOC_SDOFFLINE:
169 		case BIOC_SDREBUILD:
170 			break;
171 		default:
172 			goto die;
173 		}
174 		break;
175 
176 	default:
177 die:
178 		splx(s); /* XXX */
179 		panic("%s: %s: %s: invalid chunk state transition "
180 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
181 		    sd->sd_meta->ssd_devname,
182 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
183 		    old_state, new_state);
184 		/* NOTREACHED */
185 	}
186 
187 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
188 	sd->sd_set_vol_state(sd);
189 
190 	sd->sd_must_flush = 1;
191 	task_add(systq, &sd->sd_meta_save_task);
192 done:
193 	splx(s);
194 }
195 
196 void
197 sr_raid1_set_vol_state(struct sr_discipline *sd)
198 {
199 	int			states[SR_MAX_STATES];
200 	int			new_state, i, s, nd;
201 	int			old_state = sd->sd_vol_status;
202 
203 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
204 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
205 
206 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
207 
208 #ifdef SR_DEBUG
209 	for (i = 0; i < nd; i++)
210 		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
211 		    DEVNAME(sd->sd_sc), i,
212 		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
213 #endif
214 
215 	for (i = 0; i < SR_MAX_STATES; i++)
216 		states[i] = 0;
217 
218 	for (i = 0; i < nd; i++) {
219 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
220 		if (s >= SR_MAX_STATES)
221 			panic("%s: %s: %s: invalid chunk state",
222 			    DEVNAME(sd->sd_sc),
223 			    sd->sd_meta->ssd_devname,
224 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
225 		states[s]++;
226 	}
227 
228 	if (states[BIOC_SDONLINE] == nd)
229 		new_state = BIOC_SVONLINE;
230 	else if (states[BIOC_SDONLINE] == 0)
231 		new_state = BIOC_SVOFFLINE;
232 	else if (states[BIOC_SDSCRUB] != 0)
233 		new_state = BIOC_SVSCRUB;
234 	else if (states[BIOC_SDREBUILD] != 0)
235 		new_state = BIOC_SVREBUILD;
236 	else if (states[BIOC_SDOFFLINE] != 0)
237 		new_state = BIOC_SVDEGRADED;
238 	else {
239 		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
240 		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
241 		panic("invalid volume state");
242 	}
243 
244 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
245 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
246 	    old_state, new_state);
247 
248 	switch (old_state) {
249 	case BIOC_SVONLINE:
250 		switch (new_state) {
251 		case BIOC_SVONLINE: /* can go to same state */
252 		case BIOC_SVOFFLINE:
253 		case BIOC_SVDEGRADED:
254 		case BIOC_SVREBUILD: /* happens on boot */
255 			break;
256 		default:
257 			goto die;
258 		}
259 		break;
260 
261 	case BIOC_SVOFFLINE:
262 		/* XXX this might be a little too much */
263 		goto die;
264 
265 	case BIOC_SVDEGRADED:
266 		switch (new_state) {
267 		case BIOC_SVOFFLINE:
268 		case BIOC_SVREBUILD:
269 		case BIOC_SVDEGRADED: /* can go to the same state */
270 			break;
271 		default:
272 			goto die;
273 		}
274 		break;
275 
276 	case BIOC_SVBUILDING:
277 		switch (new_state) {
278 		case BIOC_SVONLINE:
279 		case BIOC_SVOFFLINE:
280 		case BIOC_SVBUILDING: /* can go to the same state */
281 			break;
282 		default:
283 			goto die;
284 		}
285 		break;
286 
287 	case BIOC_SVSCRUB:
288 		switch (new_state) {
289 		case BIOC_SVONLINE:
290 		case BIOC_SVOFFLINE:
291 		case BIOC_SVDEGRADED:
292 		case BIOC_SVSCRUB: /* can go to same state */
293 			break;
294 		default:
295 			goto die;
296 		}
297 		break;
298 
299 	case BIOC_SVREBUILD:
300 		switch (new_state) {
301 		case BIOC_SVONLINE:
302 		case BIOC_SVOFFLINE:
303 		case BIOC_SVDEGRADED:
304 		case BIOC_SVREBUILD: /* can go to the same state */
305 			break;
306 		default:
307 			goto die;
308 		}
309 		break;
310 
311 	default:
312 die:
313 		panic("%s: %s: invalid volume state transition "
314 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
315 		    sd->sd_meta->ssd_devname,
316 		    old_state, new_state);
317 		/* NOTREACHED */
318 	}
319 
320 	sd->sd_vol_status = new_state;
321 
322 	/* If we have just become degraded, look for a hotspare. */
323 	if (new_state == BIOC_SVDEGRADED)
324 		task_add(systq, &sd->sd_hotspare_rebuild_task);
325 }
326 
327 int
328 sr_raid1_rw(struct sr_workunit *wu)
329 {
330 	struct sr_discipline	*sd = wu->swu_dis;
331 	struct scsi_xfer	*xs = wu->swu_xs;
332 	struct sr_ccb		*ccb;
333 	struct sr_chunk		*scp;
334 	int			ios, chunk, i, rt;
335 	daddr_t			blk;
336 
337 	/* blk and scsi error will be handled by sr_validate_io */
338 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
339 		goto bad;
340 
341 	/* calculate physical block */
342 	blk += sd->sd_meta->ssd_data_offset;
343 
344 	if (xs->flags & SCSI_DATA_IN)
345 		ios = 1;
346 	else
347 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
348 
349 	for (i = 0; i < ios; i++) {
350 		if (xs->flags & SCSI_DATA_IN) {
351 			rt = 0;
352 ragain:
353 			/* interleave reads */
354 			chunk = sd->mds.mdd_raid1.sr1_counter++ %
355 			    sd->sd_meta->ssdi.ssd_chunk_no;
356 			scp = sd->sd_vol.sv_chunks[chunk];
357 			switch (scp->src_meta.scm_status) {
358 			case BIOC_SDONLINE:
359 			case BIOC_SDSCRUB:
360 				break;
361 
362 			case BIOC_SDOFFLINE:
363 			case BIOC_SDREBUILD:
364 			case BIOC_SDHOTSPARE:
365 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
366 					goto ragain;
367 
368 				/* FALLTHROUGH */
369 			default:
370 				/* volume offline */
371 				printf("%s: is offline, cannot read\n",
372 				    DEVNAME(sd->sd_sc));
373 				goto bad;
374 			}
375 		} else {
376 			/* writes go on all working disks */
377 			chunk = i;
378 			scp = sd->sd_vol.sv_chunks[chunk];
379 			switch (scp->src_meta.scm_status) {
380 			case BIOC_SDONLINE:
381 			case BIOC_SDSCRUB:
382 			case BIOC_SDREBUILD:
383 				break;
384 
385 			case BIOC_SDHOTSPARE: /* should never happen */
386 			case BIOC_SDOFFLINE:
387 				continue;
388 
389 			default:
390 				goto bad;
391 			}
392 		}
393 
394 		ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data,
395 		    xs->flags, 0);
396 		if (!ccb) {
397 			/* should never happen but handle more gracefully */
398 			printf("%s: %s: too many ccbs queued\n",
399 			    DEVNAME(sd->sd_sc),
400 			    sd->sd_meta->ssd_devname);
401 			goto bad;
402 		}
403 		sr_wu_enqueue_ccb(wu, ccb);
404 	}
405 
406 	sr_schedule_wu(wu);
407 
408 	return (0);
409 
410 bad:
411 	/* wu is unwound by sr_wu_put */
412 	return (1);
413 }
414 
415 int
416 sr_raid1_wu_done(struct sr_workunit *wu)
417 {
418 	struct sr_discipline	*sd = wu->swu_dis;
419 	struct scsi_xfer	*xs = wu->swu_xs;
420 
421 	/* If at least one I/O succeeded, we are okay. */
422 	if (wu->swu_ios_succeeded > 0) {
423 		xs->error = XS_NOERROR;
424 		return SR_WU_OK;
425 	}
426 
427 	/* If all I/O failed, retry reads and give up on writes. */
428 	if (xs->flags & SCSI_DATA_IN) {
429 		printf("%s: retrying read on block %lld\n",
430 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
431 		if (wu->swu_cb_active == 1)
432 			panic("%s: sr_raid1_intr_cb",
433 			    DEVNAME(sd->sd_sc));
434 		sr_wu_release_ccbs(wu);
435 		wu->swu_state = SR_WU_RESTART;
436 		if (sd->sd_scsi_rw(wu) == 0)
437 			return SR_WU_RESTART;
438 	} else {
439 		printf("%s: permanently failing write on block %lld\n",
440 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
441 	}
442 
443 	wu->swu_state = SR_WU_FAILED;
444 	xs->error = XS_DRIVER_STUFFUP;
445 
446 	return SR_WU_FAILED;
447 }
448