xref: /openbsd-src/sys/dev/softraid_raid1.c (revision cb39b41371628601fbe4c618205356d538b9d08a)
1 /* $OpenBSD: softraid_raid1.c,v 1.60 2015/01/27 10:12:45 dlg Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/malloc.h>
26 #include <sys/kernel.h>
27 #include <sys/disk.h>
28 #include <sys/rwlock.h>
29 #include <sys/queue.h>
30 #include <sys/fcntl.h>
31 #include <sys/disklabel.h>
32 #include <sys/mount.h>
33 #include <sys/sensors.h>
34 #include <sys/stat.h>
35 #include <sys/task.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 
/* Prototypes for the RAID 1 discipline functions defined in this file. */
int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, int,
	    int64_t);
int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, int,
	    void *);
int	sr_raid1_init(struct sr_discipline *);
int	sr_raid1_rw(struct sr_workunit *);
int	sr_raid1_wu_done(struct sr_workunit *);
void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid1_set_vol_state(struct sr_discipline *);
55 
56 /* Discipline initialisation. */
57 void
58 sr_raid1_discipline_init(struct sr_discipline *sd)
59 {
60 	/* Fill out discipline members. */
61 	sd->sd_type = SR_MD_RAID1;
62 	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
63 	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
64 	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
65 	sd->sd_max_wu = SR_RAID1_NOWU;
66 
67 	/* Setup discipline specific function pointers. */
68 	sd->sd_assemble = sr_raid1_assemble;
69 	sd->sd_create = sr_raid1_create;
70 	sd->sd_scsi_rw = sr_raid1_rw;
71 	sd->sd_scsi_wu_done = sr_raid1_wu_done;
72 	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
73 	sd->sd_set_vol_state = sr_raid1_set_vol_state;
74 }
75 
76 int
77 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
78     int no_chunk, int64_t coerced_size)
79 {
80 	if (no_chunk < 2) {
81 		sr_error(sd->sd_sc, "%s requires two or more chunks",
82 		    sd->sd_name);
83 		return EINVAL;
84 	}
85 
86 	sd->sd_meta->ssdi.ssd_size = coerced_size;
87 
88 	return sr_raid1_init(sd);
89 }
90 
/*
 * Assemble an existing RAID 1 volume.
 *
 * Only the common RAID 1 initialisation is needed; `bc', `no_chunk' and
 * `data' are not used.  Returns whatever sr_raid1_init() returns.
 */
int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int			rv;

	rv = sr_raid1_init(sd);

	return rv;
}
97 
98 int
99 sr_raid1_init(struct sr_discipline *sd)
100 {
101 	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
102 
103 	return 0;
104 }
105 
106 void
107 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
108 {
109 	int			old_state, s;
110 
111 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
112 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
113 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
114 
115 	/* ok to go to splbio since this only happens in error path */
116 	s = splbio();
117 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
118 
119 	/* multiple IOs to the same chunk that fail will come through here */
120 	if (old_state == new_state)
121 		goto done;
122 
123 	switch (old_state) {
124 	case BIOC_SDONLINE:
125 		switch (new_state) {
126 		case BIOC_SDOFFLINE:
127 		case BIOC_SDSCRUB:
128 			break;
129 		default:
130 			goto die;
131 		}
132 		break;
133 
134 	case BIOC_SDOFFLINE:
135 		switch (new_state) {
136 		case BIOC_SDREBUILD:
137 		case BIOC_SDHOTSPARE:
138 			break;
139 		default:
140 			goto die;
141 		}
142 		break;
143 
144 	case BIOC_SDSCRUB:
145 		if (new_state == BIOC_SDONLINE) {
146 			;
147 		} else
148 			goto die;
149 		break;
150 
151 	case BIOC_SDREBUILD:
152 		switch (new_state) {
153 		case BIOC_SDONLINE:
154 			break;
155 		case BIOC_SDOFFLINE:
156 			/* Abort rebuild since the rebuild chunk disappeared. */
157 			sd->sd_reb_abort = 1;
158 			break;
159 		default:
160 			goto die;
161 		}
162 		break;
163 
164 	case BIOC_SDHOTSPARE:
165 		switch (new_state) {
166 		case BIOC_SDOFFLINE:
167 		case BIOC_SDREBUILD:
168 			break;
169 		default:
170 			goto die;
171 		}
172 		break;
173 
174 	default:
175 die:
176 		splx(s); /* XXX */
177 		panic("%s: %s: %s: invalid chunk state transition "
178 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
179 		    sd->sd_meta->ssd_devname,
180 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
181 		    old_state, new_state);
182 		/* NOTREACHED */
183 	}
184 
185 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
186 	sd->sd_set_vol_state(sd);
187 
188 	sd->sd_must_flush = 1;
189 	task_add(systq, &sd->sd_meta_save_task);
190 done:
191 	splx(s);
192 }
193 
194 void
195 sr_raid1_set_vol_state(struct sr_discipline *sd)
196 {
197 	int			states[SR_MAX_STATES];
198 	int			new_state, i, s, nd;
199 	int			old_state = sd->sd_vol_status;
200 
201 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
202 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
203 
204 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
205 
206 #ifdef SR_DEBUG
207 	for (i = 0; i < nd; i++)
208 		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
209 		    DEVNAME(sd->sd_sc), i,
210 		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
211 #endif
212 
213 	for (i = 0; i < SR_MAX_STATES; i++)
214 		states[i] = 0;
215 
216 	for (i = 0; i < nd; i++) {
217 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
218 		if (s >= SR_MAX_STATES)
219 			panic("%s: %s: %s: invalid chunk state",
220 			    DEVNAME(sd->sd_sc),
221 			    sd->sd_meta->ssd_devname,
222 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
223 		states[s]++;
224 	}
225 
226 	if (states[BIOC_SDONLINE] == nd)
227 		new_state = BIOC_SVONLINE;
228 	else if (states[BIOC_SDONLINE] == 0)
229 		new_state = BIOC_SVOFFLINE;
230 	else if (states[BIOC_SDSCRUB] != 0)
231 		new_state = BIOC_SVSCRUB;
232 	else if (states[BIOC_SDREBUILD] != 0)
233 		new_state = BIOC_SVREBUILD;
234 	else if (states[BIOC_SDOFFLINE] != 0)
235 		new_state = BIOC_SVDEGRADED;
236 	else {
237 		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
238 		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
239 		panic("invalid volume state");
240 	}
241 
242 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
243 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
244 	    old_state, new_state);
245 
246 	switch (old_state) {
247 	case BIOC_SVONLINE:
248 		switch (new_state) {
249 		case BIOC_SVONLINE: /* can go to same state */
250 		case BIOC_SVOFFLINE:
251 		case BIOC_SVDEGRADED:
252 		case BIOC_SVREBUILD: /* happens on boot */
253 			break;
254 		default:
255 			goto die;
256 		}
257 		break;
258 
259 	case BIOC_SVOFFLINE:
260 		/* XXX this might be a little too much */
261 		goto die;
262 
263 	case BIOC_SVDEGRADED:
264 		switch (new_state) {
265 		case BIOC_SVOFFLINE:
266 		case BIOC_SVREBUILD:
267 		case BIOC_SVDEGRADED: /* can go to the same state */
268 			break;
269 		default:
270 			goto die;
271 		}
272 		break;
273 
274 	case BIOC_SVBUILDING:
275 		switch (new_state) {
276 		case BIOC_SVONLINE:
277 		case BIOC_SVOFFLINE:
278 		case BIOC_SVBUILDING: /* can go to the same state */
279 			break;
280 		default:
281 			goto die;
282 		}
283 		break;
284 
285 	case BIOC_SVSCRUB:
286 		switch (new_state) {
287 		case BIOC_SVONLINE:
288 		case BIOC_SVOFFLINE:
289 		case BIOC_SVDEGRADED:
290 		case BIOC_SVSCRUB: /* can go to same state */
291 			break;
292 		default:
293 			goto die;
294 		}
295 		break;
296 
297 	case BIOC_SVREBUILD:
298 		switch (new_state) {
299 		case BIOC_SVONLINE:
300 		case BIOC_SVOFFLINE:
301 		case BIOC_SVDEGRADED:
302 		case BIOC_SVREBUILD: /* can go to the same state */
303 			break;
304 		default:
305 			goto die;
306 		}
307 		break;
308 
309 	default:
310 die:
311 		panic("%s: %s: invalid volume state transition "
312 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
313 		    sd->sd_meta->ssd_devname,
314 		    old_state, new_state);
315 		/* NOTREACHED */
316 	}
317 
318 	sd->sd_vol_status = new_state;
319 
320 	/* If we have just become degraded, look for a hotspare. */
321 	if (new_state == BIOC_SVDEGRADED)
322 		task_add(systq, &sd->sd_hotspare_rebuild_task);
323 }
324 
325 int
326 sr_raid1_rw(struct sr_workunit *wu)
327 {
328 	struct sr_discipline	*sd = wu->swu_dis;
329 	struct scsi_xfer	*xs = wu->swu_xs;
330 	struct sr_ccb		*ccb;
331 	struct sr_chunk		*scp;
332 	int			ios, chunk, i, rt;
333 	daddr_t			blk;
334 
335 	/* blk and scsi error will be handled by sr_validate_io */
336 	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
337 		goto bad;
338 
339 	/* calculate physical block */
340 	blk += sd->sd_meta->ssd_data_offset;
341 
342 	if (xs->flags & SCSI_DATA_IN)
343 		ios = 1;
344 	else
345 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
346 
347 	for (i = 0; i < ios; i++) {
348 		if (xs->flags & SCSI_DATA_IN) {
349 			rt = 0;
350 ragain:
351 			/* interleave reads */
352 			chunk = sd->mds.mdd_raid1.sr1_counter++ %
353 			    sd->sd_meta->ssdi.ssd_chunk_no;
354 			scp = sd->sd_vol.sv_chunks[chunk];
355 			switch (scp->src_meta.scm_status) {
356 			case BIOC_SDONLINE:
357 			case BIOC_SDSCRUB:
358 				break;
359 
360 			case BIOC_SDOFFLINE:
361 			case BIOC_SDREBUILD:
362 			case BIOC_SDHOTSPARE:
363 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
364 					goto ragain;
365 
366 				/* FALLTHROUGH */
367 			default:
368 				/* volume offline */
369 				printf("%s: is offline, cannot read\n",
370 				    DEVNAME(sd->sd_sc));
371 				goto bad;
372 			}
373 		} else {
374 			/* writes go on all working disks */
375 			chunk = i;
376 			scp = sd->sd_vol.sv_chunks[chunk];
377 			switch (scp->src_meta.scm_status) {
378 			case BIOC_SDONLINE:
379 			case BIOC_SDSCRUB:
380 			case BIOC_SDREBUILD:
381 				break;
382 
383 			case BIOC_SDHOTSPARE: /* should never happen */
384 			case BIOC_SDOFFLINE:
385 				continue;
386 
387 			default:
388 				goto bad;
389 			}
390 		}
391 
392 		ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data,
393 		    xs->flags, 0);
394 		if (!ccb) {
395 			/* should never happen but handle more gracefully */
396 			printf("%s: %s: too many ccbs queued\n",
397 			    DEVNAME(sd->sd_sc),
398 			    sd->sd_meta->ssd_devname);
399 			goto bad;
400 		}
401 		sr_wu_enqueue_ccb(wu, ccb);
402 	}
403 
404 	sr_schedule_wu(wu);
405 
406 	return (0);
407 
408 bad:
409 	/* wu is unwound by sr_wu_put */
410 	return (1);
411 }
412 
413 int
414 sr_raid1_wu_done(struct sr_workunit *wu)
415 {
416 	struct sr_discipline	*sd = wu->swu_dis;
417 	struct scsi_xfer	*xs = wu->swu_xs;
418 
419 	/* If at least one I/O succeeded, we are okay. */
420 	if (wu->swu_ios_succeeded > 0) {
421 		xs->error = XS_NOERROR;
422 		return SR_WU_OK;
423 	}
424 
425 	/* If all I/O failed, retry reads and give up on writes. */
426 	if (xs->flags & SCSI_DATA_IN) {
427 		printf("%s: retrying read on block %lld\n",
428 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
429 		if (wu->swu_cb_active == 1)
430 			panic("%s: sr_raid1_intr_cb",
431 			    DEVNAME(sd->sd_sc));
432 		sr_wu_release_ccbs(wu);
433 		wu->swu_state = SR_WU_RESTART;
434 		if (sd->sd_scsi_rw(wu) == 0)
435 			return SR_WU_RESTART;
436 	} else {
437 		printf("%s: permanently failing write on block %lld\n",
438 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
439 	}
440 
441 	wu->swu_state = SR_WU_FAILED;
442 	xs->error = XS_DRIVER_STUFFUP;
443 
444 	return SR_WU_FAILED;
445 }
446