xref: /openbsd-src/sys/dev/softraid.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /* $OpenBSD: softraid.c,v 1.246 2011/08/08 18:18:22 marco Exp $ */
2 /*
3  * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bio.h"
21 
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/buf.h>
25 #include <sys/device.h>
26 #include <sys/ioctl.h>
27 #include <sys/proc.h>
28 #include <sys/malloc.h>
29 #include <sys/pool.h>
30 #include <sys/kernel.h>
31 #include <sys/disk.h>
32 #include <sys/rwlock.h>
33 #include <sys/queue.h>
34 #include <sys/fcntl.h>
35 #include <sys/disklabel.h>
36 #include <sys/mount.h>
37 #include <sys/sensors.h>
38 #include <sys/stat.h>
39 #include <sys/conf.h>
40 #include <sys/uio.h>
41 #include <sys/workq.h>
42 #include <sys/kthread.h>
43 #include <sys/dkio.h>
44 
45 #ifdef AOE
46 #include <sys/mbuf.h>
47 #include <net/if_aoe.h>
48 #endif /* AOE */
49 
50 #include <crypto/cryptodev.h>
51 
52 #include <scsi/scsi_all.h>
53 #include <scsi/scsiconf.h>
54 #include <scsi/scsi_disk.h>
55 
56 #include <dev/softraidvar.h>
57 #include <dev/rndvar.h>
58 
59 /* #define SR_FANCY_STATS */
60 
61 #ifdef SR_DEBUG
62 #define SR_FANCY_STATS
63 uint32_t	sr_debug = 0
64 		    /* | SR_D_CMD */
65 		    /* | SR_D_MISC */
66 		    /* | SR_D_INTR */
67 		    /* | SR_D_IOCTL */
68 		    /* | SR_D_CCB */
69 		    /* | SR_D_WU */
70 		    /* | SR_D_META */
71 		    /* | SR_D_DIS */
72 		    /* | SR_D_STATE */
73 		;
74 #endif
75 
76 int		sr_match(struct device *, void *, void *);
77 void		sr_attach(struct device *, struct device *, void *);
78 int		sr_detach(struct device *, int);
79 
80 struct cfattach softraid_ca = {
81 	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
82 };
83 
84 struct cfdriver softraid_cd = {
85 	NULL, "softraid", DV_DULL
86 };
87 
88 /* scsi & discipline */
89 void			sr_scsi_cmd(struct scsi_xfer *);
90 void			sr_minphys(struct buf *, struct scsi_link *);
91 int			sr_scsi_probe(struct scsi_link *);
92 void			sr_copy_internal_data(struct scsi_xfer *,
93 			    void *, size_t);
94 int			sr_scsi_ioctl(struct scsi_link *, u_long,
95 			    caddr_t, int);
96 int			sr_ioctl(struct device *, u_long, caddr_t);
97 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
98 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
99 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
100 int			sr_ioctl_setstate(struct sr_softc *,
101 			    struct bioc_setstate *);
102 int			sr_ioctl_createraid(struct sr_softc *,
103 			    struct bioc_createraid *, int);
104 int			sr_ioctl_deleteraid(struct sr_softc *,
105 			    struct bioc_deleteraid *);
106 int			sr_ioctl_discipline(struct sr_softc *,
107 			    struct bioc_discipline *);
108 int			sr_ioctl_installboot(struct sr_softc *,
109 			    struct bioc_installboot *);
110 void			sr_chunks_unwind(struct sr_softc *,
111 			    struct sr_chunk_head *);
112 void			sr_discipline_free(struct sr_discipline *);
113 void			sr_discipline_shutdown(struct sr_discipline *, int);
114 int			sr_discipline_init(struct sr_discipline *, int);
115 
116 /* utility functions */
117 void			sr_shutdown(struct sr_softc *);
118 void			sr_shutdownhook(void *);
119 void			sr_uuid_get(struct sr_uuid *);
120 void			sr_uuid_print(struct sr_uuid *, int);
121 void			sr_checksum_print(u_int8_t *);
122 int			sr_boot_assembly(struct sr_softc *);
123 int			sr_already_assembled(struct sr_discipline *);
124 int			sr_hotspare(struct sr_softc *, dev_t);
125 void			sr_hotspare_rebuild(struct sr_discipline *);
126 int			sr_rebuild_init(struct sr_discipline *, dev_t, int);
127 void			sr_rebuild(void *);
128 void			sr_rebuild_thread(void *);
129 void			sr_roam_chunks(struct sr_discipline *);
130 int			sr_chunk_in_use(struct sr_softc *, dev_t);
131 void			sr_startwu_callback(void *, void *);
132 int			sr_rw(struct sr_softc *, dev_t, char *, size_t,
133 			    daddr64_t, long);
134 
135 /* don't include these on RAMDISK */
136 #ifndef SMALL_KERNEL
137 void			sr_sensors_refresh(void *);
138 int			sr_sensors_create(struct sr_discipline *);
139 void			sr_sensors_delete(struct sr_discipline *);
140 #endif
141 
142 /* metadata */
143 int			sr_meta_probe(struct sr_discipline *, dev_t *, int);
144 int			sr_meta_attach(struct sr_discipline *, int, int);
145 int			sr_meta_rw(struct sr_discipline *, dev_t, void *,
146 			    size_t, daddr64_t, long);
147 int			sr_meta_clear(struct sr_discipline *);
148 void			sr_meta_chunks_create(struct sr_softc *,
149 			    struct sr_chunk_head *);
150 void			sr_meta_init(struct sr_discipline *,
151 			    struct sr_chunk_head *);
152 void			sr_meta_opt_load(struct sr_discipline *,
153 			    struct sr_meta_opt *);
154 
155 /* hotplug magic */
156 void			sr_disk_attach(struct disk *, int);
157 
158 struct sr_hotplug_list {
159 	void			(*sh_hotplug)(struct sr_discipline *,
160 				    struct disk *, int);
161 	struct sr_discipline	*sh_sd;
162 
163 	SLIST_ENTRY(sr_hotplug_list) shl_link;
164 };
165 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);
166 
167 struct			sr_hotplug_list_head	sr_hotplug_callbacks;
168 extern void		(*softraid_disk_attach)(struct disk *, int);
169 
170 /* scsi glue */
171 struct scsi_adapter sr_switch = {
172 	sr_scsi_cmd, sr_minphys, sr_scsi_probe, NULL, sr_scsi_ioctl
173 };
174 
175 /* native metadata format */
176 int			sr_meta_native_bootprobe(struct sr_softc *, dev_t,
177 			    struct sr_boot_chunk_head *);
178 #define SR_META_NOTCLAIMED	(0)
179 #define SR_META_CLAIMED		(1)
180 int			sr_meta_native_probe(struct sr_softc *,
181 			   struct sr_chunk *);
182 int			sr_meta_native_attach(struct sr_discipline *, int);
183 int			sr_meta_native_write(struct sr_discipline *, dev_t,
184 			    struct sr_metadata *,void *);
185 
186 #ifdef SR_DEBUG
187 void			sr_meta_print(struct sr_metadata *);
188 #else
189 #define			sr_meta_print(m)
190 #endif
191 
192 /* the metadata driver should remain stateless */
193 struct sr_meta_driver {
194 	daddr64_t		smd_offset;	/* metadata location */
195 	u_int32_t		smd_size;	/* size of metadata */
196 
197 	int			(*smd_probe)(struct sr_softc *,
198 				   struct sr_chunk *);
199 	int			(*smd_attach)(struct sr_discipline *, int);
200 	int			(*smd_detach)(struct sr_discipline *);
201 	int			(*smd_read)(struct sr_discipline *, dev_t,
202 				    struct sr_metadata *, void *);
203 	int			(*smd_write)(struct sr_discipline *, dev_t,
204 				    struct sr_metadata *, void *);
205 	int			(*smd_validate)(struct sr_discipline *,
206 				    struct sr_metadata *, void *);
207 } smd[] = {
208 	{ SR_META_OFFSET, SR_META_SIZE * 512,
209 	  sr_meta_native_probe, sr_meta_native_attach, NULL,
210 	  sr_meta_native_read, sr_meta_native_write, NULL },
211 	{ 0, 0, NULL, NULL, NULL, NULL }
212 };
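/*
 * Metadata format dispatch table.  sd_meta_type indexes this array and the
 * zeroed sentinel entry terminates the probe loop in sr_meta_probe().  Only
 * the native format is wired up here; a foreign format would presumably add
 * its own row along these lines (hypothetical example):
 *
 *	{ FOO_META_OFFSET, FOO_META_SIZE,
 *	  sr_meta_foo_probe, sr_meta_foo_attach, NULL,
 *	  sr_meta_foo_read, sr_meta_foo_write, sr_meta_foo_validate },
 */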
213 
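/*
 * Attach metadata to a discipline: allocate the in-memory metadata copy
 * (plus a scratch buffer for foreign formats), build the sv_chunks array
 * from the chunk list, call the format's attach hook and then re-sort the
 * chunk list by on-disk chunk id so list and array agree.
 * Returns 0 on success, 1 on failure.
 */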
214 int
215 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
216 {
217 	struct sr_softc		*sc = sd->sd_sc;
218 	struct sr_chunk_head	*cl;
219 	struct sr_chunk		*ch_entry, *chunk1, *chunk2;
220 	int			rv = 1, i = 0;
221 
222 	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), chunk_no);
223 
224 	/* in memory copy of metadata */
225 	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
226 	if (!sd->sd_meta) {
227 		printf("%s: could not allocate memory for metadata\n",
228 		    DEVNAME(sc));
229 		goto bad;
230 	}
231 
232 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
233 		/* in memory copy of foreign metadata */
234 		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
235 		    M_DEVBUF, M_ZERO | M_NOWAIT);
236 		if (!sd->sd_meta_foreign) {
237 			/* unwind frees sd_meta */
238 			printf("%s: could not allocate memory for foreign "
239 			    "metadata\n", DEVNAME(sc));
240 			goto bad;
241 		}
242 	}
243 
244 	/* we have a valid list; now create an array index */
245 	cl = &sd->sd_vol.sv_chunk_list;
246 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no,
247 	    M_DEVBUF, M_WAITOK | M_ZERO);
248 
249 	/* fill out chunk array */
250 	i = 0;
251 	SLIST_FOREACH(ch_entry, cl, src_link)
252 		sd->sd_vol.sv_chunks[i++] = ch_entry;
253 
254 	/* attach metadata */
255 	if (smd[sd->sd_meta_type].smd_attach(sd, force))
256 		goto bad;
257 
258 	/* Force chunks into correct order now that metadata is attached. */
259 	SLIST_FOREACH(ch_entry, cl, src_link)
260 		SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link);
261 	for (i = 0; i < chunk_no; i++) {
262 		ch_entry = sd->sd_vol.sv_chunks[i];
263 		chunk2 = NULL;
264 		SLIST_FOREACH(chunk1, cl, src_link) {
265 			if (chunk1->src_meta.scmi.scm_chunk_id >
266 			    ch_entry->src_meta.scmi.scm_chunk_id)
267 				break;
268 			chunk2 = chunk1;
269 		}
270 		if (chunk2 == NULL)
271 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
272 		else
273 			SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
274 	}
275 	i = 0;
276 	SLIST_FOREACH(ch_entry, cl, src_link)
277 		sd->sd_vol.sv_chunks[i++] = ch_entry;
278 
279 	rv = 0;
280 bad:
281 	return (rv);
282 }
283 
284 int
285 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
286 {
287 	struct sr_softc		*sc = sd->sd_sc;
288 	struct vnode		*vn;
289 	struct sr_chunk		*ch_entry, *ch_prev = NULL;
290 	struct sr_chunk_head	*cl;
291 	char			devname[32];
292 	int			i, d, type, found, prevf, error;
293 	dev_t			dev;
294 
295 	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
296 
297 	if (no_chunk == 0)
298 		goto unwind;
299 
300 
301 	cl = &sd->sd_vol.sv_chunk_list;
302 
303 	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
304 		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
305 		    M_WAITOK | M_ZERO);
306 		/* keep disks in user supplied order */
307 		if (ch_prev)
308 			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
309 		else
310 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
311 		ch_prev = ch_entry;
312 		dev = dt[d];
313 		ch_entry->src_dev_mm = dev;
314 
315 		if (dev == NODEV) {
316 			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
317 			continue;
318 		} else {
319 			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
320 			if (bdevvp(dev, &vn)) {
321 				printf("%s: sr_meta_probe: can't allocate "
322 				    "vnode\n", DEVNAME(sc));
323 				goto unwind;
324 			}
325 
326 			/*
327 			 * XXX leaving dev open for now; move this to attach
328 			 * and figure out the open/close dance for unwind.
329 			 */
330 			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
331 			if (error) {
332 				DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
333 				    "open %s\n", DEVNAME(sc), devname);
334 				vput(vn);
335 				goto unwind;
336 			}
337 
338 			strlcpy(ch_entry->src_devname, devname,
339 			    sizeof(ch_entry->src_devname));
340 			ch_entry->src_vn = vn;
341 		}
342 
343 		/* determine if this is a device we understand */
344 		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
345 			type = smd[i].smd_probe(sc, ch_entry);
346 			if (type == SR_META_F_INVALID)
347 				continue;
348 			else {
349 				found = type;
350 				break;
351 			}
352 		}
353 
354 		if (found == SR_META_F_INVALID)
355 			goto unwind;
356 		if (prevf == SR_META_F_INVALID)
357 			prevf = found;
358 		if (prevf != found) {
359 			DNPRINTF(SR_D_META, "%s: prevf != found\n",
360 			    DEVNAME(sc));
361 			goto unwind;
362 		}
363 	}
364 
365 	return (prevf);
366 unwind:
367 	return (SR_META_F_INVALID);
368 }
369 
370 void
371 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
372 {
373 	int			maj, unit, part;
374 	char			*name;
375 
376 	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
377 	    DEVNAME(sc), buf, size);
378 
379 	if (!buf)
380 		return;
381 
382 	maj = major(dev);
383 	part = DISKPART(dev);
384 	unit = DISKUNIT(dev);
385 
386 	name = findblkname(maj);
387 	if (name == NULL)
388 		return;
389 
390 	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
391 }
392 
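/*
 * Synchronous raw I/O helper used for metadata access.  Data is bounced
 * through a DMA-reachable buffer and split into MAXPHYS-sized transfers;
 * "offset" is a block number in DEV_BSIZE units and "flags" is either
 * B_READ or B_WRITE.  Returns 0 on success, 1 on error.  sr_meta_rw()
 * below wraps this for metadata buffers.
 */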
393 int
394 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr64_t offset,
395     long flags)
396 {
397 	struct vnode		*vp;
398 	struct buf		b;
399 	size_t			bufsize, dma_bufsize;
400 	int			rv = 1;
401 	char			*dma_buf;
402 
403 	DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %d, %llu 0x%x)\n",
404 	    DEVNAME(sc), dev, buf, size, offset, flags);
405 
406 	dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size;
407 	dma_buf = dma_alloc(dma_bufsize, PR_WAITOK);
408 
409 	if (bdevvp(dev, &vp)) {
410 		printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc));
411 		goto done;
412 	}
413 
414 	while (size > 0) {
415 		DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %d, offset %llu)\n",
416 		    DEVNAME(sc), dma_buf, size, offset);
417 
418 		bufsize = (size > MAXPHYS) ? MAXPHYS : size;
419 		if (flags == B_WRITE)
420 			bcopy(buf, dma_buf, bufsize);
421 
422 		bzero(&b, sizeof(b));
423 		b.b_flags = flags | B_PHYS;
424 		b.b_proc = curproc;
425 		b.b_dev = dev;
426 		b.b_iodone = NULL;
427 		b.b_error = 0;
428 		b.b_blkno = offset;
429 		b.b_data = dma_buf;
430 		b.b_bcount = bufsize;
431 		b.b_bufsize = bufsize;
432 		b.b_resid = bufsize;
433 		b.b_vp = vp;
434 
435 		if ((b.b_flags & B_READ) == 0)
436 			vp->v_numoutput++;
437 
438 		LIST_INIT(&b.b_dep);
439 		VOP_STRATEGY(&b);
440 		biowait(&b);
441 
442 		if (b.b_flags & B_ERROR) {
443 			printf("%s: I/O error %d on dev 0x%x at block %llu\n",
444 			    DEVNAME(sc), b.b_error, dev, b.b_blkno);
445 			goto done;
446 		}
447 
448 		if (flags == B_READ)
449 			bcopy(dma_buf, buf, bufsize);
450 
451 		size -= bufsize;
452 		buf += bufsize;
453 		offset += howmany(bufsize, DEV_BSIZE);
454 	}
455 
456 	rv = 0;
457 
458 done:
459 	if (vp)
460 		vput(vp);
461 
462 	dma_free(dma_buf, dma_bufsize);
463 
464 	return (rv);
465 }
466 
467 int
468 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t size,
469     daddr64_t offset, long flags)
470 {
471 	int			rv = 1;
472 
473 	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
474 	    DEVNAME(sd->sd_sc), dev, md, size, offset, flags);
475 
476 	if (md == NULL) {
477 		printf("%s: sr_meta_rw: invalid metadata pointer\n",
478 		    DEVNAME(sd->sd_sc));
479 		goto done;
480 	}
481 
482 	rv = sr_rw(sd->sd_sc, dev, md, size, offset, flags);
483 
484 done:
485 	return (rv);
486 }
487 
488 int
489 sr_meta_clear(struct sr_discipline *sd)
490 {
491 	struct sr_softc		*sc = sd->sd_sc;
492 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
493 	struct sr_chunk		*ch_entry;
494 	void			*m;
495 	int			rv = 1;
496 
497 	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
498 
499 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
500 		printf("%s: sr_meta_clear cannot clear foreign metadata\n",
501 		    DEVNAME(sc));
502 		goto done;
503 	}
504 
505 	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
506 	SLIST_FOREACH(ch_entry, cl, src_link) {
507 		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
508 			/* XXX mark disk offline */
509 			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
510 			    "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
511 			rv++;
512 			continue;
513 		}
514 		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
515 	}
516 
517 	bzero(sd->sd_meta, SR_META_SIZE * 512);
518 
519 	free(m, M_DEVBUF);
520 	rv = 0;
521 done:
522 	return (rv);
523 }
524 
525 void
526 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
527 {
528 	struct sr_chunk		*ch_entry;
529 	struct sr_uuid		uuid;
530 	int			cid = 0;
531 	char			*name;
532 	u_int64_t		max_chunk_sz = 0, min_chunk_sz;
533 
534 	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));
535 
536 	sr_uuid_get(&uuid);
537 
538 	/* fill out stuff and get largest chunk size while looping */
539 	SLIST_FOREACH(ch_entry, cl, src_link) {
540 		name = ch_entry->src_devname;
541 		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
542 		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
543 		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
544 		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
545 		    sizeof(ch_entry->src_meta.scmi.scm_devname));
546 		bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid,
547 		    sizeof(ch_entry->src_meta.scmi.scm_uuid));
548 
549 		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
550 			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
551 	}
552 
553 	/* get smallest chunk size */
554 	min_chunk_sz = max_chunk_sz;
555 	SLIST_FOREACH(ch_entry, cl, src_link)
556 		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
557 			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;
558 
559 	/* equalize all sizes */
560 	SLIST_FOREACH(ch_entry, cl, src_link)
561 		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;
562 
563 	/* whine if chunks are not the same size */
564 	if (min_chunk_sz != max_chunk_sz)
565 		printf("%s: chunk sizes are not equal; up to %llu blocks "
566 		    "wasted per chunk\n",
567 		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
568 }
569 
570 void
571 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
572 {
573 	struct sr_softc		*sc = sd->sd_sc;
574 	struct sr_metadata	*sm = sd->sd_meta;
575 	struct sr_meta_chunk	*im_sc;
576 	int			i, chunk_no;
577 
578 	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
579 
580 	if (!sm)
581 		return;
582 
583 	/* initial metadata */
584 	sm->ssdi.ssd_magic = SR_MAGIC;
585 	sm->ssdi.ssd_version = SR_META_VERSION;
586 	sm->ssd_ondisk = 0;
587 	sm->ssdi.ssd_vol_flags = sd->sd_meta_flags;
588 	sm->ssd_data_offset = SR_DATA_OFFSET;
589 
590 	/* get uuid from chunk 0 */
591 	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
592 	    &sm->ssdi.ssd_uuid,
593 	    sizeof(struct sr_uuid));
594 
595 	/* volume is filled in createraid */
596 
597 	/* add missing chunk bits */
598 	chunk_no = sm->ssdi.ssd_chunk_no;
599 	for (i = 0; i < chunk_no; i++) {
600 		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
601 		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
602 		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
603 		    sizeof(struct sr_meta_chunk_invariant));
604 	}
605 }
606 
607 void
608 sr_meta_opt_load(struct sr_discipline *sd, struct sr_meta_opt *om)
609 {
610 	if (om->somi.som_type == SR_OPT_BOOT) {
611 		/* SR_OPT_BOOT needs no generic processing here; */
612 		/* disciplines may handle it via sd_meta_opt_load. */
613 	} else
614 		panic("unknown optional metadata type");
615 }
616 
617 void
618 sr_meta_save_callback(void *arg1, void *arg2)
619 {
620 	struct sr_discipline	*sd = arg1;
621 	int			s;
622 
623 	s = splbio();
624 
625 	if (sr_meta_save(arg1, SR_META_DIRTY))
626 		printf("%s: save metadata failed\n",
627 		    DEVNAME(sd->sd_sc));
628 
629 	sd->sd_must_flush = 0;
630 	splx(s);
631 }
632 
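/*
 * Write the current metadata to every online chunk.  The on-disk image is
 * assembled in a scratch buffer: the sr_metadata header, followed by one
 * sr_meta_chunk per chunk, followed by any optional metadata records.  The
 * generation counter (ssd_ondisk) is bumped before each save so assembly
 * can later pick the newest copy; a failed write marks the chunk offline
 * and restarts the save.
 */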
633 int
634 sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
635 {
636 	struct sr_softc		*sc = sd->sd_sc;
637 	struct sr_metadata	*sm = sd->sd_meta, *m;
638 	struct sr_meta_driver	*s;
639 	struct sr_chunk		*src;
640 	struct sr_meta_chunk	*cm;
641 	struct sr_workunit	wu;
642 	struct sr_meta_opt_item *omi;
643 	struct sr_meta_opt	*om;
644 	int			i;
645 
646 	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
647 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
648 
649 	if (!sm) {
650 		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
651 		goto bad;
652 	}
653 
654 	/* meta scratchpad */
655 	s = &smd[sd->sd_meta_type];
656 	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
657 	if (!m) {
658 		printf("%s: could not allocate metadata scratch area\n",
659 		    DEVNAME(sc));
660 		goto bad;
661 	}
662 
663 	/* from here on out metadata is updated */
664 restart:
665 	sm->ssd_ondisk++;
666 	sm->ssd_meta_flags = flags;
667 	bcopy(sm, m, sizeof(*m));
668 
669 	/* Chunk metadata. */
670 	cm = (struct sr_meta_chunk *)(m + 1);
671 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
672 		src = sd->sd_vol.sv_chunks[i];
673 		bcopy(&src->src_meta, cm, sizeof(*cm));
674 		cm++;
675 	}
676 
677 	/* Optional metadata. */
678 	om = (struct sr_meta_opt *)(cm);
679 	SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
680 		bcopy(&omi->omi_om, om, sizeof(*om));
681 		sr_checksum(sc, om, &om->som_checksum,
682 		    sizeof(struct sr_meta_opt_invariant));
683 		om++;
684 	}
685 
686 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
687 		src = sd->sd_vol.sv_chunks[i];
688 
689 		/* skip disks that are offline */
690 		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
691 			continue;
692 
693 		/* calculate metadata checksum for correct chunk */
694 		m->ssdi.ssd_chunk_id = i;
695 		sr_checksum(sc, m, &m->ssd_checksum,
696 		    sizeof(struct sr_meta_invariant));
697 
698 #ifdef SR_DEBUG
699 		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
700 		    "chunkid: %d checksum: ",
701 		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
702 		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
703 
704 		if (sr_debug & SR_D_META)
705 			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
706 		DNPRINTF(SR_D_META, "\n");
707 		sr_meta_print(m);
708 #endif
709 
710 		/* translate and write to disk */
711 		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
712 			printf("%s: could not write metadata to %s\n",
713 			    DEVNAME(sc), src->src_devname);
714 			/* restart the meta write */
715 			src->src_meta.scm_status = BIOC_SDOFFLINE;
716 			/* XXX recalculate volume status */
717 			goto restart;
718 		}
719 	}
720 
721 	/* not all disciplines have sync */
722 	if (sd->sd_scsi_sync) {
723 		bzero(&wu, sizeof(wu));
724 		wu.swu_fake = 1;
725 		wu.swu_dis = sd;
726 		sd->sd_scsi_sync(&wu);
727 	}
728 	free(m, M_DEVBUF);
729 	return (0);
730 bad:
731 	return (1);
732 }
733 
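/*
 * Read metadata from each online chunk and validate it.  The first valid
 * copy becomes the volume's in-memory metadata; the matching per-chunk
 * record and any optional metadata items are copied out of the same
 * buffer.  Returns the number of chunks read successfully, or -1 if
 * invalid metadata was encountered.
 */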
734 int
735 sr_meta_read(struct sr_discipline *sd)
736 {
737 #ifdef SR_DEBUG
738 	struct sr_softc		*sc = sd->sd_sc;
739 #endif
740 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
741 	struct sr_metadata	*sm;
742 	struct sr_chunk		*ch_entry;
743 	struct sr_meta_chunk	*cp;
744 	struct sr_meta_driver	*s;
745 	struct sr_meta_opt_item *omi;
746 	struct sr_meta_opt	*om;
747 	void			*fm = NULL;
748 	int			i, no_disk = 0, got_meta = 0;
749 
750 	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
751 
752 	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
753 	s = &smd[sd->sd_meta_type];
754 	if (sd->sd_meta_type != SR_META_F_NATIVE)
755 		fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);
756 
757 	cp = (struct sr_meta_chunk *)(sm + 1);
758 	SLIST_FOREACH(ch_entry, cl, src_link) {
759 		/* skip disks that are offline */
760 		if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
761 			DNPRINTF(SR_D_META,
762 			    "%s: %s chunk marked offline, spoofing status\n",
763 			    DEVNAME(sc), ch_entry->src_devname);
764 			cp++; /* adjust chunk pointer to match failure */
765 			continue;
766 		} else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
767 			/* read and translate */
768 			/* XXX mark chunk offline, elsewhere!! */
769 			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
770 			cp++; /* adjust chunk pointer to match failure */
771 			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
772 			    DEVNAME(sc));
773 			continue;
774 		}
775 
776 		if (sm->ssdi.ssd_magic != SR_MAGIC) {
777 			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
778 			    DEVNAME(sc));
779 			continue;
780 		}
781 
782 		/* validate metadata */
783 		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
784 			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
785 			    DEVNAME(sc));
786 			no_disk = -1;
787 			goto done;
788 		}
789 
790 		/* assume first chunk contains metadata */
791 		if (got_meta == 0) {
792 			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
793 			got_meta = 1;
794 		}
795 
796 		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
797 
798 		/* Process optional metadata. */
799 		om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
800 		    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
801 		for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
802 
803 			omi = malloc(sizeof(struct sr_meta_opt_item),
804 			    M_DEVBUF, M_WAITOK | M_ZERO);
805 			bcopy(om, &omi->omi_om, sizeof(struct sr_meta_opt));
806 			SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link);
807 
808 			/* See if discipline wants to handle it. */
809 			if (sd->sd_meta_opt_load == NULL ||
810 			    sd->sd_meta_opt_load(sd, &omi->omi_om) != 0)
811 				sr_meta_opt_load(sd, &omi->omi_om);
812 
813 			/* Advance to the next on-disk record either way. */
814 			om++;
816 		}
817 
818 		cp++;
819 		no_disk++;
820 	}
821 
822 	free(sm, M_DEVBUF);
823 	if (fm)
824 		free(fm, M_DEVBUF);
825 
826 done:
827 	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
828 	    no_disk);
829 	return (no_disk);
830 }
831 
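/*
 * Sanity-check metadata that has been translated to the native format:
 * verify the magic and the checksum over the invariant portion, and
 * upgrade version 3 metadata (which lacked a data offset) in place.
 * Returns 0 if the metadata is usable.
 */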
832 int
833 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
834     void *fm)
835 {
836 	struct sr_softc		*sc = sd->sd_sc;
837 	struct sr_meta_driver	*s;
838 #ifdef SR_DEBUG
839 	struct sr_meta_chunk	*mc;
840 #endif
841 	char			devname[32];
842 	int			rv = 1;
843 	u_int8_t		checksum[MD5_DIGEST_LENGTH];
844 
845 	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
846 
847 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
848 
849 	s = &smd[sd->sd_meta_type];
850 	if (sd->sd_meta_type != SR_META_F_NATIVE)
851 		if (s->smd_validate(sd, sm, fm)) {
852 			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
853 			goto done;
854 		}
855 
856 	/*
857 	 * at this point all foreign metadata has been translated to the native
858 	 * format and will be treated just like the native format
859 	 */
860 
861 	if (sm->ssdi.ssd_magic != SR_MAGIC) {
862 		printf("%s: not valid softraid metadata\n", DEVNAME(sc));
863 		goto done;
864 	}
865 
866 	/* Verify metadata checksum. */
867 	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
868 	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
869 		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
870 		goto done;
871 	}
872 
873 	/* Handle changes between versions. */
874 	if (sm->ssdi.ssd_version == 3) {
875 
876 		/*
877 		 * Version 3 - update metadata version and fix up data offset
878 		 * value since this did not exist in version 3.
879 		 */
880 		sm->ssdi.ssd_version = SR_META_VERSION;
881 		snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
882 		    "%03d", SR_META_VERSION);
883 		if (sm->ssd_data_offset == 0)
884 			sm->ssd_data_offset = SR_META_V3_DATA_OFFSET;
885 
886 	} else if (sm->ssdi.ssd_version == SR_META_VERSION) {
887 
888 		/*
889 		 * Version 4 - original metadata format did not store
890 		 * data offset so fix this up if necessary.
891 		 */
892 		if (sm->ssd_data_offset == 0)
893 			sm->ssd_data_offset = SR_DATA_OFFSET;
894 
895 	} else {
896 
897 		printf("%s: %s cannot read metadata version %u, expected %u\n",
898 		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
899 		    SR_META_VERSION);
900 		goto done;
901 
902 	}
903 
904 #ifdef SR_DEBUG
905 	/* warn if disk changed order */
906 	mc = (struct sr_meta_chunk *)(sm + 1);
907 	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
908 	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
909 		DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
910 		    DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
911 		    devname);
912 #endif
913 
914 	/* we have metadata on disk */
915 	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
916 	    DEVNAME(sc), devname);
917 
918 	rv = 0;
919 done:
920 	return (rv);
921 }
922 
923 int
924 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
925     struct sr_boot_chunk_head *bch)
926 {
927 	struct vnode		*vn;
928 	struct disklabel	label;
929 	struct sr_metadata	*md = NULL;
930 	struct sr_discipline	*fake_sd = NULL;
931 	struct sr_boot_chunk	*bc;
932 	char			devname[32];
933 	dev_t			chrdev, rawdev;
934 	int			error, i;
935 	int			rv = SR_META_NOTCLAIMED;
936 
937 	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
938 
939 	/*
940 	 * Use character raw device to avoid SCSI complaints about missing
941 	 * media on removable media devices.
942 	 */
943 	chrdev = blktochr(devno);
944 	rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART);
945 	if (cdevvp(rawdev, &vn)) {
946 		printf("%s: sr_meta_native_bootprobe: can't allocate vnode\n",
947 		    DEVNAME(sc));
948 		goto done;
949 	}
950 
951 	/* open device */
952 	error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
953 	if (error) {
954 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
955 		    "failed\n", DEVNAME(sc));
956 		vput(vn);
957 		goto done;
958 	}
959 
960 	/* get disklabel */
961 	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED,
962 	    curproc);
963 	if (error) {
964 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
965 		    "failed\n", DEVNAME(sc));
966 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
967 		vput(vn);
968 		goto done;
969 	}
970 
971 	/* we are done, close device */
972 	error = VOP_CLOSE(vn, FREAD, NOCRED, curproc);
973 	if (error) {
974 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
975 		    "failed\n", DEVNAME(sc));
976 		vput(vn);
977 		goto done;
978 	}
979 	vput(vn);
980 
981 	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
982 	if (md == NULL) {
983 		printf("%s: not enough memory for metadata buffer\n",
984 		    DEVNAME(sc));
985 		goto done;
986 	}
987 
988 	/* create fake sd to use utility functions */
989 	fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF,
990 	    M_ZERO | M_NOWAIT);
991 	if (fake_sd == NULL) {
992 		printf("%s: not enough memory for fake discipline\n",
993 		    DEVNAME(sc));
994 		goto done;
995 	}
996 	fake_sd->sd_sc = sc;
997 	fake_sd->sd_meta_type = SR_META_F_NATIVE;
998 
999 	for (i = 0; i < MAXPARTITIONS; i++) {
1000 		if (label.d_partitions[i].p_fstype != FS_RAID)
1001 			continue;
1002 
1003 		/* open partition */
1004 		rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i);
1005 		if (bdevvp(rawdev, &vn)) {
1006 			printf("%s: sr_meta_native_bootprobe: can't allocate "
1007 			    "vnode for partition\n", DEVNAME(sc));
1008 			goto done;
1009 		}
1010 		error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1011 		if (error) {
1012 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
1013 			    "open failed, partition %d\n",
1014 			    DEVNAME(sc), i);
1015 			vput(vn);
1016 			continue;
1017 		}
1018 
1019 		if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) {
1020 			printf("%s: native bootprobe could not read native "
1021 			    "metadata\n", DEVNAME(sc));
1022 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1023 			vput(vn);
1024 			continue;
1025 		}
1026 
1027 		/* are we a softraid partition? */
1028 		if (md->ssdi.ssd_magic != SR_MAGIC) {
1029 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1030 			vput(vn);
1031 			continue;
1032 		}
1033 
1034 		sr_meta_getdevname(sc, rawdev, devname, sizeof(devname));
1035 		if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) {
1036 			if (md->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE) {
1037 				DNPRINTF(SR_D_META, "%s: don't save %s\n",
1038 				    DEVNAME(sc), devname);
1039 			} else {
1040 				/* XXX fix M_WAITOK, this is boot time */
1041 				bc = malloc(sizeof(*bc), M_DEVBUF,
1042 				    M_WAITOK | M_ZERO);
1043 				bcopy(md, &bc->sbc_metadata,
1044 				    sizeof(bc->sbc_metadata));
1045 				bc->sbc_mm = rawdev;
1046 				SLIST_INSERT_HEAD(bch, bc, sbc_link);
1047 				rv = SR_META_CLAIMED;
1048 			}
1049 		}
1050 
1051 		/* we are done, close partition */
1052 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1053 		vput(vn);
1054 	}
1055 
1056 done:
1057 	if (fake_sd)
1058 		free(fake_sd, M_DEVBUF);
1059 	if (md)
1060 		free(md, M_DEVBUF);
1061 
1062 	return (rv);
1063 }
1064 
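/*
 * Scan all sd(4) and wd(4) disks for softraid metadata and bring the
 * discovered volumes online.  Boot chunks are grouped by volume UUID, key
 * disks and hotspare "volumes" are split off, duplicate chunks are
 * resolved in favor of the highest on-disk generation, and
 * sr_ioctl_createraid() is invoked for each remaining volume.
 * Returns the number of volumes assembled.
 */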
1065 int
1066 sr_boot_assembly(struct sr_softc *sc)
1067 {
1068 	struct sr_boot_volume_head bvh;
1069 	struct sr_boot_chunk_head bch, kdh;
1070 	struct sr_boot_volume	*bv, *bv1, *bv2;
1071 	struct sr_boot_chunk	*bc, *bcnext, *bc1, *bc2;
1072 	struct sr_disk_head	sdklist;
1073 	struct sr_disk		*sdk;
1074 	struct disk		*dk;
1075 	struct bioc_createraid	bcr;
1076 	struct sr_meta_chunk	*hm;
1077 	struct sr_chunk_head	*cl;
1078 	struct sr_chunk		*hotspare, *chunk, *last;
1079 	u_int64_t		*ondisk = NULL;
1080 	dev_t			*devs = NULL;
1081 	char			devname[32];
1082 	int			rv = 0, i;
1083 
1084 	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
1085 
1086 	SLIST_INIT(&sdklist);
1087 	SLIST_INIT(&bvh);
1088 	SLIST_INIT(&bch);
1089 	SLIST_INIT(&kdh);
1090 
1091 	dk = TAILQ_FIRST(&disklist);
1092 	while (dk != TAILQ_END(&disklist)) {
1093 
1094 		/* See if this disk has been checked. */
1095 		SLIST_FOREACH(sdk, &sdklist, sdk_link)
1096 			if (sdk->sdk_devno == dk->dk_devno)
1097 				break;
1098 
1099 		if (sdk != NULL) {
1100 			dk = TAILQ_NEXT(dk, dk_link);
1101 			continue;
1102 		}
1103 
1104 		/* Add this disk to the list that we've checked. */
1105 		sdk = malloc(sizeof(struct sr_disk), M_DEVBUF,
1106 		    M_NOWAIT | M_CANFAIL | M_ZERO);
1107 		if (sdk == NULL)
1108 			goto unwind;
1109 		sdk->sdk_devno = dk->dk_devno;
1110 		SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link);
1111 
1112 		/* Only check sd(4) and wd(4) devices. */
1113 		if (strncmp(dk->dk_name, "sd", 2) &&
1114 		    strncmp(dk->dk_name, "wd", 2)) {
1115 			dk = TAILQ_NEXT(dk, dk_link);
1116 			continue;
1117 		}
1118 
1119 		/* native softraid uses partitions */
1120 		sr_meta_native_bootprobe(sc, dk->dk_devno, &bch);
1121 
1122 		/* probe non-native disks if native failed. */
1123 
1124 		/* Restart scan since we may have slept. */
1125 		dk = TAILQ_FIRST(&disklist);
1126 	}
1127 
1128 	/*
1129 	 * Create a list of volumes and associate chunks with each volume.
1130 	 */
1131 	for (bc = SLIST_FIRST(&bch); bc != SLIST_END(&bch); bc = bcnext) {
1132 
1133 		bcnext = SLIST_NEXT(bc, sbc_link);
1134 		SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link);
1135 		bc->sbc_chunk_id = bc->sbc_metadata.ssdi.ssd_chunk_id;
1136 
1137 		/* Handle key disks separately. */
1138 		if (bc->sbc_metadata.ssdi.ssd_level == SR_KEYDISK_LEVEL) {
1139 			SLIST_INSERT_HEAD(&kdh, bc, sbc_link);
1140 			continue;
1141 		}
1142 
1143 		SLIST_FOREACH(bv, &bvh, sbv_link) {
1144 			if (bcmp(&bc->sbc_metadata.ssdi.ssd_uuid,
1145 			    &bv->sbv_uuid,
1146 			    sizeof(bc->sbc_metadata.ssdi.ssd_uuid)) == 0)
1147 				break;
1148 		}
1149 
1150 		if (bv == NULL) {
1151 			bv = malloc(sizeof(struct sr_boot_volume),
1152 			    M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO);
1153 			if (bv == NULL) {
1154 				printf("%s: failed to allocate boot volume!\n",
1155 				    DEVNAME(sc));
1156 				goto unwind;
1157 			}
1158 
1159 			bv->sbv_level = bc->sbc_metadata.ssdi.ssd_level;
1160 			bv->sbv_volid = bc->sbc_metadata.ssdi.ssd_volid;
1161 			bv->sbv_chunk_no = bc->sbc_metadata.ssdi.ssd_chunk_no;
1162 			bcopy(&bc->sbc_metadata.ssdi.ssd_uuid, &bv->sbv_uuid,
1163 			    sizeof(bc->sbc_metadata.ssdi.ssd_uuid));
1164 			SLIST_INIT(&bv->sbv_chunks);
1165 
1166 			/* Maintain volume order. */
1167 			bv2 = NULL;
1168 			SLIST_FOREACH(bv1, &bvh, sbv_link) {
1169 				if (bv1->sbv_volid > bv->sbv_volid)
1170 					break;
1171 				bv2 = bv1;
1172 			}
1173 			if (bv2 == NULL) {
1174 				DNPRINTF(SR_D_META, "%s: insert volume %u "
1175 				    "at head\n", DEVNAME(sc), bv->sbv_volid);
1176 				SLIST_INSERT_HEAD(&bvh, bv, sbv_link);
1177 			} else {
1178 				DNPRINTF(SR_D_META, "%s: insert volume %u "
1179 				    "after %u\n", DEVNAME(sc), bv->sbv_volid,
1180 				    bv2->sbv_volid);
1181 				SLIST_INSERT_AFTER(bv2, bv, sbv_link);
1182 			}
1183 		}
1184 
1185 		/* Maintain chunk order. */
1186 		bc2 = NULL;
1187 		SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) {
1188 			if (bc1->sbc_chunk_id > bc->sbc_chunk_id)
1189 				break;
1190 			bc2 = bc1;
1191 		}
1192 		if (bc2 == NULL) {
1193 			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
1194 			    "at head\n", DEVNAME(sc), bv->sbv_volid,
1195 			    bc->sbc_chunk_id);
1196 			SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link);
1197 		} else {
1198 			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
1199 			    "after %u\n", DEVNAME(sc), bv->sbv_volid,
1200 			    bc->sbc_chunk_id, bc2->sbc_chunk_id);
1201 			SLIST_INSERT_AFTER(bc2, bc, sbc_link);
1202 		}
1203 
1204 		bv->sbv_dev_no++;
1205 	}
1206 
1207 	/* Allocate memory for device and ondisk version arrays. */
1208 	devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF,
1209 	    M_NOWAIT | M_CANFAIL);
1210 	if (devs == NULL) {
1211 		printf("%s: failed to allocate device array\n", DEVNAME(sc));
1212 		goto unwind;
1213 	}
1214 	ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF,
1215 	    M_NOWAIT | M_CANFAIL);
1216 	if (ondisk == NULL) {
1217 		printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
1218 		goto unwind;
1219 	}
1220 
1221 	/*
1222 	 * Assemble hotspare "volumes".
1223 	 */
1224 	SLIST_FOREACH(bv, &bvh, sbv_link) {
1225 
1226 		/* Check if this is a hotspare "volume". */
1227 		if (bv->sbv_level != SR_HOTSPARE_LEVEL ||
1228 		    bv->sbv_chunk_no != 1)
1229 			continue;
1230 
1231 #ifdef SR_DEBUG
1232 		DNPRINTF(SR_D_META, "%s: assembling hotspare volume ",
1233 		    DEVNAME(sc));
1234 		if (sr_debug & SR_D_META)
1235 			sr_uuid_print(&bv->sbv_uuid, 0);
1236 		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
1237 		    bv->sbv_volid, bv->sbv_chunk_no);
1238 #endif
1239 
1240 		/* Create hotspare chunk metadata. */
1241 		hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF,
1242 		    M_NOWAIT | M_CANFAIL | M_ZERO);
1243 		if (hotspare == NULL) {
1244 			printf("%s: failed to allocate hotspare\n",
1245 			    DEVNAME(sc));
1246 			goto unwind;
1247 		}
1248 
1249 		bc = SLIST_FIRST(&bv->sbv_chunks);
1250 		sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname));
1251 		hotspare->src_dev_mm = bc->sbc_mm;
1252 		strlcpy(hotspare->src_devname, devname,
1253 		    sizeof(hotspare->src_devname));
1254 		hotspare->src_size = bc->sbc_metadata.ssdi.ssd_size;
1255 
1256 		hm = &hotspare->src_meta;
1257 		hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
1258 		hm->scmi.scm_chunk_id = 0;
1259 		hm->scmi.scm_size = bc->sbc_metadata.ssdi.ssd_size;
1260 		hm->scmi.scm_coerced_size = bc->sbc_metadata.ssdi.ssd_size;
1261 		strlcpy(hm->scmi.scm_devname, devname,
1262 		    sizeof(hm->scmi.scm_devname));
1263 		bcopy(&bc->sbc_metadata.ssdi.ssd_uuid, &hm->scmi.scm_uuid,
1264 		    sizeof(struct sr_uuid));
1265 
1266 		sr_checksum(sc, hm, &hm->scm_checksum,
1267 		    sizeof(struct sr_meta_chunk_invariant));
1268 
1269 		hm->scm_status = BIOC_SDHOTSPARE;
1270 
1271 		/* Add chunk to hotspare list. */
1272 		rw_enter_write(&sc->sc_hs_lock);
1273 		cl = &sc->sc_hotspare_list;
1274 		if (SLIST_EMPTY(cl))
1275 			SLIST_INSERT_HEAD(cl, hotspare, src_link);
1276 		else {
1277 			SLIST_FOREACH(chunk, cl, src_link)
1278 				last = chunk;
1279 			SLIST_INSERT_AFTER(last, hotspare, src_link);
1280 		}
1281 		sc->sc_hotspare_no++;
1282 		rw_exit_write(&sc->sc_hs_lock);
1283 
1284 	}
1285 
1286 	/*
1287 	 * Assemble RAID volumes.
1288 	 */
1289 	SLIST_FOREACH(bv, &bvh, sbv_link) {
1290 
1291 		bzero(&bcr, sizeof(bcr));
1292 
1293 		/* Check if this is a hotspare "volume". */
1294 		if (bv->sbv_level == SR_HOTSPARE_LEVEL &&
1295 		    bv->sbv_chunk_no == 1)
1296 			continue;
1297 
1298 #ifdef SR_DEBUG
1299 		DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc));
1300 		if (sr_debug & SR_D_META)
1301 			sr_uuid_print(&bv->sbv_uuid, 0);
1302 		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
1303 		    bv->sbv_volid, bv->sbv_chunk_no);
1304 #endif
1305 
1306 		/*
1307 		 * If this is a crypto volume, try to find a matching
1308 		 * key disk...
1309 		 */
1310 		bcr.bc_key_disk = NODEV;
1311 		if (bv->sbv_level == 'C') {
1312 			SLIST_FOREACH(bc, &kdh, sbc_link) {
1313 				if (bcmp(&bc->sbc_metadata.ssdi.ssd_uuid,
1314 				    &bv->sbv_uuid,
1315 				    sizeof(bc->sbc_metadata.ssdi.ssd_uuid))
1316 				    == 0)
1317 					bcr.bc_key_disk = bc->sbc_mm;
1318 			}
1319 		}
1320 
1321 		for (i = 0; i < BIOC_CRMAXLEN; i++) {
1322 			devs[i] = NODEV; /* mark device as illegal */
1323 			ondisk[i] = 0;
1324 		}
1325 
1326 		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
1327 			if (devs[bc->sbc_chunk_id] != NODEV) {
1328 				bv->sbv_dev_no--;
1329 				sr_meta_getdevname(sc, bc->sbc_mm, devname,
1330 				    sizeof(devname));
1331 				printf("%s: found duplicate chunk %u for "
1332 				    "volume %u on device %s\n", DEVNAME(sc),
1333 				    bc->sbc_chunk_id, bv->sbv_volid, devname);
1334 			}
1335 
1336 			if (devs[bc->sbc_chunk_id] == NODEV ||
1337 			    bc->sbc_metadata.ssd_ondisk >
1338 			    ondisk[bc->sbc_chunk_id]) {
1339 				devs[bc->sbc_chunk_id] = bc->sbc_mm;
1340 				ondisk[bc->sbc_chunk_id] =
1341 				    bc->sbc_metadata.ssd_ondisk;
1342 				DNPRINTF(SR_D_META, "%s: using ondisk "
1343 				    "metadata version %llu for chunk %u\n",
1344 				    DEVNAME(sc), ondisk[bc->sbc_chunk_id],
1345 				    bc->sbc_chunk_id);
1346 			}
1347 		}
1348 
1349 		if (bv->sbv_chunk_no != bv->sbv_dev_no) {
1350 			printf("%s: not all chunks were provided; "
1351 			    "attempting to bring volume %d online\n",
1352 			    DEVNAME(sc), bv->sbv_volid);
1353 		}
1354 
1355 		bcr.bc_level = bv->sbv_level;
1356 		bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t);
1357 		bcr.bc_dev_list = devs;
1358 		bcr.bc_flags = BIOC_SCDEVT;
1359 
1360 		rw_enter_write(&sc->sc_lock);
1361 		sr_ioctl_createraid(sc, &bcr, 0);
1362 		rw_exit_write(&sc->sc_lock);
1363 
1364 		rv++;
1365 	}
1366 
1367 	/* done with metadata */
1368 unwind:
1369 	/* Free boot volumes and associated chunks. */
1370 	for (bv1 = SLIST_FIRST(&bvh); bv1 != SLIST_END(&bvh); bv1 = bv2) {
1371 		bv2 = SLIST_NEXT(bv1, sbv_link);
1372 		for (bc1 = SLIST_FIRST(&bv1->sbv_chunks);
1373 		    bc1 != SLIST_END(&bv1->sbv_chunks); bc1 = bc2) {
1374 			bc2 = SLIST_NEXT(bc1, sbc_link);
1375 			free(bc1, M_DEVBUF);
1376 		}
1377 		free(bv1, M_DEVBUF);
1378 	}
1379 	/* Free key disk chunks. */
1380 	for (bc1 = SLIST_FIRST(&kdh); bc1 != SLIST_END(&kdh); bc1 = bc2) {
1381 		bc2 = SLIST_NEXT(bc1, sbc_link);
1382 		free(bc1, M_DEVBUF);
1383 	}
1384 	/* Free unallocated chunks. */
1385 	for (bc1 = SLIST_FIRST(&bch); bc1 != SLIST_END(&bch); bc1 = bc2) {
1386 		bc2 = SLIST_NEXT(bc1, sbc_link);
1387 		free(bc1, M_DEVBUF);
1388 	}
1389 
1390 	while (!SLIST_EMPTY(&sdklist)) {
1391 		sdk = SLIST_FIRST(&sdklist);
1392 		SLIST_REMOVE_HEAD(&sdklist, sdk_link);
1393 		free(sdk, M_DEVBUF);
1394 	}
1395 
1396 	if (devs)
1397 		free(devs, M_DEVBUF);
1398 	if (ondisk)
1399 		free(ondisk, M_DEVBUF);
1400 
1401 	return (rv);
1402 }
1403 
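/*
 * Decide whether a chunk can hold native metadata: its partition must be
 * of type FS_RAID and large enough to leave room beyond the data offset.
 * On success the usable size is recorded in src_size and
 * SR_META_F_NATIVE is returned.
 */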
1404 int
1405 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
1406 {
1407 	struct disklabel	label;
1408 	char			*devname;
1409 	int			error, part;
1410 	daddr64_t		size;
1411 
1412 	DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
1413 	   DEVNAME(sc), ch_entry->src_devname);
1414 
1415 	devname = ch_entry->src_devname;
1416 	part = DISKPART(ch_entry->src_dev_mm);
1417 
1418 	/* get disklabel */
1419 	error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD,
1420 	    NOCRED, curproc);
1421 	if (error) {
1422 		DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
1423 		    DEVNAME(sc), devname);
1424 		goto unwind;
1425 	}
1426 
1427 	/* make sure the partition is of the right type */
1428 	if (label.d_partitions[part].p_fstype != FS_RAID) {
1429 		DNPRINTF(SR_D_META,
1430 		    "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc),
1431 		    devname,
1432 		    label.d_partitions[part].p_fstype);
1433 		goto unwind;
1434 	}
1435 
1436 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
1437 	if (size <= 0) {
1438 		DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
1439 		    devname);
1440 		goto unwind;
1441 	}
1442 	ch_entry->src_size = size;
1443 
1444 	DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc),
1445 	    devname, size);
1446 
1447 	return (SR_META_F_NATIVE);
1448 unwind:
1449 	DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
1450 	    devname ? devname : "nodev");
1451 	return (SR_META_F_INVALID);
1452 }
1453 
1454 int
1455 sr_meta_native_attach(struct sr_discipline *sd, int force)
1456 {
1457 	struct sr_softc		*sc = sd->sd_sc;
1458 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
1459 	struct sr_metadata	*md = NULL;
1460 	struct sr_chunk		*ch_entry, *ch_next;
1461 	struct sr_uuid		uuid;
1462 	u_int64_t		version = 0;
1463 	int			sr, not_sr, rv = 1, d, expected = -1, old_meta = 0;
1464 
1465 	DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
1466 
1467 	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
1468 	if (md == NULL) {
1469 		printf("%s: not enough memory for metadata buffer\n",
1470 		    DEVNAME(sc));
1471 		goto bad;
1472 	}
1473 
1474 	bzero(&uuid, sizeof uuid);
1475 
1476 	sr = not_sr = d = 0;
1477 	SLIST_FOREACH(ch_entry, cl, src_link) {
1478 		if (ch_entry->src_dev_mm == NODEV)
1479 			continue;
1480 
1481 		if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
1482 			printf("%s: could not read native metadata\n",
1483 			    DEVNAME(sc));
1484 			goto bad;
1485 		}
1486 
1487 		if (md->ssdi.ssd_magic == SR_MAGIC) {
1488 			sr++;
1489 			ch_entry->src_meta.scmi.scm_chunk_id =
1490 			    md->ssdi.ssd_chunk_id;
1491 			if (d == 0) {
1492 				bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
1493 				expected = md->ssdi.ssd_chunk_no;
1494 				version = md->ssd_ondisk;
1495 				d++;
1496 				continue;
1497 			} else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
1498 			    sizeof uuid)) {
1499 				printf("%s: not part of the same volume\n",
1500 				    DEVNAME(sc));
1501 				goto bad;
1502 			}
1503 			if (md->ssd_ondisk != version) {
1504 				old_meta++;
1505 				version = MAX(md->ssd_ondisk, version);
1506 			}
1507 		} else
1508 			not_sr++;
1509 	}
1510 
1511 	if (sr && not_sr) {
1512 		printf("%s: not all chunks are of the native metadata format\n",
1513 		    DEVNAME(sc));
1514 		goto bad;
1515 	}
1516 
1517 	/* mixed metadata versions; mark bad disks offline */
1518 	if (old_meta) {
1519 		d = 0;
1520 		for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl);
1521 		    ch_entry = ch_next, d++) {
1522 			ch_next = SLIST_NEXT(ch_entry, src_link);
1523 
1524 			/* XXX do we want to read this again? */
1525 			if (ch_entry->src_dev_mm == NODEV)
1526 				panic("src_dev_mm == NODEV");
1527 			if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md,
1528 			    NULL))
1529 				printf("%s: could not read native metadata\n",
1530 				    DEVNAME(sc));
1531 			if (md->ssd_ondisk != version)
1532 				sd->sd_vol.sv_chunks[d]->src_meta.scm_status =
1533 				    BIOC_SDOFFLINE;
1534 		}
1535 	}
1536 
1537 	if (expected != sr && !force && expected != -1) {
1538 		DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying "
1539 		    "anyway\n", DEVNAME(sc));
1540 	}
1541 
1542 	rv = 0;
1543 bad:
1544 	if (md)
1545 		free(md, M_DEVBUF);
1546 	return (rv);
1547 }
1548 
1549 int
1550 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
1551     struct sr_metadata *md, void *fm)
1552 {
1553 #ifdef SR_DEBUG
1554 	struct sr_softc		*sc = sd->sd_sc;
1555 #endif
1556 	DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
1557 	    DEVNAME(sc), dev, md);
1558 
1559 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1560 	    B_READ));
1561 }
1562 
1563 int
1564 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
1565     struct sr_metadata *md, void *fm)
1566 {
1567 #ifdef SR_DEBUG
1568 	struct sr_softc		*sc = sd->sd_sc;
1569 #endif
1570 	DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
1571 	    DEVNAME(sc), dev, md);
1572 
1573 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1574 	    B_WRITE));
1575 }
1576 
1577 void
1578 sr_hotplug_register(struct sr_discipline *sd, void *func)
1579 {
1580 	struct sr_hotplug_list	*mhe;
1581 
1582 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n",
1583 	    DEVNAME(sd->sd_sc), func);
1584 
1585 	/* make sure we aren't on the list yet */
1586 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1587 		if (mhe->sh_hotplug == func)
1588 			return;
1589 
1590 	mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF,
1591 	    M_WAITOK | M_ZERO);
1592 	mhe->sh_hotplug = func;
1593 	mhe->sh_sd = sd;
1594 	SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link);
1595 }
1596 
1597 void
1598 sr_hotplug_unregister(struct sr_discipline *sd, void *func)
1599 {
1600 	struct sr_hotplug_list	*mhe;
1601 
1602 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n",
1603 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func);
1604 
1605 	/* make sure we are actually on the list */
1606 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1607 		if (mhe->sh_hotplug == func) {
1608 			SLIST_REMOVE(&sr_hotplug_callbacks, mhe,
1609 			    sr_hotplug_list, shl_link);
1610 			free(mhe, M_DEVBUF);
1611 			if (SLIST_EMPTY(&sr_hotplug_callbacks))
1612 				SLIST_INIT(&sr_hotplug_callbacks);
1613 			return;
1614 		}
1615 }
1616 
1617 void
1618 sr_disk_attach(struct disk *diskp, int action)
1619 {
1620 	struct sr_hotplug_list	*mhe;
1621 
1622 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1623 		if (mhe->sh_sd->sd_ready)
1624 			mhe->sh_hotplug(mhe->sh_sd, diskp, action);
1625 }
1626 
1627 int
1628 sr_match(struct device *parent, void *match, void *aux)
1629 {
1630 	return (1);
1631 }
1632 
1633 void
1634 sr_attach(struct device *parent, struct device *self, void *aux)
1635 {
1636 	struct sr_softc		*sc = (void *)self;
1637 	struct scsibus_attach_args saa;
1638 
1639 	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
1640 
1641 	rw_init(&sc->sc_lock, "sr_lock");
1642 	rw_init(&sc->sc_hs_lock, "sr_hs_lock");
1643 
1644 	SLIST_INIT(&sr_hotplug_callbacks);
1645 	SLIST_INIT(&sc->sc_hotspare_list);
1646 
1647 #if NBIO > 0
1648 	if (bio_register(&sc->sc_dev, sr_ioctl) != 0)
1649 		printf("%s: controller registration failed", DEVNAME(sc));
1650 	else
1651 		sc->sc_ioctl = sr_ioctl;
1652 #endif /* NBIO > 0 */
1653 
1654 #ifndef SMALL_KERNEL
1655 	strlcpy(sc->sc_sensordev.xname, DEVNAME(sc),
1656 	    sizeof(sc->sc_sensordev.xname));
1657 	sensordev_install(&sc->sc_sensordev);
1658 #endif /* SMALL_KERNEL */
1659 
1660 	printf("\n");
1661 
1662 	sc->sc_link.adapter_softc = sc;
1663 	sc->sc_link.adapter = &sr_switch;
1664 	sc->sc_link.adapter_target = SR_MAX_LD;
1665 	sc->sc_link.adapter_buswidth = SR_MAX_LD;
1666 	sc->sc_link.luns = 1;
1667 
1668 	bzero(&saa, sizeof(saa));
1669 	saa.saa_sc_link = &sc->sc_link;
1670 
1671 	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
1672 	    &saa, scsiprint);
1673 
1674 	softraid_disk_attach = sr_disk_attach;
1675 
1676 	sc->sc_shutdownhook = shutdownhook_establish(sr_shutdownhook, sc);
1677 
1678 	sr_boot_assembly(sc);
1679 }
1680 
1681 int
1682 sr_detach(struct device *self, int flags)
1683 {
1684 	struct sr_softc		*sc = (void *)self;
1685 	int			rv = 0;
1686 
1687 	DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc));
1688 
1689 	if (sc->sc_shutdownhook)
1690 		shutdownhook_disestablish(sc->sc_shutdownhook);
1691 
1692 	sr_shutdown(sc);
1693 
1694 #ifndef SMALL_KERNEL
1695 	if (sc->sc_sensor_task != NULL)
1696 		sensor_task_unregister(sc->sc_sensor_task);
1697 	sensordev_deinstall(&sc->sc_sensordev);
1698 #endif /* SMALL_KERNEL */
1699 
1700 	if (sc->sc_scsibus != NULL) {
1701 		rv = config_detach((struct device *)sc->sc_scsibus, flags);
1702 		if (rv != 0)
1703 			return (rv);
1704 		sc->sc_scsibus = NULL;
1705 	}
1706 
1707 	return (rv);
1708 }
1709 
1710 void
1711 sr_minphys(struct buf *bp, struct scsi_link *sl)
1712 {
1713 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
1714 
1715 	/* XXX currently using SR_MAXFER = MAXPHYS */
1716 	if (bp->b_bcount > SR_MAXFER)
1717 		bp->b_bcount = SR_MAXFER;
1718 	minphys(bp);
1719 }
1720 
1721 void
1722 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
1723 {
1724 	size_t			copy_cnt;
1725 
1726 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
1727 	    xs, size);
1728 
1729 	if (xs->datalen) {
1730 		copy_cnt = MIN(size, xs->datalen);
1731 		bcopy(v, xs->data, copy_cnt);
1732 	}
1733 }
1734 
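/*
 * Per-discipline CCB pool.  sr_ccb_alloc() sizes the pool at
 * sd_max_wu * sd_max_ccb_per_wu and seeds the free queue; sr_ccb_get()
 * and sr_ccb_put() move CCBs on and off that queue at splbio.
 */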
1735 int
1736 sr_ccb_alloc(struct sr_discipline *sd)
1737 {
1738 	struct sr_ccb		*ccb;
1739 	int			i;
1740 
1741 	if (!sd)
1742 		return (1);
1743 
1744 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
1745 
1746 	if (sd->sd_ccb)
1747 		return (1);
1748 
1749 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
1750 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
1751 	TAILQ_INIT(&sd->sd_ccb_freeq);
1752 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
1753 		ccb = &sd->sd_ccb[i];
1754 		ccb->ccb_dis = sd;
1755 		sr_ccb_put(ccb);
1756 	}
1757 
1758 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
1759 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
1760 
1761 	return (0);
1762 }
1763 
1764 void
1765 sr_ccb_free(struct sr_discipline *sd)
1766 {
1767 	struct sr_ccb		*ccb;
1768 
1769 	if (!sd)
1770 		return;
1771 
1772 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
1773 
1774 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
1775 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1776 
1777 	if (sd->sd_ccb)
1778 		free(sd->sd_ccb, M_DEVBUF);
1779 }
1780 
1781 struct sr_ccb *
1782 sr_ccb_get(struct sr_discipline *sd)
1783 {
1784 	struct sr_ccb		*ccb;
1785 	int			s;
1786 
1787 	s = splbio();
1788 
1789 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
1790 	if (ccb) {
1791 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1792 		ccb->ccb_state = SR_CCB_INPROGRESS;
1793 	}
1794 
1795 	splx(s);
1796 
1797 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
1798 	    ccb);
1799 
1800 	return (ccb);
1801 }
1802 
1803 void
1804 sr_ccb_put(struct sr_ccb *ccb)
1805 {
1806 	struct sr_discipline	*sd = ccb->ccb_dis;
1807 	int			s;
1808 
1809 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
1810 	    ccb);
1811 
1812 	s = splbio();
1813 
1814 	ccb->ccb_wu = NULL;
1815 	ccb->ccb_state = SR_CCB_FREE;
1816 	ccb->ccb_target = -1;
1817 	ccb->ccb_opaque = NULL;
1818 
1819 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
1820 
1821 	splx(s);
1822 }
1823 
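/*
 * Per-discipline work unit pool.  Work units cycle through sd_wu_freeq
 * under sd_wu_mtx; sr_wu_put() also releases any CCBs still chained to a
 * work unit before zeroing and requeueing it.
 */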
1824 int
1825 sr_wu_alloc(struct sr_discipline *sd)
1826 {
1827 	struct sr_workunit	*wu;
1828 	int			i, no_wu;
1829 
1830 	if (!sd)
1831 		return (1);
1832 
1833 	DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
1834 	    sd, sd->sd_max_wu);
1835 
1836 	if (sd->sd_wu)
1837 		return (1);
1838 
1839 	no_wu = sd->sd_max_wu;
1840 	sd->sd_wu_pending = no_wu;
1841 
1842 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
1843 	    M_DEVBUF, M_WAITOK | M_ZERO);
1844 	TAILQ_INIT(&sd->sd_wu_freeq);
1845 	TAILQ_INIT(&sd->sd_wu_pendq);
1846 	TAILQ_INIT(&sd->sd_wu_defq);
1847 	for (i = 0; i < no_wu; i++) {
1848 		wu = &sd->sd_wu[i];
1849 		wu->swu_dis = sd;
1850 		sr_wu_put(sd, wu);
1851 	}
1852 
1853 	return (0);
1854 }
1855 
1856 void
1857 sr_wu_free(struct sr_discipline *sd)
1858 {
1859 	struct sr_workunit	*wu;
1860 
1861 	if (!sd)
1862 		return;
1863 
1864 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
1865 
1866 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
1867 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1868 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
1869 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
1870 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
1871 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
1872 
1873 	if (sd->sd_wu)
1874 		free(sd->sd_wu, M_DEVBUF);
1875 }
1876 
1877 void
1878 sr_wu_put(void *xsd, void *xwu)
1879 {
1880 	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
1881 	struct sr_workunit	*wu = (struct sr_workunit *)xwu;
1882 	struct sr_ccb		*ccb;
1883 
1884 	int			s;
1885 
1886 	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
1887 
1888 	s = splbio();
1889 	if (wu->swu_cb_active == 1)
1890 		panic("%s: sr_wu_put got active wu", DEVNAME(sd->sd_sc));
1891 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1892 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1893 		sr_ccb_put(ccb);
1894 	}
1895 	splx(s);
1896 
1897 	bzero(wu, sizeof(*wu));
1898 	TAILQ_INIT(&wu->swu_ccb);
1899 	wu->swu_dis = sd;
1900 
1901 	mtx_enter(&sd->sd_wu_mtx);
1902 	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
1903 	sd->sd_wu_pending--;
1904 	mtx_leave(&sd->sd_wu_mtx);
1905 }
1906 
1907 void *
1908 sr_wu_get(void *xsd)
1909 {
1910 	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
1911 	struct sr_workunit	*wu;
1912 
1913 	mtx_enter(&sd->sd_wu_mtx);
1914 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
1915 	if (wu) {
1916 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1917 		sd->sd_wu_pending++;
1918 	}
1919 	mtx_leave(&sd->sd_wu_mtx);
1920 
1921 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
1922 
1923 	return (wu);
1924 }
1925 
1926 void
1927 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs)
1928 {
1929 	DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs);
1930 
1931 	scsi_done(xs);
1932 }
1933 
1934 void
1935 sr_scsi_cmd(struct scsi_xfer *xs)
1936 {
1937 	int			s;
1938 	struct scsi_link	*link = xs->sc_link;
1939 	struct sr_softc		*sc = link->adapter_softc;
1940 	struct sr_workunit	*wu = NULL;
1941 	struct sr_discipline	*sd;
1942 	struct sr_ccb		*ccb;
1943 
1944 	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: target %d xs: %p "
1945 	    "flags: %#x\n", DEVNAME(sc), link->target, xs, xs->flags);
1946 
1947 	sd = sc->sc_dis[link->target];
1948 	if (sd == NULL) {
1949 		printf("%s: sr_scsi_cmd NULL discipline\n", DEVNAME(sc));
1950 		goto stuffup;
1951 	}
1952 
1953 	if (sd->sd_deleted) {
1954 		printf("%s: %s device is being deleted, failing io\n",
1955 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
1956 		goto stuffup;
1957 	}
1958 
1959 	wu = xs->io;
1960 	/* scsi layer *can* re-send wu without calling sr_wu_put(). */
1961 	s = splbio();
1962 	if (wu->swu_cb_active == 1)
1963 		panic("%s: sr_scsi_cmd got active wu", DEVNAME(sd->sd_sc));
1964 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1965 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1966 		sr_ccb_put(ccb);
1967 	}
1968 	splx(s);
1969 
1970 	bzero(wu, sizeof(*wu));
1971 	TAILQ_INIT(&wu->swu_ccb);
1972 	wu->swu_state = SR_WU_INPROGRESS;
1973 	wu->swu_dis = sd;
1974 	wu->swu_xs = xs;
1975 
1976 	switch (xs->cmd->opcode) {
1977 	case READ_COMMAND:
1978 	case READ_BIG:
1979 	case READ_16:
1980 	case WRITE_COMMAND:
1981 	case WRITE_BIG:
1982 	case WRITE_16:
1983 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
1984 		    DEVNAME(sc), xs->cmd->opcode);
1985 		if (sd->sd_scsi_rw(wu))
1986 			goto stuffup;
1987 		break;
1988 
1989 	case SYNCHRONIZE_CACHE:
1990 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
1991 		    DEVNAME(sc));
1992 		if (sd->sd_scsi_sync(wu))
1993 			goto stuffup;
1994 		goto complete;
1995 
1996 	case TEST_UNIT_READY:
1997 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
1998 		    DEVNAME(sc));
1999 		if (sd->sd_scsi_tur(wu))
2000 			goto stuffup;
2001 		goto complete;
2002 
2003 	case START_STOP:
2004 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
2005 		    DEVNAME(sc));
2006 		if (sd->sd_scsi_start_stop(wu))
2007 			goto stuffup;
2008 		goto complete;
2009 
2010 	case INQUIRY:
2011 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
2012 		    DEVNAME(sc));
2013 		if (sd->sd_scsi_inquiry(wu))
2014 			goto stuffup;
2015 		goto complete;
2016 
2017 	case READ_CAPACITY:
2018 	case READ_CAPACITY_16:
2019 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
2020 		    DEVNAME(sc), xs->cmd->opcode);
2021 		if (sd->sd_scsi_read_cap(wu))
2022 			goto stuffup;
2023 		goto complete;
2024 
2025 	case REQUEST_SENSE:
2026 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
2027 		    DEVNAME(sc));
2028 		if (sd->sd_scsi_req_sense(wu))
2029 			goto stuffup;
2030 		goto complete;
2031 
2032 	default:
2033 		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
2034 		    DEVNAME(sc), xs->cmd->opcode);
2035 		/* XXX might need to add generic function to handle others */
2036 		goto stuffup;
2037 	}
2038 
2039 	return;
2040 stuffup:
2041 	if (sd && sd->sd_scsi_sense.error_code) {
2042 		xs->error = XS_SENSE;
2043 		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
2044 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2045 	} else {
2046 		xs->error = XS_DRIVER_STUFFUP;
2047 	}
2048 complete:
2049 	sr_scsi_done(sd, xs);
2050 }
2051 
2052 int
2053 sr_scsi_probe(struct scsi_link *link)
2054 {
2055 	struct sr_softc		*sc = link->adapter_softc;
2056 	struct sr_discipline	*sd;
2057 
2058 	KASSERT(link->target < SR_MAX_LD && link->lun == 0);
2059 
2060 	sd = sc->sc_dis[link->target];
2061 	if (sd == NULL)
2062 		return (ENODEV);
2063 
2064 	link->pool = &sd->sd_iopool;
2065 	if (sd->sd_openings)
2066 		link->openings = sd->sd_openings(sd);
2067 	else
2068 		link->openings = sd->sd_max_wu;
2069 
2070 	return (0);
2071 }
2072 
2073 int
2074 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
2075 {
2076 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
2077 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
2078 
2079 	switch (cmd) {
2080 	case DIOCGCACHE:
2081 	case DIOCSCACHE:
2082 		return (EOPNOTSUPP);
2083 	default:
2084 		return (sr_ioctl(link->adapter_softc, cmd, addr));
2085 	}
2086 }
2087 
2088 int
2089 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr)
2090 {
2091 	struct sr_softc		*sc = (struct sr_softc *)dev;
2092 	int			rv = 0;
2093 
2094 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc));
2095 
2096 	rw_enter_write(&sc->sc_lock);
2097 
2098 	switch (cmd) {
2099 	case BIOCINQ:
2100 		DNPRINTF(SR_D_IOCTL, "inq\n");
2101 		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
2102 		break;
2103 
2104 	case BIOCVOL:
2105 		DNPRINTF(SR_D_IOCTL, "vol\n");
2106 		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
2107 		break;
2108 
2109 	case BIOCDISK:
2110 		DNPRINTF(SR_D_IOCTL, "disk\n");
2111 		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
2112 		break;
2113 
2114 	case BIOCALARM:
2115 		DNPRINTF(SR_D_IOCTL, "alarm\n");
2116 		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
2117 		break;
2118 
2119 	case BIOCBLINK:
2120 		DNPRINTF(SR_D_IOCTL, "blink\n");
2121 		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
2122 		break;
2123 
2124 	case BIOCSETSTATE:
2125 		DNPRINTF(SR_D_IOCTL, "setstate\n");
2126 		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
2127 		break;
2128 
2129 	case BIOCCREATERAID:
2130 		DNPRINTF(SR_D_IOCTL, "createraid\n");
2131 		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
2132 		break;
2133 
2134 	case BIOCDELETERAID:
2135 		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
2136 		break;
2137 
2138 	case BIOCDISCIPLINE:
2139 		rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr);
2140 		break;
2141 
2142 	case BIOCINSTALLBOOT:
2143 		rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr);
2144 		break;
2145 
2146 	default:
2147 		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
2148 		rv = ENOTTY;
2149 	}
2150 
2151 	rw_exit_write(&sc->sc_lock);
2152 
2153 	return (rv);
2154 }
2155 
2156 int
2157 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
2158 {
2159 	int			i, vol, disk;
2160 
2161 	for (i = 0, vol = 0, disk = 0; i < SR_MAX_LD; i++)
2162 		/* XXX this will not work when we stagger disciplines */
2163 		if (sc->sc_dis[i]) {
2164 			vol++;
2165 			disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
2166 		}
2167 
2168 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
2169 	bi->bi_novol = vol + sc->sc_hotspare_no;
2170 	bi->bi_nodisk = disk + sc->sc_hotspare_no;
2171 
2172 	return (0);
2173 }
2174 
2175 int
2176 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
2177 {
2178 	int			i, vol, rv = EINVAL;
2179 	struct sr_discipline	*sd;
2180 	struct sr_chunk		*hotspare;
2181 	daddr64_t		rb, sz;
2182 
2183 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
2184 		/* XXX this will not work when we stagger disciplines */
2185 		if (sc->sc_dis[i])
2186 			vol++;
2187 		if (vol != bv->bv_volid)
2188 			continue;
2189 
2190 		if (sc->sc_dis[i] == NULL)
2191 			goto done;
2192 
2193 		sd = sc->sc_dis[i];
2194 		bv->bv_status = sd->sd_vol_status;
2195 		bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
2196 		bv->bv_level = sd->sd_meta->ssdi.ssd_level;
2197 		bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
2198 
2199 #ifdef CRYPTO
2200 		if (sd->sd_meta->ssdi.ssd_level == 'C' &&
2201 		    sd->mds.mdd_crypto.key_disk != NULL)
2202 			bv->bv_nodisk++;
2203 #endif
2204 
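		/*
		 * Express rebuild progress as a percentage of the
		 * volume size.
		 */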
2205 		if (bv->bv_status == BIOC_SVREBUILD) {
2206 			sz = sd->sd_meta->ssdi.ssd_size;
2207 			rb = sd->sd_meta->ssd_rebuild;
2208 			if (rb > 0)
2209 				bv->bv_percent = 100 -
2210 				    ((sz * 100 - rb * 100) / sz) - 1;
2211 			else
2212 				bv->bv_percent = 0;
2213 		}
2214 		strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
2215 		    sizeof(bv->bv_dev));
2216 		strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
2217 		    sizeof(bv->bv_vendor));
2218 		rv = 0;
2219 		goto done;
2220 	}
2221 
2222 	/* Check hotspares list. */
2223 	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
2224 		vol++;
2225 		if (vol != bv->bv_volid)
2226 			continue;
2227 
2228 		bv->bv_status = BIOC_SVONLINE;
2229 		bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
2230 		bv->bv_level = -1;	/* Hotspare. */
2231 		bv->bv_nodisk = 1;
2232 		strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname,
2233 		    sizeof(bv->bv_dev));
2234 		strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname,
2235 		    sizeof(bv->bv_vendor));
2236 		rv = 0;
2237 		goto done;
2238 	}
2239 
2240 done:
2241 	return (rv);
2242 }
2243 
2244 int
2245 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
2246 {
2247 	int			i, vol, rv = EINVAL, id;
2248 	struct sr_chunk		*src, *hotspare;
2249 
2250 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
2251 		/* XXX this will not work when we stagger disciplines */
2252 		if (sc->sc_dis[i])
2253 			vol++;
2254 		if (vol != bd->bd_volid)
2255 			continue;
2256 
2257 		if (sc->sc_dis[i] == NULL)
2258 			goto done;
2259 
2260 		id = bd->bd_diskid;
2261 
2262 		if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
2263 			src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
2264 #ifdef CRYPTO
2265 		else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no &&
2266 		    sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' &&
2267 		    sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL)
2268 			src = sc->sc_dis[i]->mds.mdd_crypto.key_disk;
2269 #endif
2270 		else
2271 			break;
2272 
2273 		bd->bd_status = src->src_meta.scm_status;
2274 		bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
2275 		bd->bd_channel = vol;
2276 		bd->bd_target = id;
2277 		strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
2278 		    sizeof(bd->bd_vendor));
2279 		rv = 0;
2280 		goto done;
2281 	}
2282 
2283 	/* Check hotspares list. */
2284 	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
2285 		vol++;
2286 		if (vol != bd->bd_volid)
2287 			continue;
2288 
2289 		if (bd->bd_diskid != 0)
2290 			break;
2291 
2292 		bd->bd_status = hotspare->src_meta.scm_status;
2293 		bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
2294 		bd->bd_channel = vol;
2295 		bd->bd_target = bd->bd_diskid;
2296 		strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname,
2297 		    sizeof(bd->bd_vendor));
2298 		rv = 0;
2299 		goto done;
2300 	}
2301 
2302 done:
2303 	return (rv);
2304 }
2305 
2306 int
2307 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
2308 {
2309 	int			rv = EINVAL;
2310 	int			i, vol, found, c;
2311 	struct sr_discipline	*sd = NULL;
2312 	struct sr_chunk		*ch_entry;
2313 	struct sr_chunk_head	*cl;
2314 
2315 	if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED)
2316 		goto done;
2317 
2318 	if (bs->bs_status == BIOC_SSHOTSPARE) {
2319 		rv = sr_hotspare(sc, (dev_t)bs->bs_other_id);
2320 		goto done;
2321 	}
2322 
2323 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
2324 		/* XXX this will not work when we stagger disciplines */
2325 		if (sc->sc_dis[i])
2326 			vol++;
2327 		if (vol != bs->bs_volid)
2328 			continue;
2329 		sd = sc->sc_dis[i];
2330 		break;
2331 	}
2332 	if (sd == NULL)
2333 		goto done;
2334 
2335 	switch (bs->bs_status) {
2336 	case BIOC_SSOFFLINE:
2337 		/* Take chunk offline */
2338 		found = c = 0;
2339 		cl = &sd->sd_vol.sv_chunk_list;
2340 		SLIST_FOREACH(ch_entry, cl, src_link) {
2341 			if (ch_entry->src_dev_mm == bs->bs_other_id) {
2342 				found = 1;
2343 				break;
2344 			}
2345 			c++;
2346 		}
2347 		if (found == 0) {
2348 			printf("%s: chunk not part of array\n", DEVNAME(sc));
2349 			goto done;
2350 		}
2351 
2352 		/* XXX: check current state first */
2353 		sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE);
2354 
2355 		if (sr_meta_save(sd, SR_META_DIRTY)) {
2356 			printf("%s: could not save metadata to %s\n",
2357 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
2358 			goto done;
2359 		}
2360 		rv = 0;
2361 		break;
2362 
2363 	case BIOC_SDSCRUB:
2364 		break;
2365 
2366 	case BIOC_SSREBUILD:
2367 		rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0);
2368 		break;
2369 
2370 	default:
2371 		printf("%s: unsupported state request %d\n",
2372 		    DEVNAME(sc), bs->bs_status);
2373 	}
2374 
2375 done:
2376 	return (rv);
2377 }
2378 
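/*
 * Return the metadata status of the chunk if the device is already part
 * of a volume or on the hotspare list, otherwise BIOC_SDINVALID.
 */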
2379 int
2380 sr_chunk_in_use(struct sr_softc *sc, dev_t dev)
2381 {
2382 	struct sr_discipline	*sd;
2383 	struct sr_chunk		*chunk;
2384 	int			i, c;
2385 
2386 	DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev);
2387 
2388 	if (dev == NODEV)
2389 		return BIOC_SDINVALID;
2390 
2391 	/* See if chunk is already in use. */
2392 	for (i = 0; i < SR_MAX_LD; i++) {
2393 		if (sc->sc_dis[i] == NULL)
2394 			continue;
2395 		sd = sc->sc_dis[i];
2396 		for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) {
2397 			chunk = sd->sd_vol.sv_chunks[c];
2398 			if (chunk->src_dev_mm == dev)
2399 				return chunk->src_meta.scm_status;
2400 		}
2401 	}
2402 
2403 	/* Check hotspares list. */
2404 	SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link)
2405 		if (chunk->src_dev_mm == dev)
2406 			return chunk->src_meta.scm_status;
2407 
2408 	return BIOC_SDINVALID;
2409 }
2410 
2411 int
2412 sr_hotspare(struct sr_softc *sc, dev_t dev)
2413 {
2414 	struct sr_discipline	*sd = NULL;
2415 	struct sr_metadata	*sm = NULL;
2416 	struct sr_meta_chunk    *hm;
2417 	struct sr_chunk_head	*cl;
2418 	struct sr_chunk		*chunk, *last, *hotspare = NULL;
2419 	struct sr_uuid		uuid;
2420 	struct disklabel	label;
2421 	struct vnode		*vn;
2422 	daddr64_t		size;
2423 	char			devname[32];
2424 	int			rv = EINVAL;
2425 	int			c, part, open = 0;
2426 
2427 	/*
2428 	 * Add device to global hotspares list.
2429 	 */
2430 
2431 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2432 
2433 	/* Make sure chunk is not already in use. */
2434 	c = sr_chunk_in_use(sc, dev);
2435 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2436 		if (c == BIOC_SDHOTSPARE)
2437 			printf("%s: %s is already a hotspare\n",
2438 			    DEVNAME(sc), devname);
2439 		else
2440 			printf("%s: %s is already in use\n",
2441 			    DEVNAME(sc), devname);
2442 		goto done;
2443 	}
2444 
2445 	/* XXX - See if there is an existing degraded volume... */
2446 
2447 	/* Open device. */
2448 	if (bdevvp(dev, &vn)) {
2449 		printf("%s: sr_hotspare: can't allocate vnode\n", DEVNAME(sc));
2450 		goto done;
2451 	}
2452 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2453 		DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
2454 		    DEVNAME(sc), devname);
2455 		vput(vn);
2456 		goto fail;
2457 	}
2458 	open = 1; /* close dev on error */
2459 
2460 	/* Get partition details. */
2461 	part = DISKPART(dev);
2462 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2463 	    NOCRED, curproc)) {
2464 		DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n",
2465 		    DEVNAME(sc));
2466 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2467 		vput(vn);
2468 		goto fail;
2469 	}
2470 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2471 		printf("%s: %s partition not of type RAID (%d)\n",
2472 		    DEVNAME(sc), devname,
2473 		    label.d_partitions[part].p_fstype);
2474 		goto fail;
2475 	}
2476 
2477 	/* Calculate partition size. */
2478 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2479 
2480 	/*
2481 	 * Create and populate chunk metadata.
2482 	 */
2483 
2484 	sr_uuid_get(&uuid);
2485 	hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
2486 
2487 	hotspare->src_dev_mm = dev;
2488 	hotspare->src_vn = vn;
2489 	strlcpy(hotspare->src_devname, devname, sizeof(hotspare->src_devname));
2490 	hotspare->src_size = size;
2491 
2492 	hm = &hotspare->src_meta;
2493 	hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
2494 	hm->scmi.scm_chunk_id = 0;
2495 	hm->scmi.scm_size = size;
2496 	hm->scmi.scm_coerced_size = size;
2497 	strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname));
2498 	bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid));
2499 
2500 	sr_checksum(sc, hm, &hm->scm_checksum,
2501 	    sizeof(struct sr_meta_chunk_invariant));
2502 
2503 	hm->scm_status = BIOC_SDHOTSPARE;
2504 
2505 	/*
2506 	 * Create and populate our own discipline and metadata.
2507 	 */
2508 
2509 	sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO);
2510 	sm->ssdi.ssd_magic = SR_MAGIC;
2511 	sm->ssdi.ssd_version = SR_META_VERSION;
2512 	sm->ssd_ondisk = 0;
2513 	sm->ssdi.ssd_vol_flags = 0;
2514 	bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid));
2515 	sm->ssdi.ssd_chunk_no = 1;
2516 	sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID;
2517 	sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL;
2518 	sm->ssdi.ssd_size = size;
2519 	strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
2520 	snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
2521 	    "SR %s", "HOTSPARE");
2522 	snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
2523 	    "%03d", SR_META_VERSION);
2524 
2525 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
2526 	sd->sd_sc = sc;
2527 	sd->sd_meta = sm;
2528 	sd->sd_meta_type = SR_META_F_NATIVE;
2529 	sd->sd_vol_status = BIOC_SVONLINE;
2530 	strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name));
2531 	SLIST_INIT(&sd->sd_meta_opt);
2532 
2533 	/* Add chunk to volume. */
2534 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF,
2535 	    M_WAITOK | M_ZERO);
2536 	sd->sd_vol.sv_chunks[0] = hotspare;
2537 	SLIST_INIT(&sd->sd_vol.sv_chunk_list);
2538 	SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link);
2539 
2540 	/* Save metadata. */
2541 	if (sr_meta_save(sd, SR_META_DIRTY)) {
2542 		printf("%s: could not save metadata to %s\n",
2543 		    DEVNAME(sc), devname);
2544 		goto fail;
2545 	}
2546 
2547 	/*
2548 	 * Add chunk to hotspare list.
2549 	 */
2550 	rw_enter_write(&sc->sc_hs_lock);
2551 	cl = &sc->sc_hotspare_list;
2552 	if (SLIST_EMPTY(cl))
2553 		SLIST_INSERT_HEAD(cl, hotspare, src_link);
2554 	else {
2555 		SLIST_FOREACH(chunk, cl, src_link)
2556 			last = chunk;
2557 		SLIST_INSERT_AFTER(last, hotspare, src_link);
2558 	}
2559 	sc->sc_hotspare_no++;
2560 	rw_exit_write(&sc->sc_hs_lock);
2561 
2562 	rv = 0;
2563 	goto done;
2564 
2565 fail:
2566 	if (hotspare)
2567 		free(hotspare, M_DEVBUF);
2568 
2569 done:
2570 	if (sd && sd->sd_vol.sv_chunks)
2571 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
2572 	if (sd)
2573 		free(sd, M_DEVBUF);
2574 	if (sm)
2575 		free(sm, M_DEVBUF);
2576 	if (open) {
2577 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2578 		vput(vn);
2579 	}
2580 
2581 	return (rv);
2582 }
2583 
2584 void
2585 sr_hotspare_rebuild_callback(void *arg1, void *arg2)
2586 {
2587 	sr_hotspare_rebuild((struct sr_discipline *)arg1);
2588 }
2589 
2590 void
2591 sr_hotspare_rebuild(struct sr_discipline *sd)
2592 {
2593 	struct sr_chunk_head	*cl;
2594 	struct sr_chunk		*hotspare, *chunk = NULL;
2595 	struct sr_workunit	*wu;
2596 	struct sr_ccb		*ccb;
2597 	int			i, s, chunk_no, busy;
2598 
2599 	/*
2600 	 * Attempt to locate a hotspare and initiate rebuild.
2601 	 */
2602 
2603 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
2604 		if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
2605 		    BIOC_SDOFFLINE) {
2606 			chunk_no = i;
2607 			chunk = sd->sd_vol.sv_chunks[i];
2608 			break;
2609 		}
2610 	}
2611 
2612 	if (chunk == NULL) {
2613 		printf("%s: no offline chunk found on %s!\n",
2614 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
2615 		return;
2616 	}
2617 
2618 	/* See if we have a suitable hotspare... */
2619 	rw_enter_write(&sd->sd_sc->sc_hs_lock);
2620 	cl = &sd->sd_sc->sc_hotspare_list;
2621 	SLIST_FOREACH(hotspare, cl, src_link)
2622 		if (hotspare->src_size >= chunk->src_size)
2623 			break;
2624 
2625 	if (hotspare != NULL) {
2626 
2627 		printf("%s: %s volume degraded, will attempt to "
2628 		    "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc),
2629 		    sd->sd_meta->ssd_devname, hotspare->src_devname);
2630 
2631 		/*
2632 		 * Ensure that all pending I/O completes on the failed chunk
2633 		 * before trying to initiate a rebuild.
2634 		 */
2635 		i = 0;
2636 		do {
2637 			busy = 0;
2638 
2639 			s = splbio();
2640 			TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
2641 				if (wu->swu_cb_active == 1)
2642 					panic("%s: sr_hotspare_rebuild",
2643 					    DEVNAME(sd->sd_sc));
2644 				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2645 					if (ccb->ccb_target == chunk_no)
2646 						busy = 1;
2647 				}
2648 			}
2649 			TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) {
2650 				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2651 					if (ccb->ccb_target == chunk_no)
2652 						busy = 1;
2653 				}
2654 			}
2655 			splx(s);
2656 
2657 			if (busy) {
2658 				tsleep(sd, PRIBIO, "sr_hotspare", hz);
2659 				i++;
2660 			}
2661 
2662 		} while (busy && i < 120);
2663 
2664 		DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to "
2665 		    "complete on failed chunk %s\n", DEVNAME(sd->sd_sc),
2666 		    i, chunk->src_devname);
2667 
2668 		if (busy) {
2669 			printf("%s: pending I/O failed to complete on "
2670 			    "failed chunk %s, hotspare rebuild aborted...\n",
2671 			    DEVNAME(sd->sd_sc), chunk->src_devname);
2672 			goto done;
2673 		}
2674 
2675 		s = splbio();
2676 		rw_enter_write(&sd->sd_sc->sc_lock);
2677 		if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) {
2678 
2679 			/* Remove hotspare from available list. */
2680 			sd->sd_sc->sc_hotspare_no--;
2681 			SLIST_REMOVE(cl, hotspare, sr_chunk, src_link);
2682 			free(hotspare, M_DEVBUF);
2683 
2684 		}
2685 		rw_exit_write(&sd->sd_sc->sc_lock);
2686 		splx(s);
2687 	}
2688 done:
2689 	rw_exit_write(&sd->sd_sc->sc_hs_lock);
2690 }
2691 
2692 int
2693 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare)
2694 {
2695 	struct sr_softc		*sc = sd->sd_sc;
2696 	int			rv = EINVAL, part;
2697 	int			c, found, open = 0;
2698 	char			devname[32];
2699 	struct vnode		*vn;
2700 	daddr64_t		size, csize;
2701 	struct disklabel	label;
2702 	struct sr_meta_chunk	*old, *new;
2703 
2704 	/*
2705 	 * Attempt to initiate a rebuild onto the specified device.
2706 	 */
2707 
2708 	if (!(sd->sd_capabilities & SR_CAP_REBUILD)) {
2709 		printf("%s: discipline does not support rebuild\n",
2710 		    DEVNAME(sc));
2711 		goto done;
2712 	}
2713 
2714 	/* make sure volume is in the right state */
2715 	if (sd->sd_vol_status == BIOC_SVREBUILD) {
2716 		printf("%s: rebuild already in progress\n", DEVNAME(sc));
2717 		goto done;
2718 	}
2719 	if (sd->sd_vol_status != BIOC_SVDEGRADED) {
2720 		printf("%s: %s not degraded\n", DEVNAME(sc),
2721 		    sd->sd_meta->ssd_devname);
2722 		goto done;
2723 	}
2724 
2725 	/* find offline chunk */
2726 	for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
2727 		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
2728 		    BIOC_SDOFFLINE) {
2729 			found = c;
2730 			new = &sd->sd_vol.sv_chunks[c]->src_meta;
2731 			if (c > 0)
2732 				break; /* but loop at least once so old/csize get set */
2733 		} else {
2734 			csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size;
2735 			old = &sd->sd_vol.sv_chunks[c]->src_meta;
2736 			if (found != -1)
2737 				break;
2738 		}
2739 	if (found == -1) {
2740 		printf("%s: no offline chunks available for rebuild\n",
2741 		    DEVNAME(sc));
2742 		goto done;
2743 	}
2744 
2745 	/* populate meta entry */
2746 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2747 	if (bdevvp(dev, &vn)) {
2748 		printf("%s: sr_rebuild_init: can't allocate vnode\n",
2749 		    DEVNAME(sc));
2750 		goto done;
2751 	}
2752 
2753 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2754 		DNPRINTF(SR_D_META, "%s: sr_rebuild_init can't "
2755 		    "open %s\n", DEVNAME(sc), devname);
2756 		vput(vn);
2757 		goto done;
2758 	}
2759 	open = 1; /* close dev on error */
2760 
2761 	/* get partition */
2762 	part = DISKPART(dev);
2763 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2764 	    NOCRED, curproc)) {
2765 		DNPRINTF(SR_D_META, "%s: sr_rebuild_init ioctl failed\n",
2766 		    DEVNAME(sc));
2767 		goto done;
2768 	}
2769 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2770 		printf("%s: %s partition not of type RAID (%d)\n",
2771 		    DEVNAME(sc), devname,
2772 		    label.d_partitions[part].p_fstype);
2773 		goto done;
2774 	}
2775 
2776 	/* is partition large enough? */
2777 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2778 	if (size < csize) {
2779 		printf("%s: partition too small, at least %llu B required\n",
2780 		    DEVNAME(sc), csize << DEV_BSHIFT);
2781 		goto done;
2782 	} else if (size > csize)
2783 		printf("%s: partition too large, wasting %llu B\n",
2784 		    DEVNAME(sc), (size - csize) << DEV_BSHIFT);
2785 
2786 	/* make sure we are not stomping on some other partition */
2787 	c = sr_chunk_in_use(sc, dev);
2788 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE &&
2789 	    !(hotspare && c == BIOC_SDHOTSPARE)) {
2790 		printf("%s: %s is already in use\n", DEVNAME(sc), devname);
2791 		goto done;
2792 	}
2793 
2794 	/* Reset rebuild counter since we are rebuilding onto a new chunk. */
2795 	sd->sd_meta->ssd_rebuild = 0;
2796 
2797 	/* recreate metadata */
2798 	open = 0; /* leave dev open from here on out */
2799 	sd->sd_vol.sv_chunks[found]->src_dev_mm = dev;
2800 	sd->sd_vol.sv_chunks[found]->src_vn = vn;
2801 	new->scmi.scm_volid = old->scmi.scm_volid;
2802 	new->scmi.scm_chunk_id = found;
2803 	strlcpy(new->scmi.scm_devname, devname,
2804 	    sizeof new->scmi.scm_devname);
2805 	new->scmi.scm_size = size;
2806 	new->scmi.scm_coerced_size = old->scmi.scm_coerced_size;
2807 	bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid,
2808 	    sizeof new->scmi.scm_uuid);
2809 	sr_checksum(sc, new, &new->scm_checksum,
2810 	    sizeof(struct sr_meta_chunk_invariant));
2811 	sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD);
2812 	if (sr_meta_save(sd, SR_META_DIRTY)) {
2813 		printf("%s: could not save metadata to %s\n",
2814 		    DEVNAME(sc), devname);
2815 		open = 1;
2816 		goto done;
2817 	}
2818 
2819 	printf("%s: rebuild of %s started on %s\n", DEVNAME(sc),
2820 	    sd->sd_meta->ssd_devname, devname);
2821 
2822 	sd->sd_reb_abort = 0;
2823 	kthread_create_deferred(sr_rebuild, sd);
2824 
2825 	rv = 0;
2826 done:
2827 	if (open) {
2828 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2829 		vput(vn);
2830 	}
2831 
2832 	return (rv);
2833 }
2834 
2835 void
2836 sr_roam_chunks(struct sr_discipline *sd)
2837 {
2838 	struct sr_softc		*sc = sd->sd_sc;
2839 	struct sr_chunk		*chunk;
2840 	struct sr_meta_chunk	*meta;
2841 	int			roamed = 0;
2842 
2843 	/* Have any chunks roamed? */
2844 	SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) {
2845 		meta = &chunk->src_meta;
2846 		if (strncmp(meta->scmi.scm_devname, chunk->src_devname,
2847 		    sizeof(meta->scmi.scm_devname))) {
2848 
2849 			printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
2850 			    meta->scmi.scm_devname, chunk->src_devname);
2851 
2852 			strlcpy(meta->scmi.scm_devname, chunk->src_devname,
2853 			    sizeof(meta->scmi.scm_devname));
2854 
2855 			roamed++;
2856 		}
2857 	}
2858 
2859 	if (roamed)
2860 		sr_meta_save(sd, SR_META_DIRTY);
2861 }
2862 
2863 int
2864 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
2865 {
2866 	dev_t			*dt;
2867 	int			i, no_chunk, rv = EINVAL, target, vol;
2868 	int			no_meta, updatemeta = 0;
2869 	struct sr_chunk_head	*cl;
2870 	struct sr_discipline	*sd = NULL;
2871 	struct sr_chunk		*ch_entry;
2872 	struct scsi_link	*link;
2873 	struct device		*dev;
2874 	char			devname[32];
2875 
2876 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
2877 	    DEVNAME(sc), user);
2878 
2879 	/* user input */
2880 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
2881 		goto unwind;
2882 
2883 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
2884 	if (user) {
2885 		if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0)
2886 			goto unwind;
2887 	} else
2888 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
2889 
2890 	/* Initialise discipline. */
2891 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
2892 	sd->sd_sc = sc;
2893 	SLIST_INIT(&sd->sd_meta_opt);
2894 	sd->sd_workq = workq_create("srdis", 1, IPL_BIO);
2895 	if (sd->sd_workq == NULL) {
2896 		printf("%s: could not create workq\n", DEVNAME(sc));
2897 		goto unwind;
2898 	}
2899 	if (sr_discipline_init(sd, bc->bc_level)) {
2900 		printf("%s: could not initialize discipline\n", DEVNAME(sc));
2901 		goto unwind;
2902 	}
2903 
2904 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
2905 	cl = &sd->sd_vol.sv_chunk_list;
2906 	SLIST_INIT(cl);
2907 
2908 	/* Ensure that chunks are not already in use. */
2909 	for (i = 0; i < no_chunk; i++) {
2910 		if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) {
2911 			sr_meta_getdevname(sc, dt[i], devname, sizeof(devname));
2912 			printf("%s: chunk %s already in use\n",
2913 			    DEVNAME(sc), devname);
2914 			goto unwind;
2915 		}
2916 	}
2917 
2918 	sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
2919 	if (sd->sd_meta_type == SR_META_F_INVALID) {
2920 		printf("%s: invalid metadata format\n", DEVNAME(sc));
2921 		goto unwind;
2922 	}
2923 
2924 	if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) {
2925 		printf("%s: can't attach metadata type %d\n", DEVNAME(sc),
2926 		    sd->sd_meta_type);
2927 		goto unwind;
2928 	}
2929 
2930 	/* force the raid volume by clearing metadata region */
2931 	if (bc->bc_flags & BIOC_SCFORCE) {
2932 		/* make sure disk isn't up and running */
2933 		if (sr_meta_read(sd))
2934 			if (sr_already_assembled(sd)) {
2935 				printf("%s: disk ", DEVNAME(sc));
2936 				sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
2937 				printf(" is currently in use; can't force "
2938 				    "create\n");
2939 				goto unwind;
2940 			}
2941 
2942 		if (sr_meta_clear(sd)) {
2943 			printf("%s: failed to clear metadata\n", DEVNAME(sc));
2944 			goto unwind;
2945 		}
2946 	}
2947 
2948 	if ((no_meta = sr_meta_read(sd)) == 0) {
2949 		/* fill out all chunk metadata */
2950 		sr_meta_chunks_create(sc, cl);
2951 		ch_entry = SLIST_FIRST(cl);
2952 
2953 		sd->sd_vol_status = BIOC_SVONLINE;
2954 		sd->sd_meta->ssdi.ssd_level = bc->bc_level;
2955 		sd->sd_meta->ssdi.ssd_chunk_no = no_chunk;
2956 
2957 		/* Make the volume UUID available. */
2958 		bcopy(&ch_entry->src_meta.scmi.scm_uuid,
2959 		    &sd->sd_meta->ssdi.ssd_uuid,
2960 		    sizeof(sd->sd_meta->ssdi.ssd_uuid));
2961 
2962 		if (sd->sd_create) {
2963 			if ((i = sd->sd_create(sd, bc, no_chunk,
2964 			    ch_entry->src_meta.scmi.scm_coerced_size))) {
2965 				rv = i;
2966 				goto unwind;
2967 			}
2968 		}
2969 
2970 		/* fill out all volume metadata */
2971 		DNPRINTF(SR_D_IOCTL,
2972 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
2973 		    DEVNAME(sc), sd->sd_meta->ssdi.ssd_size);
2974 		strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD",
2975 		    sizeof(sd->sd_meta->ssdi.ssd_vendor));
2976 		snprintf(sd->sd_meta->ssdi.ssd_product,
2977 		    sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s",
2978 		    sd->sd_name);
2979 		snprintf(sd->sd_meta->ssdi.ssd_revision,
2980 		    sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d",
2981 		    SR_META_VERSION);
2982 
2983 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
2984 		updatemeta = 1;
2985 	} else if (no_meta == no_chunk) {
2986 		if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
2987 			printf("%s: %s was not shut down properly\n",
2988 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
2989 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
2990 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
2991 			    "metadata\n", DEVNAME(sc));
2992 			goto unwind;
2993 		}
2994 		if (sr_already_assembled(sd)) {
2995 			printf("%s: disk ", DEVNAME(sc));
2996 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
2997 			printf(" already assembled\n");
2998 			goto unwind;
2999 		}
3000 
3001 		if (sd->sd_assemble) {
3002 			if ((i = sd->sd_assemble(sd, bc, no_chunk))) {
3003 				rv = i;
3004 				goto unwind;
3005 			}
3006 		}
3007 
3008 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
3009 		    DEVNAME(sc));
3010 		updatemeta = 0;
3011 	} else if (no_meta == -1) {
3012 		printf("%s: one of the chunks has corrupt metadata; aborting "
3013 		    "assembly\n", DEVNAME(sc));
3014 		goto unwind;
3015 	} else {
3016 		if (sr_already_assembled(sd)) {
3017 			printf("%s: disk ", DEVNAME(sc));
3018 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
3019 			printf(" already assembled; will not partially "
3020 			    "assemble it\n");
3021 			goto unwind;
3022 		}
3023 
3024 		if (sd->sd_assemble) {
3025 			if ((i = sd->sd_assemble(sd, bc, no_chunk))) {
3026 				rv = i;
3027 				goto unwind;
3028 			}
3029 		}
3030 
3031 		printf("%s: trying to bring up %s degraded\n", DEVNAME(sc),
3032 		    sd->sd_meta->ssd_devname);
3033 	}
3034 
3035 	/* metadata SHALL be fully filled in at this point */
3036 
3037 	/* Make sure that metadata level matches assembly level. */
3038 	if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) {
3039 		printf("%s: volume level does not match metadata level!\n",
3040 		    DEVNAME(sc));
3041 		goto unwind;
3042 	}
3043 
3044 	/* allocate all resources */
3045 	if ((rv = sd->sd_alloc_resources(sd)))
3046 		goto unwind;
3047 
3048 	/* Adjust flags if necessary. */
3049 	if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) &&
3050 	    (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) !=
3051 	    (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) {
3052 		sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE;
3053 		sd->sd_meta->ssdi.ssd_vol_flags |=
3054 		    bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
3055 	}
3056 
3057 	if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) {
3058 		/* set volume status */
3059 		sd->sd_set_vol_state(sd);
3060 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3061 			printf("%s: %s offline, will not be brought online\n",
3062 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
3063 			goto unwind;
3064 		}
3065 
3066 		/* setup scsi iopool */
3067 		mtx_init(&sd->sd_wu_mtx, IPL_BIO);
3068 		scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put);
3069 
3070 		/*
3071 		 * We passed all checks; return ENXIO if the volume can't be created.
3072 		 */
3073 		rv = ENXIO;
3074 
3075 		/*
3076 		 * Find a free target.
3077 		 *
3078 		 * XXX: We reserve sd_target == 0 to indicate the
3079 		 * discipline is not linked into sc->sc_dis, so begin
3080 		 * the search with target = 1.
3081 		 */
3082 		for (target = 1; target < SR_MAX_LD; target++)
3083 			if (sc->sc_dis[target] == NULL)
3084 				break;
3085 		if (target == SR_MAX_LD) {
3086 			printf("%s: no free target for %s\n", DEVNAME(sc),
3087 			    sd->sd_meta->ssd_devname);
3088 			goto unwind;
3089 		}
3090 
3091 		/* clear sense data */
3092 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
3093 
3094 		/* attach discipline and kick midlayer to probe it */
3095 		sd->sd_target = target;
3096 		sc->sc_dis[target] = sd;
3097 		if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) {
3098 			printf("%s: scsi_probe_lun failed\n", DEVNAME(sc));
3099 			sc->sc_dis[target] = NULL;
3100 			sd->sd_target = 0;
3101 			goto unwind;
3102 		}
3103 
3104 		link = scsi_get_link(sc->sc_scsibus, target, 0);
3105 		dev = link->device_softc;
3106 		DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n",
3107 		    DEVNAME(sc), dev->dv_xname, sd->sd_target);
3108 
3109 		for (i = 0, vol = -1; i <= sd->sd_target; i++)
3110 			if (sc->sc_dis[i])
3111 				vol++;
3112 
3113 		rv = 0;
3114 		if (updatemeta) {
3115 			/* fill out remaining volume metadata */
3116 			sd->sd_meta->ssdi.ssd_volid = vol;
3117 			strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
3118 			    sizeof(sd->sd_meta->ssd_devname));
3119 			sr_meta_init(sd, cl);
3120 		} else {
3121 			if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
3122 			    sizeof(dev->dv_xname))) {
3123 				printf("%s: volume %s is roaming, it used to "
3124 				    "be %s, updating metadata\n",
3125 				    DEVNAME(sc), dev->dv_xname,
3126 				    sd->sd_meta->ssd_devname);
3127 
3128 				sd->sd_meta->ssdi.ssd_volid = vol;
3129 				strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
3130 				    sizeof(sd->sd_meta->ssd_devname));
3131 			}
3132 		}
3133 
3134 		/* Update device name on any chunks which roamed. */
3135 		sr_roam_chunks(sd);
3136 
3137 #ifndef SMALL_KERNEL
3138 		if (sr_sensors_create(sd))
3139 			printf("%s: unable to create sensor for %s\n",
3140 			    DEVNAME(sc), dev->dv_xname);
3141 		else
3142 			sd->sd_vol.sv_sensor_valid = 1;
3143 #endif /* SMALL_KERNEL */
3144 	} else {
3145 		/* we are not an os disk */
3146 		if (updatemeta) {
3147 			/* fill out remaining volume metadata */
3148 			sd->sd_meta->ssdi.ssd_volid = 0;
3149 			strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname,
3150 			    sizeof(sd->sd_meta->ssd_devname));
3151 			sr_meta_init(sd, cl);
3152 		}
3153 		if (sd->sd_start_discipline(sd))
3154 			goto unwind;
3155 	}
3156 
3157 	/* save metadata to disk */
3158 	rv = sr_meta_save(sd, SR_META_DIRTY);
3159 
3160 	if (sd->sd_vol_status == BIOC_SVREBUILD)
3161 		kthread_create_deferred(sr_rebuild, sd);
3162 
3163 	sd->sd_ready = 1;
3164 
3165 	return (rv);
3166 unwind:
3167 	sr_discipline_shutdown(sd, 0);
3168 
3169 	/* XXX - use internal status values! */
3170 	if (rv == EAGAIN)
3171 		rv = 0;
3172 
3173 	return (rv);
3174 }
3175 
3176 int
3177 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
3178 {
3179 	struct sr_discipline	*sd = NULL;
3180 	int			rv = 1;
3181 	int			i;
3182 
3183 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
3184 	    dr->bd_dev);
3185 
3186 	for (i = 0; i < SR_MAX_LD; i++)
3187 		if (sc->sc_dis[i]) {
3188 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3189 			    dr->bd_dev,
3190 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3191 				sd = sc->sc_dis[i];
3192 				break;
3193 			}
3194 		}
3195 
3196 	if (sd == NULL)
3197 		goto bad;
3198 
3199 	sd->sd_deleted = 1;
3200 	sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE;
3201 	sr_discipline_shutdown(sd, 1);
3202 
3203 	rv = 0;
3204 bad:
3205 	return (rv);
3206 }
3207 
3208 int
3209 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd)
3210 {
3211 	struct sr_discipline	*sd = NULL;
3212 	int			i, rv = 1;
3213 
3214 	/* Dispatch a discipline specific ioctl. */
3215 
3216 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc),
3217 	    bd->bd_dev);
3218 
3219 	for (i = 0; i < SR_MAX_LD; i++)
3220 		if (sc->sc_dis[i]) {
3221 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3222 			    bd->bd_dev,
3223 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3224 				sd = sc->sc_dis[i];
3225 				break;
3226 			}
3227 		}
3228 
3229 	if (sd && sd->sd_ioctl_handler)
3230 		rv = sd->sd_ioctl_handler(sd, bd);
3231 
3232 	return (rv);
3233 }
3234 
3235 int
3236 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb)
3237 {
3238 	void			*bootblk = NULL, *bootldr = NULL;
3239 	struct sr_discipline	*sd = NULL;
3240 	struct sr_chunk		*chunk;
3241 	u_int32_t		bbs, bls;
3242 	int			rv = EINVAL;
3243 	int			i;
3244 
3245 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc),
3246 	    bb->bb_dev);
3247 
3248 	for (i = 0; i < SR_MAX_LD; i++)
3249 		if (sc->sc_dis[i]) {
3250 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3251 			    bb->bb_dev,
3252 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3253 				sd = sc->sc_dis[i];
3254 				break;
3255 			}
3256 		}
3257 
3258 	if (sd == NULL)
3259 		goto done;
3260 
3261 	/* Ensure that boot storage area is large enough. */
3262 	if (sd->sd_meta->ssd_data_offset < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) {
3263 		printf("%s: insufficient boot storage!\n", DEVNAME(sd->sd_sc));
3264 		goto done;
3265 	}
3266 
3267 	if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512)
3268 		goto done;
3269 
3270 	if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512)
3271 		goto done;
3272 
3273 	/* Copy in boot block. */
3274 	bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE;
3275 	bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO);
3276 	if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0)
3277 		goto done;
3278 
3279 	/* Copy in boot loader. */
3280 	bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE;
3281 	bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO);
3282 	if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0)
3283 		goto done;
3284 
3285 	/* Save boot block and boot loader to each chunk. */
3286 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
3287 
3288 		chunk = sd->sd_vol.sv_chunks[i];
3289 
3290 		/* Save boot blocks. */
3291 		DNPRINTF(SR_D_IOCTL,
3292 		    "sr_ioctl_installboot: saving boot block to %s "
3293 		    "(%u bytes)\n", chunk->src_devname, bbs);
3294 
3295 		if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs,
3296 		    SR_BOOT_BLOCKS_OFFSET, B_WRITE)) {
3297 			printf("%s: failed to write boot block\n", DEVNAME(sc));
3298 			goto done;
3299 		}
3300 
3301 		/* Save boot loader. */
3302 		DNPRINTF(SR_D_IOCTL,
3303 		    "sr_ioctl_installboot: saving boot loader to %s "
3304 		    "(%u bytes)\n", chunk->src_devname, bls);
3305 
3306 		if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls,
3307 		    SR_BOOT_LOADER_OFFSET, B_WRITE)) {
3308 			printf("%s: failed to write boot loader\n",
3309 			    DEVNAME(sc));
3310 			goto done;
3311 		}
3312 
3313 	}
3314 
3315 	/* XXX - Install boot block on disk - MD code. */
3316 
3317 	/* Save boot details in metadata. */
3318 	sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE;
3319 
3320 	/* XXX - Store size of boot block/loader in optional metadata. */
3321 
3322 	/* Save metadata. */
3323 	if (sr_meta_save(sd, SR_META_DIRTY)) {
3324 		printf("%s: could not save metadata to %s\n",
3325 		    DEVNAME(sc), chunk->src_devname);
3326 		goto done;
3327 	}
3328 
3329 	rv = 0;
3330 
3331 done:
3332 	if (bootblk)
3333 		free(bootblk, M_DEVBUF);
3334 	if (bootldr)
3335 		free(bootldr, M_DEVBUF);
3336 
3337 	return (rv);
3338 }
3339 
3340 void
3341 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
3342 {
3343 	struct sr_chunk		*ch_entry, *ch_next;
3344 
3345 	DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
3346 
3347 	if (!cl)
3348 		return;
3349 
3350 	for (ch_entry = SLIST_FIRST(cl);
3351 	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
3352 		ch_next = SLIST_NEXT(ch_entry, src_link);
3353 
3354 		DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
3355 		    DEVNAME(sc), ch_entry->src_devname);
3356 		if (ch_entry->src_vn) {
3357 			/*
3358 			 * XXX - explicitly lock the vnode until we can resolve
3359 			 * the problem introduced by vnode aliasing... specfs
3360 			 * has no locking, whereas ufs/ffs does!
3361 			 */
3362 			vn_lock(ch_entry->src_vn, LK_EXCLUSIVE |
3363 			    LK_RETRY, curproc);
3364 			VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED,
3365 			    curproc);
3366 			vput(ch_entry->src_vn);
3367 		}
3368 		free(ch_entry, M_DEVBUF);
3369 	}
3370 	SLIST_INIT(cl);
3371 }
3372 
3373 void
3374 sr_discipline_free(struct sr_discipline *sd)
3375 {
3376 	struct sr_softc		*sc;
3377 	struct sr_meta_opt_head *omh;
3378 	struct sr_meta_opt_item	*omi, *omi_next;
3379 
3380 	if (!sd)
3381 		return;
3382 
3383 	sc = sd->sd_sc;
3384 
3385 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
3386 	    DEVNAME(sc),
3387 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
3388 	if (sd->sd_free_resources)
3389 		sd->sd_free_resources(sd);
3390 	if (sd->sd_vol.sv_chunks)
3391 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
3392 	if (sd->sd_meta)
3393 		free(sd->sd_meta, M_DEVBUF);
3394 	if (sd->sd_meta_foreign)
3395 		free(sd->sd_meta_foreign, M_DEVBUF);
3396 
3397 	omh = &sd->sd_meta_opt;
3398 	for (omi = SLIST_FIRST(omh); omi != SLIST_END(omh); omi = omi_next) {
3399 		omi_next = SLIST_NEXT(omi, omi_link);
3400 		free(omi, M_DEVBUF);
3401 	}
3402 
3403 	if (sd->sd_target != 0) {
3404 		KASSERT(sc->sc_dis[sd->sd_target] == sd);
3405 		sc->sc_dis[sd->sd_target] = NULL;
3406 	}
3407 
3408 	explicit_bzero(sd, sizeof *sd);
3409 	free(sd, M_DEVBUF);
3410 }
3411 
3412 void
3413 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save)
3414 {
3415 	struct sr_softc		*sc;
3416 	int			s;
3417 
3418 	if (!sd)
3419 		return;
3420 	sc = sd->sd_sc;
3421 
3422 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
3423 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
3424 
3425 	/* If rebuilding, abort rebuild and drain I/O. */
3426 	if (sd->sd_reb_active) {
3427 		sd->sd_reb_abort = 1;
3428 		while (sd->sd_reb_active)
3429 			tsleep(sd, PWAIT, "sr_shutdown", 1);
3430 	}
3431 
3432 	if (meta_save)
3433 		sr_meta_save(sd, 0);
3434 
3435 	s = splbio();
3436 
3437 	sd->sd_ready = 0;
3438 
3439 	/* make sure there isn't a sync pending and yield */
3440 	wakeup(sd);
3441 	while (sd->sd_sync || sd->sd_must_flush)
3442 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
3443 		    EWOULDBLOCK)
3444 			break;
3445 
3446 #ifndef SMALL_KERNEL
3447 	sr_sensors_delete(sd);
3448 #endif /* SMALL_KERNEL */
3449 
3450 	if (sd->sd_target != 0)
3451 		scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0, DETACH_FORCE);
3452 
3453 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
3454 
3455 	if (sd->sd_workq)
3456 		workq_destroy(sd->sd_workq);
3457 
3458 	if (sd)
3459 		sr_discipline_free(sd);
3460 
3461 	splx(s);
3462 }
3463 
3464 int
3465 sr_discipline_init(struct sr_discipline *sd, int level)
3466 {
3467 	int			rv = 1;
3468 
3469 	switch (level) {
3470 	case 0:
3471 		sr_raid0_discipline_init(sd);
3472 		break;
3473 	case 1:
3474 		sr_raid1_discipline_init(sd);
3475 		break;
3476 	case 4:
3477 		sr_raidp_discipline_init(sd, SR_MD_RAID4);
3478 		break;
3479 	case 5:
3480 		sr_raidp_discipline_init(sd, SR_MD_RAID5);
3481 		break;
3482 	case 6:
3483 		sr_raid6_discipline_init(sd);
3484 		break;
3485 #ifdef AOE
3486 	/* AOE target. */
3487 	case 'A':
3488 		sr_aoe_server_discipline_init(sd);
3489 		break;
3490 	/* AOE initiator. */
3491 	case 'a':
3492 		sr_aoe_discipline_init(sd);
3493 		break;
3494 #endif
3495 #ifdef CRYPTO
3496 	case 'C':
3497 		sr_crypto_discipline_init(sd);
3498 		break;
3499 #endif
3500 	default:
3501 		goto bad;
3502 	}
3503 
3504 	rv = 0;
3505 bad:
3506 	return (rv);
3507 }
3508 
3509 int
3510 sr_raid_inquiry(struct sr_workunit *wu)
3511 {
3512 	struct sr_discipline	*sd = wu->swu_dis;
3513 	struct scsi_xfer	*xs = wu->swu_xs;
3514 	struct scsi_inquiry	*cdb = (struct scsi_inquiry *)xs->cmd;
3515 	struct scsi_inquiry_data inq;
3516 
3517 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
3518 
3519 	if (xs->cmdlen != sizeof(*cdb))
3520 		return (EINVAL);
3521 
3522 	if (ISSET(cdb->flags, SI_EVPD))
3523 		return (EOPNOTSUPP);
3524 
3525 	bzero(&inq, sizeof(inq));
3526 	inq.device = T_DIRECT;
3527 	inq.dev_qual2 = 0;
3528 	inq.version = 2;
3529 	inq.response_format = 2;
3530 	inq.additional_length = 32;
3531 	inq.flags |= SID_CmdQue;
3532 	strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
3533 	    sizeof(inq.vendor));
3534 	strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
3535 	    sizeof(inq.product));
3536 	strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
3537 	    sizeof(inq.revision));
3538 	sr_copy_internal_data(xs, &inq, sizeof(inq));
3539 
3540 	return (0);
3541 }
3542 
3543 int
3544 sr_raid_read_cap(struct sr_workunit *wu)
3545 {
3546 	struct sr_discipline	*sd = wu->swu_dis;
3547 	struct scsi_xfer	*xs = wu->swu_xs;
3548 	struct scsi_read_cap_data rcd;
3549 	struct scsi_read_cap_data_16 rcd16;
3550 	daddr64_t		addr;
3551 	int			rv = 1;
3552 
3553 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
3554 
3555 	addr = sd->sd_meta->ssdi.ssd_size - 1;
3556 	if (xs->cmd->opcode == READ_CAPACITY) {
3557 		bzero(&rcd, sizeof(rcd));
3558 		if (addr > 0xffffffffllu)
3559 			_lto4b(0xffffffff, rcd.addr);
3560 		else
3561 			_lto4b(addr, rcd.addr);
3562 		_lto4b(512, rcd.length);
3563 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
3564 		rv = 0;
3565 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
3566 		bzero(&rcd16, sizeof(rcd16));
3567 		_lto8b(addr, rcd16.addr);
3568 		_lto4b(512, rcd16.length);
3569 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
3570 		rv = 0;
3571 	}
3572 
3573 	return (rv);
3574 }
3575 
3576 int
3577 sr_raid_tur(struct sr_workunit *wu)
3578 {
3579 	struct sr_discipline	*sd = wu->swu_dis;
3580 
3581 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
3582 
3583 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3584 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3585 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
3586 		sd->sd_scsi_sense.add_sense_code = 0x04;
3587 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
3588 		sd->sd_scsi_sense.extra_len = 4;
3589 		return (1);
3590 	} else if (sd->sd_vol_status == BIOC_SVINVALID) {
3591 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3592 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
3593 		sd->sd_scsi_sense.add_sense_code = 0x05;
3594 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
3595 		sd->sd_scsi_sense.extra_len = 4;
3596 		return (1);
3597 	}
3598 
3599 	return (0);
3600 }
3601 
3602 int
3603 sr_raid_request_sense(struct sr_workunit *wu)
3604 {
3605 	struct sr_discipline	*sd = wu->swu_dis;
3606 	struct scsi_xfer	*xs = wu->swu_xs;
3607 
3608 	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
3609 	    DEVNAME(sd->sd_sc));
3610 
3611 	/* use latest sense data */
3612 	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
3613 
3614 	/* clear sense data */
3615 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
3616 
3617 	return (0);
3618 }
3619 
3620 int
3621 sr_raid_start_stop(struct sr_workunit *wu)
3622 {
3623 	struct scsi_xfer	*xs = wu->swu_xs;
3624 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
3625 
3626 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
3627 	    DEVNAME(wu->swu_dis->sd_sc));
3628 
3629 	if (!ss)
3630 		return (1);
3631 
3632 	/*
3633 	 * do nothing!
3634 	 * a softraid discipline should always reflect correct status
3635 	 */
3636 	return (0);
3637 }
3638 
3639 int
3640 sr_raid_sync(struct sr_workunit *wu)
3641 {
3642 	struct sr_discipline	*sd = wu->swu_dis;
3643 	int			s, rv = 0, ios;
3644 
3645 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
3646 
3647 	/* when doing a fake sync don't count the wu */
3648 	ios = wu->swu_fake ? 0 : 1;
3649 
3650 	s = splbio();
3651 	sd->sd_sync = 1;
3652 
3653 	while (sd->sd_wu_pending > ios)
3654 		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
3655 			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
3656 			    DEVNAME(sd->sd_sc));
3657 			rv = 1;
3658 			break;
3659 		}
3660 
3661 	sd->sd_sync = 0;
3662 	splx(s);
3663 
3664 	wakeup(&sd->sd_sync);
3665 
3666 	return (rv);
3667 }
3668 
3669 void
3670 sr_startwu_callback(void *arg1, void *arg2)
3671 {
3672 	struct sr_discipline	*sd = arg1;
3673 	struct sr_workunit	*wu = arg2;
3674 	struct sr_ccb		*ccb;
3675 	int			s;
3676 
3677 	s = splbio();
3678 	if (wu->swu_cb_active == 1)
3679 		panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc));
3680 	wu->swu_cb_active = 1;
3681 
3682 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
3683 		VOP_STRATEGY(&ccb->ccb_buf);
3684 
3685 	wu->swu_cb_active = 0;
3686 	splx(s);
3687 }
3688 
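/*
 * Place a work unit on the pending queue (unless it is being restarted)
 * and hand it to the discipline workq, which issues the individual CCB
 * I/Os via sr_startwu_callback().
 */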
3689 void
3690 sr_raid_startwu(struct sr_workunit *wu)
3691 {
3692 	struct sr_discipline	*sd = wu->swu_dis;
3693 
3694 	splassert(IPL_BIO);
3695 
3696 	if (wu->swu_state == SR_WU_RESTART)
3697 		/*
3698 		 * no need to put the wu on the pending queue since we
3699 		 * are restarting the io
3700 		 */
3701 		 ;
3702 	else
3703 		/* move wu to pending queue */
3704 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
3705 
3706 	/* start all individual ios */
3707 	workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback,
3708 	    sd, wu);
3709 }
3710 
3711 void
3712 sr_checksum_print(u_int8_t *md5)
3713 {
3714 	int			i;
3715 
3716 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
3717 		printf("%02x", md5[i]);
3718 }
3719 
3720 void
3721 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
3722 {
3723 	MD5_CTX			ctx;
3724 
3725 	DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
3726 	    md5, len);
3727 
3728 	MD5Init(&ctx);
3729 	MD5Update(&ctx, src, len);
3730 	MD5Final(md5, &ctx);
3731 }
3732 
3733 void
3734 sr_uuid_get(struct sr_uuid *uuid)
3735 {
3736 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
3737 	/* UUID version 4: random */
3738 	uuid->sui_id[6] &= 0x0f;
3739 	uuid->sui_id[6] |= 0x40;
3740 	/* RFC4122 variant */
3741 	uuid->sui_id[8] &= 0x3f;
3742 	uuid->sui_id[8] |= 0x80;
3743 }
3744 
3745 void
3746 sr_uuid_print(struct sr_uuid *uuid, int cr)
3747 {
3748 	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
3749 	    "%02x%02x%02x%02x%02x%02x",
3750 	    uuid->sui_id[0], uuid->sui_id[1],
3751 	    uuid->sui_id[2], uuid->sui_id[3],
3752 	    uuid->sui_id[4], uuid->sui_id[5],
3753 	    uuid->sui_id[6], uuid->sui_id[7],
3754 	    uuid->sui_id[8], uuid->sui_id[9],
3755 	    uuid->sui_id[10], uuid->sui_id[11],
3756 	    uuid->sui_id[12], uuid->sui_id[13],
3757 	    uuid->sui_id[14], uuid->sui_id[15]);
3758 
3759 	if (cr)
3760 		printf("\n");
3761 }
3762 
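/*
 * A volume is considered already assembled if an attached discipline
 * carries the same volume UUID.
 */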
3763 int
3764 sr_already_assembled(struct sr_discipline *sd)
3765 {
3766 	struct sr_softc		*sc = sd->sd_sc;
3767 	int			i;
3768 
3769 	for (i = 0; i < SR_MAX_LD; i++)
3770 		if (sc->sc_dis[i])
3771 			if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
3772 			    &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
3773 			    sizeof(sd->sd_meta->ssdi.ssd_uuid)))
3774 				return (1);
3775 
3776 	return (0);
3777 }
3778 
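/*
 * A strip size is valid if it is a power of two and a multiple of 512
 * bytes; return its log2, or -1 if it is not.
 */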
3779 int32_t
3780 sr_validate_stripsize(u_int32_t b)
3781 {
3782 	int			s = 0;
3783 
3784 	if (b % 512)
3785 		return (-1);
3786 
3787 	while ((b & 1) == 0) {
3788 		b >>= 1;
3789 		s++;
3790 	}
3791 
3792 	/* only multiple of twos */
3793 	b >>= 1;
3794 	if (b)
3795 		return(-1);
3796 
3797 	return (s);
3798 }
3799 
3800 void
3801 sr_shutdownhook(void *arg)
3802 {
3803 	sr_shutdown((struct sr_softc *)arg);
3804 }
3805 
3806 void
3807 sr_shutdown(struct sr_softc *sc)
3808 {
3809 	int			i;
3810 
3811 	DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc));
3812 
3813 	/* XXX this will not work when we stagger disciplines */
3814 	for (i = 0; i < SR_MAX_LD; i++)
3815 		if (sc->sc_dis[i])
3816 			sr_discipline_shutdown(sc->sc_dis[i], 1);
3817 }
3818 
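/*
 * Decode the LBA from the CDB, record the I/O range in the work unit
 * and reject requests that fall outside the volume.
 */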
3819 int
3820 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
3821 {
3822 	struct sr_discipline	*sd = wu->swu_dis;
3823 	struct scsi_xfer	*xs = wu->swu_xs;
3824 	int			rv = 1;
3825 
3826 	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
3827 	    xs->cmd->opcode);
3828 
3829 	if (sd->sd_meta->ssd_data_offset == 0)
3830 		panic("invalid data offset");
3831 
3832 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3833 		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
3834 		    DEVNAME(sd->sd_sc), func);
3835 		goto bad;
3836 	}
3837 
3838 	if (xs->datalen == 0) {
3839 		printf("%s: %s: illegal block count for %s\n",
3840 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
3841 		goto bad;
3842 	}
3843 
3844 	if (xs->cmdlen == 10)
3845 		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
3846 	else if (xs->cmdlen == 16)
3847 		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
3848 	else if (xs->cmdlen == 6)
3849 		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
3850 	else {
3851 		printf("%s: %s: illegal cmdlen for %s\n",
3852 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
3853 		goto bad;
3854 	}
3855 
3856 	wu->swu_blk_start = *blk;
3857 	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
3858 
3859 	if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
3860 		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
3861 		    "end: %lld length: %d\n",
3862 		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
3863 		    wu->swu_blk_end, xs->datalen);
3864 
3865 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
3866 		    SSD_ERRCODE_VALID;
3867 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
3868 		sd->sd_scsi_sense.add_sense_code = 0x21;
3869 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
3870 		sd->sd_scsi_sense.extra_len = 4;
3871 		goto bad;
3872 	}
3873 
3874 	rv = 0;
3875 bad:
3876 	return (rv);
3877 }
3878 
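/*
 * Check whether a work unit overlaps a pending work unit; if it does,
 * defer it behind the collider and return 1, otherwise return 0.
 */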
3879 int
3880 sr_check_io_collision(struct sr_workunit *wu)
3881 {
3882 	struct sr_discipline	*sd = wu->swu_dis;
3883 	struct sr_workunit	*wup;
3884 
3885 	splassert(IPL_BIO);
3886 
3887 	/* walk queue backwards and fill in collider if we have one */
3888 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
3889 		if (wu->swu_blk_end < wup->swu_blk_start ||
3890 		    wup->swu_blk_end < wu->swu_blk_start)
3891 			continue;
3892 
3893 		/* we have an LBA collision, defer wu */
3894 		wu->swu_state = SR_WU_DEFERRED;
3895 		if (wup->swu_collider)
3896 			/* append wu after the last collider in wup's chain */
3897 			while (wup->swu_collider)
3898 				wup = wup->swu_collider;
3899 
3900 		wup->swu_collider = wu;
3901 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
3902 		sd->sd_wu_collisions++;
3903 		goto queued;
3904 	}
3905 
3906 	return (0);
3907 queued:
3908 	return (1);
3909 }
3910 
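/*
 * Start a volume rebuild by creating the sr_rebuild_thread kernel thread.
 */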
3911 void
3912 sr_rebuild(void *arg)
3913 {
3914 	struct sr_discipline	*sd = arg;
3915 	struct sr_softc		*sc = sd->sd_sc;
3916 
3917 	if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc,
3918 	    DEVNAME(sc)) != 0)
3919 		printf("%s: unable to start background operation\n",
3920 		    DEVNAME(sc));
3921 }
3922 
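/*
 * Background rebuild: re-write the volume in SR_REBUILD_IO_SIZE chunks
 * by pairing each read with a deferred write, saving the rebuild offset
 * in the metadata as progress is made so that an interrupted rebuild
 * can be resumed.
 */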
3923 void
3924 sr_rebuild_thread(void *arg)
3925 {
3926 	struct sr_discipline	*sd = arg;
3927 	struct sr_softc		*sc = sd->sd_sc;
3928 	daddr64_t		whole_blk, partial_blk, blk, sz, lba;
3929 	daddr64_t		psz, rb, restart;
3930 	struct sr_workunit	*wu_r, *wu_w;
3931 	struct scsi_xfer	xs_r, xs_w;
3932 	struct scsi_rw_16	*cr, *cw;
3933 	int			c, s, slept, percent = 0, old_percent = -1;
3934 	u_int8_t		*buf;
3935 
3936 	whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
3937 	partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE;
3938 
3939 	restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE;
3940 	if (restart > whole_blk) {
3941 		printf("%s: bogus rebuild restart offset, starting from 0\n",
3942 		    DEVNAME(sc));
3943 		restart = 0;
3944 	}
3945 	if (restart) {
3946 		/*
3947 		 * XXX there is a hole here; there is a possibility that we
3948 		 * had a restart but the chunk that was supposed to be
3949 		 * rebuilt is no longer valid; we can reach this situation
3950 		 * when a rebuild is in progress, the box crashes and on
3951 		 * reboot the rebuild chunk is different (e.g. zeroed or
3952 		 * replaced).  We need to check the uuid of the chunk that
3953 		 * is being rebuilt to detect this.
3954 		 */
3955 		psz = sd->sd_meta->ssdi.ssd_size;
3956 		rb = sd->sd_meta->ssd_rebuild;
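		/* express resume offset as a (conservative) percentage */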
3957 		if (rb > 0)
3958 			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
3959 		else
3960 			percent = 0;
3961 		printf("%s: resuming rebuild on %s at %d%%\n",
3962 		    DEVNAME(sc), sd->sd_meta->ssd_devname, percent);
3963 	}
3964 
3965 	sd->sd_reb_active = 1;
3966 
3967 	/* currently this is 64k therefore we can use dma_alloc */
3968 	buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK);
3969 	for (blk = restart; blk <= whole_blk; blk++) {
3970 		lba = blk * SR_REBUILD_IO_SIZE;
3971 		sz = SR_REBUILD_IO_SIZE;
3972 		if (blk == whole_blk) {
3973 			if (partial_blk == 0)
3974 				break;
3975 			sz = partial_blk;
3976 		}
3977 
3978 		/* get some wu */
3979 		if ((wu_r = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
3980 			panic("%s: rebuild exhausted wu_r", DEVNAME(sc));
3981 		if ((wu_w = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
3982 			panic("%s: rebuild exhausted wu_w", DEVNAME(sc));
3983 
3984 		/* setup read io */
3985 		bzero(&xs_r, sizeof xs_r);
3986 		xs_r.error = XS_NOERROR;
3987 		xs_r.flags = SCSI_DATA_IN;
3988 		xs_r.datalen = sz << DEV_BSHIFT;
3989 		xs_r.data = buf;
3990 		xs_r.cmdlen = sizeof(*cr);
3991 		xs_r.cmd = &xs_r.cmdstore;
3992 		cr = (struct scsi_rw_16 *)xs_r.cmd;
3993 		cr->opcode = READ_16;
3994 		_lto4b(sz, cr->length);
3995 		_lto8b(lba, cr->addr);
3996 		wu_r->swu_flags |= SR_WUF_REBUILD;
3997 		wu_r->swu_xs = &xs_r;
3998 		if (sd->sd_scsi_rw(wu_r)) {
3999 			printf("%s: could not create read io\n",
4000 			    DEVNAME(sc));
4001 			goto fail;
4002 		}
4003 
4004 		/* setup write io */
4005 		bzero(&xs_w, sizeof xs_w);
4006 		xs_w.error = XS_NOERROR;
4007 		xs_w.flags = SCSI_DATA_OUT;
4008 		xs_w.datalen = sz << DEV_BSHIFT;
4009 		xs_w.data = buf;
4010 		xs_w.cmdlen = sizeof(*cw);
4011 		xs_w.cmd = &xs_w.cmdstore;
4012 		cw = (struct scsi_rw_16 *)xs_w.cmd;
4013 		cw->opcode = WRITE_16;
4014 		_lto4b(sz, cw->length);
4015 		_lto8b(lba, cw->addr);
4016 		wu_w->swu_flags |= SR_WUF_REBUILD;
4017 		wu_w->swu_xs = &xs_w;
4018 		if (sd->sd_scsi_rw(wu_w)) {
4019 			printf("%s: could not create write io\n",
4020 			    DEVNAME(sc));
4021 			goto fail;
4022 		}
4023 
4024 		/*
4025 		 * collide with the read io so that we get automatically
4026 		 * started when the read is done
4027 		 */
4028 		wu_w->swu_state = SR_WU_DEFERRED;
4029 		wu_r->swu_collider = wu_w;
4030 		s = splbio();
4031 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
4032 
4033 		/* schedule io */
4034 		if (sr_check_io_collision(wu_r))
4035 			goto queued;
4036 
4037 		sr_raid_startwu(wu_r);
4038 queued:
4039 		splx(s);
4040 
4041 		/* wait for write completion */
4042 		slept = 0;
4043 		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
4044 			tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
4045 			slept = 1;
4046 		}
4047 		/* yield if we didn't sleep */
4048 		if (slept == 0)
4049 			tsleep(sc, PWAIT, "sr_yield", 1);
4050 
4051 		scsi_io_put(&sd->sd_iopool, wu_r);
4052 		scsi_io_put(&sd->sd_iopool, wu_w);
4053 
4054 		sd->sd_meta->ssd_rebuild = lba;
4055 
4056 		/* save metadata every percent */
4057 		psz = sd->sd_meta->ssdi.ssd_size;
4058 		rb = sd->sd_meta->ssd_rebuild;
4059 		if (rb > 0)
4060 			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
4061 		else
4062 			percent = 0;
4063 		if (percent != old_percent && blk != whole_blk) {
4064 			if (sr_meta_save(sd, SR_META_DIRTY))
4065 				printf("%s: could not save metadata to %s\n",
4066 				    DEVNAME(sc), sd->sd_meta->ssd_devname);
4067 			old_percent = percent;
4068 		}
4069 
4070 		if (sd->sd_reb_abort)
4071 			goto abort;
4072 	}
4073 
4074 	/* all done */
4075 	sd->sd_meta->ssd_rebuild = 0;
4076 	for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
4077 		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
4078 		    BIOC_SDREBUILD) {
4079 			sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE);
4080 			break;
4081 		}
4082 
4083 abort:
4084 	if (sr_meta_save(sd, SR_META_DIRTY))
4085 		printf("%s: could not save metadata to %s\n",
4086 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
4087 fail:
4088 	dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT);
4089 	sd->sd_reb_active = 0;
4090 	kthread_exit(0);
4091 }
4092 
4093 #ifndef SMALL_KERNEL
4094 int
4095 sr_sensors_create(struct sr_discipline *sd)
4096 {
4097 	struct sr_softc		*sc = sd->sd_sc;
4098 	int			rv = 1;
4099 
4100 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
4101 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
4102 
4103 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
4104 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
4105 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
4106 	    sizeof(sd->sd_vol.sv_sensor.desc));
4107 
4108 	sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4109 	sd->sd_vol.sv_sensor_attached = 1;
4110 
4111 	if (sc->sc_sensor_task == NULL) {
4112 		sc->sc_sensor_task = sensor_task_register(sc,
4113 		    sr_sensors_refresh, 10);
4114 		if (sc->sc_sensor_task == NULL)
4115 			goto bad;
4116 	}
4117 
4118 	rv = 0;
4119 bad:
4120 	return (rv);
4121 }
4122 
4123 void
4124 sr_sensors_delete(struct sr_discipline *sd)
4125 {
4126 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc));
4127 
4128 	if (sd->sd_vol.sv_sensor_attached)
4129 		sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4130 }
4131 
4132 void
4133 sr_sensors_refresh(void *arg)
4134 {
4135 	struct sr_softc		*sc = arg;
4136 	struct sr_volume	*sv;
4137 	struct sr_discipline	*sd;
4138 	int			i, vol;
4139 
4140 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
4141 
4142 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
4143 		/* XXX this will not work when we stagger disciplines */
4144 		if (!sc->sc_dis[i])
4145 			continue;
4146 
4147 		sd = sc->sc_dis[i];
4148 		sv = &sd->sd_vol;
4149 
4150 		switch (sd->sd_vol_status) {
4151 		case BIOC_SVOFFLINE:
4152 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
4153 			sv->sv_sensor.status = SENSOR_S_CRIT;
4154 			break;
4155 
4156 		case BIOC_SVDEGRADED:
4157 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
4158 			sv->sv_sensor.status = SENSOR_S_WARN;
4159 			break;
4160 
4161 		case BIOC_SVSCRUB:
4162 		case BIOC_SVONLINE:
4163 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
4164 			sv->sv_sensor.status = SENSOR_S_OK;
4165 			break;
4166 
4167 		default:
4168 			sv->sv_sensor.value = 0; /* unknown */
4169 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
4170 		}
4171 	}
4172 }
4173 #endif /* SMALL_KERNEL */
4174 
4175 #ifdef SR_FANCY_STATS
4176 void				sr_print_stats(void);
4177 
4178 void
4179 sr_print_stats(void)
4180 {
4181 	struct sr_softc		*sc;
4182 	struct sr_discipline	*sd;
4183 	int			i, vol;
4184 
4185 	for (i = 0; i < softraid_cd.cd_ndevs; i++)
4186 		if (softraid_cd.cd_devs[i]) {
4187 			sc = softraid_cd.cd_devs[i];
4188 			/* we'll only have one softc */
4189 			break;
4190 		}
4191 
4192 	if (!sc) {
4193 		printf("no softraid softc found\n");
4194 		return;
4195 	}
4196 
4197 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
4198 		/* XXX this will not work when we stagger disciplines */
4199 		if (!sc->sc_dis[i])
4200 			continue;
4201 
4202 		sd = sc->sc_dis[i];
4203 		printf("%s: ios pending: %d  collisions %llu\n",
4204 		    sd->sd_meta->ssd_devname,
4205 		    sd->sd_wu_pending,
4206 		    sd->sd_wu_collisions);
4207 	}
4208 }
4209 #endif /* SR_FANCY_STATS */
4210 
4211 #ifdef SR_DEBUG
4212 void
4213 sr_meta_print(struct sr_metadata *m)
4214 {
4215 	int			i;
4216 	struct sr_meta_chunk	*mc;
4217 	struct sr_meta_opt	*mo;
4218 
4219 	if (!(sr_debug & SR_D_META))
4220 		return;
4221 
4222 	printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
4223 	printf("\tssd_version %d\n", m->ssdi.ssd_version);
4224 	printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags);
4225 	printf("\tssd_uuid ");
4226 	sr_uuid_print(&m->ssdi.ssd_uuid, 1);
4227 	printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
4228 	printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
4229 	printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
4230 	printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
4231 	printf("\tssd_level %d\n", m->ssdi.ssd_level);
4232 	printf("\tssd_size %lld\n", m->ssdi.ssd_size);
4233 	printf("\tssd_devname %s\n", m->ssd_devname);
4234 	printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
4235 	printf("\tssd_product %s\n", m->ssdi.ssd_product);
4236 	printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
4237 	printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
4238 	printf("\tssd_checksum ");
4239 	sr_checksum_print(m->ssd_checksum);
4240 	printf("\n");
4241 	printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
4242 	printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
4243 
4244 	mc = (struct sr_meta_chunk *)(m + 1);
4245 	for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
4246 		printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
4247 		printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
4248 		printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
4249 		printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
4250 		printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
4251 		printf("\t\tscm_uuid ");
4252 		sr_uuid_print(&mc->scmi.scm_uuid, 1);
4253 		printf("\t\tscm_checksum ");
4254 		sr_checksum_print(mc->scm_checksum);
4255 		printf("\n");
4256 		printf("\t\tscm_status %d\n", mc->scm_status);
4257 	}
4258 
4259 	mo = (struct sr_meta_opt *)(mc);
4260 	for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) {
4261 		printf("\t\t\tsom_type %d\n", mo->somi.som_type);
4262 		printf("\t\t\tsom_checksum ");
4263 		sr_checksum_print(mo->som_checksum);
4264 		printf("\n");
4265 	}
4266 }
4267 
4268 void
4269 sr_dump_mem(u_int8_t *p, int len)
4270 {
4271 	int			i;
4272 
4273 	for (i = 0; i < len; i++)
4274 		printf("%02x ", *p++);
4275 	printf("\n");
4276 }
4277 
4278 #endif /* SR_DEBUG */
4279