xref: /openbsd-src/sys/dev/softraid.c (revision ac9b4aacc1da35008afea06a5d23c2f2dea9b93e)
1 /* $OpenBSD: softraid.c,v 1.274 2012/01/30 13:13:03 jsing Exp $ */
2 /*
3  * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bio.h"
21 
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/buf.h>
25 #include <sys/device.h>
26 #include <sys/ioctl.h>
27 #include <sys/proc.h>
28 #include <sys/malloc.h>
29 #include <sys/pool.h>
30 #include <sys/kernel.h>
31 #include <sys/disk.h>
32 #include <sys/rwlock.h>
33 #include <sys/queue.h>
34 #include <sys/fcntl.h>
35 #include <sys/disklabel.h>
36 #include <sys/mount.h>
37 #include <sys/sensors.h>
38 #include <sys/stat.h>
39 #include <sys/conf.h>
40 #include <sys/uio.h>
41 #include <sys/workq.h>
42 #include <sys/kthread.h>
43 #include <sys/dkio.h>
44 
45 #ifdef AOE
46 #include <sys/mbuf.h>
47 #include <net/if_aoe.h>
48 #endif /* AOE */
49 
50 #include <crypto/cryptodev.h>
51 
52 #include <scsi/scsi_all.h>
53 #include <scsi/scsiconf.h>
54 #include <scsi/scsi_disk.h>
55 
56 #include <dev/softraidvar.h>
57 #include <dev/rndvar.h>
58 
59 /* #define SR_FANCY_STATS */
60 
61 #ifdef SR_DEBUG
62 #define SR_FANCY_STATS
63 uint32_t	sr_debug = 0
64 		    /* | SR_D_CMD */
65 		    /* | SR_D_MISC */
66 		    /* | SR_D_INTR */
67 		    /* | SR_D_IOCTL */
68 		    /* | SR_D_CCB */
69 		    /* | SR_D_WU */
70 		    /* | SR_D_META */
71 		    /* | SR_D_DIS */
72 		    /* | SR_D_STATE */
73 		;
74 #endif
75 
76 struct sr_softc *softraid0;
77 
78 int		sr_match(struct device *, void *, void *);
79 void		sr_attach(struct device *, struct device *, void *);
80 int		sr_detach(struct device *, int);
81 void		sr_map_root(void);
82 
83 struct cfattach softraid_ca = {
84 	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
85 };
86 
87 struct cfdriver softraid_cd = {
88 	NULL, "softraid", DV_DULL
89 };
90 
91 /* scsi & discipline */
92 void			sr_scsi_cmd(struct scsi_xfer *);
93 void			sr_minphys(struct buf *, struct scsi_link *);
94 int			sr_scsi_probe(struct scsi_link *);
95 void			sr_copy_internal_data(struct scsi_xfer *,
96 			    void *, size_t);
97 int			sr_scsi_ioctl(struct scsi_link *, u_long,
98 			    caddr_t, int);
99 int			sr_bio_ioctl(struct device *, u_long, caddr_t);
100 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
101 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
102 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
103 int			sr_ioctl_setstate(struct sr_softc *,
104 			    struct bioc_setstate *);
105 int			sr_ioctl_createraid(struct sr_softc *,
106 			    struct bioc_createraid *, int);
107 int			sr_ioctl_deleteraid(struct sr_softc *,
108 			    struct bioc_deleteraid *);
109 int			sr_ioctl_discipline(struct sr_softc *,
110 			    struct bioc_discipline *);
111 int			sr_ioctl_installboot(struct sr_softc *,
112 			    struct bioc_installboot *);
113 void			sr_chunks_unwind(struct sr_softc *,
114 			    struct sr_chunk_head *);
115 void			sr_discipline_free(struct sr_discipline *);
116 void			sr_discipline_shutdown(struct sr_discipline *, int);
117 int			sr_discipline_init(struct sr_discipline *, int);
118 void			sr_set_chunk_state(struct sr_discipline *, int, int);
119 void			sr_set_vol_state(struct sr_discipline *);
120 
121 /* utility functions */
122 void			sr_shutdown(struct sr_softc *);
123 void			sr_shutdownhook(void *);
124 void			sr_uuid_generate(struct sr_uuid *);
125 char			*sr_uuid_format(struct sr_uuid *);
126 void			sr_uuid_print(struct sr_uuid *, int);
127 void			sr_checksum_print(u_int8_t *);
128 int			sr_boot_assembly(struct sr_softc *);
129 int			sr_already_assembled(struct sr_discipline *);
130 int			sr_hotspare(struct sr_softc *, dev_t);
131 void			sr_hotspare_rebuild(struct sr_discipline *);
132 int			sr_rebuild_init(struct sr_discipline *, dev_t, int);
133 void			sr_rebuild(void *);
134 void			sr_rebuild_thread(void *);
135 void			sr_roam_chunks(struct sr_discipline *);
136 int			sr_chunk_in_use(struct sr_softc *, dev_t);
137 void			sr_startwu_callback(void *, void *);
138 int			sr_rw(struct sr_softc *, dev_t, char *, size_t,
139 			    daddr64_t, long);
140 
141 /* don't include these on RAMDISK */
142 #ifndef SMALL_KERNEL
143 void			sr_sensors_refresh(void *);
144 int			sr_sensors_create(struct sr_discipline *);
145 void			sr_sensors_delete(struct sr_discipline *);
146 #endif
147 
148 /* metadata */
149 int			sr_meta_probe(struct sr_discipline *, dev_t *, int);
150 int			sr_meta_attach(struct sr_discipline *, int, int);
151 int			sr_meta_rw(struct sr_discipline *, dev_t, void *,
152 			    size_t, daddr64_t, long);
153 int			sr_meta_clear(struct sr_discipline *);
154 void			sr_meta_init(struct sr_discipline *, int, int);
155 void			sr_meta_init_complete(struct sr_discipline *);
156 void			sr_meta_opt_handler(struct sr_discipline *,
157 			    struct sr_meta_opt_hdr *);
158 
159 /* hotplug magic */
160 void			sr_disk_attach(struct disk *, int);
161 
162 struct sr_hotplug_list {
163 	void			(*sh_hotplug)(struct sr_discipline *,
164 				    struct disk *, int);
165 	struct sr_discipline	*sh_sd;
166 
167 	SLIST_ENTRY(sr_hotplug_list) shl_link;
168 };
169 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);
170 
171 struct			sr_hotplug_list_head	sr_hotplug_callbacks;
172 extern void		(*softraid_disk_attach)(struct disk *, int);
173 
174 /* scsi glue */
175 struct scsi_adapter sr_switch = {
176 	sr_scsi_cmd, sr_minphys, sr_scsi_probe, NULL, sr_scsi_ioctl
177 };
178 
179 /* native metadata format */
180 int			sr_meta_native_bootprobe(struct sr_softc *, dev_t,
181 			    struct sr_boot_chunk_head *);
182 #define SR_META_NOTCLAIMED	(0)
183 #define SR_META_CLAIMED		(1)
184 int			sr_meta_native_probe(struct sr_softc *,
185 			   struct sr_chunk *);
186 int			sr_meta_native_attach(struct sr_discipline *, int);
187 int			sr_meta_native_write(struct sr_discipline *, dev_t,
188 			    struct sr_metadata *,void *);
189 
190 #ifdef SR_DEBUG
191 void			sr_meta_print(struct sr_metadata *);
192 #else
193 #define			sr_meta_print(m)
194 #endif
195 
/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	/* Probe a chunk, returning its metadata type or SR_META_F_INVALID. */
	int			(*smd_probe)(struct sr_softc *,
				   struct sr_chunk *);
	/* Attach/detach metadata handling for a discipline. */
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	/* Read/write one chunk's metadata; last arg is the foreign buffer. */
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	/* Validate/translate foreign metadata; NULL for the native format. */
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	/* Native softraid format (indexed by SR_META_F_NATIVE). */
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read, sr_meta_native_write, NULL },
	/* Terminator; unspecified trailing members default to NULL. */
	{ 0, 0, NULL, NULL, NULL, NULL }
};
217 
218 int
219 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
220 {
221 	struct sr_softc		*sc = sd->sd_sc;
222 	struct sr_chunk_head	*cl;
223 	struct sr_chunk		*ch_entry, *chunk1, *chunk2;
224 	int			rv = 1, i = 0;
225 
226 	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc));
227 
228 	/* in memory copy of metadata */
229 	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
230 	if (!sd->sd_meta) {
231 		sr_error(sc, "could not allocate memory for metadata");
232 		goto bad;
233 	}
234 
235 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
236 		/* in memory copy of foreign metadata */
237 		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
238 		    M_DEVBUF, M_ZERO | M_NOWAIT);
239 		if (!sd->sd_meta_foreign) {
240 			/* unwind frees sd_meta */
241 			sr_error(sc, "could not allocate memory for foreign "
242 			    "metadata");
243 			goto bad;
244 		}
245 	}
246 
247 	/* we have a valid list now create an array index */
248 	cl = &sd->sd_vol.sv_chunk_list;
249 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no,
250 	    M_DEVBUF, M_WAITOK | M_ZERO);
251 
252 	/* fill out chunk array */
253 	i = 0;
254 	SLIST_FOREACH(ch_entry, cl, src_link)
255 		sd->sd_vol.sv_chunks[i++] = ch_entry;
256 
257 	/* attach metadata */
258 	if (smd[sd->sd_meta_type].smd_attach(sd, force))
259 		goto bad;
260 
261 	/* Force chunks into correct order now that metadata is attached. */
262 	SLIST_FOREACH(ch_entry, cl, src_link)
263 		SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link);
264 	for (i = 0; i < chunk_no; i++) {
265 		ch_entry = sd->sd_vol.sv_chunks[i];
266 		chunk2 = NULL;
267 		SLIST_FOREACH(chunk1, cl, src_link) {
268 			if (chunk1->src_meta.scmi.scm_chunk_id >
269 			    ch_entry->src_meta.scmi.scm_chunk_id)
270 				break;
271 			chunk2 = chunk1;
272 		}
273 		if (chunk2 == NULL)
274 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
275 		else
276 			SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
277 	}
278 	i = 0;
279 	SLIST_FOREACH(ch_entry, cl, src_link)
280 		sd->sd_vol.sv_chunks[i++] = ch_entry;
281 
282 	rv = 0;
283 bad:
284 	return (rv);
285 }
286 
/*
 * Probe the no_chunk devices listed in dt[] and determine which metadata
 * format they carry.  The discipline's chunk list is built in the user
 * supplied order as a side effect; NODEV entries are recorded as offline
 * chunks.  Returns the common metadata type of all chunks, or
 * SR_META_F_INVALID if a chunk cannot be opened/probed or the chunks do
 * not all share one format.
 */
int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			/* Missing chunk: mark offline and keep probing. */
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				sr_error(sc, "sr_meta_probe: cannot allocate "
				    "vnode");
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
			 */
			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
			if (error) {
				DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
				    "open %s\n", DEVNAME(sc), devname);
				vput(vn);
				goto unwind;
			}

			strlcpy(ch_entry->src_devname, devname,
			    sizeof(ch_entry->src_devname));
			ch_entry->src_vn = vn;
		}

		/* determine if this is a device we understand */
		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
			type = smd[i].smd_probe(sc, ch_entry);
			if (type == SR_META_F_INVALID)
				continue;
			else {
				found = type;
				break;
			}
		}

		if (found == SR_META_F_INVALID)
			goto unwind;
		if (prevf == SR_META_F_INVALID)
			prevf = found;
		/* All chunks must carry the same metadata format. */
		if (prevf != found) {
			DNPRINTF(SR_D_META, "%s: prevf != found\n",
			    DEVNAME(sc));
			goto unwind;
		}
	}

	return (prevf);
unwind:
	/* NOTE(review): vnodes opened above stay open here; see XXX. */
	return (SR_META_F_INVALID);
}
371 
372 void
373 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
374 {
375 	int			maj, unit, part;
376 	char			*name;
377 
378 	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
379 	    DEVNAME(sc), buf, size);
380 
381 	if (!buf)
382 		return;
383 
384 	maj = major(dev);
385 	part = DISKPART(dev);
386 	unit = DISKUNIT(dev);
387 
388 	name = findblkname(maj);
389 	if (name == NULL)
390 		return;
391 
392 	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
393 }
394 
395 int
396 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr64_t offset,
397     long flags)
398 {
399 	struct vnode		*vp;
400 	struct buf		b;
401 	size_t			bufsize, dma_bufsize;
402 	int			rv = 1;
403 	char			*dma_buf;
404 
405 	DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %d, %llu 0x%x)\n",
406 	    DEVNAME(sc), dev, buf, size, offset, flags);
407 
408 	dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size;
409 	dma_buf = dma_alloc(dma_bufsize, PR_WAITOK);
410 
411 	if (bdevvp(dev, &vp)) {
412 		printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc));
413 		goto done;
414 	}
415 
416 	while (size > 0) {
417 		DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %d, offset %llu)\n",
418 		    DEVNAME(sc), dma_buf, size, offset);
419 
420 		bufsize = (size > MAXPHYS) ? MAXPHYS : size;
421 		if (flags == B_WRITE)
422 			bcopy(buf, dma_buf, bufsize);
423 
424 		bzero(&b, sizeof(b));
425 		b.b_flags = flags | B_PHYS;
426 		b.b_proc = curproc;
427 		b.b_dev = dev;
428 		b.b_iodone = NULL;
429 		b.b_error = 0;
430 		b.b_blkno = offset;
431 		b.b_data = dma_buf;
432 		b.b_bcount = bufsize;
433 		b.b_bufsize = bufsize;
434 		b.b_resid = bufsize;
435 		b.b_vp = vp;
436 
437 		if ((b.b_flags & B_READ) == 0)
438 			vp->v_numoutput++;
439 
440 		LIST_INIT(&b.b_dep);
441 		VOP_STRATEGY(&b);
442 		biowait(&b);
443 
444 		if (b.b_flags & B_ERROR) {
445 			printf("%s: I/O error %d on dev 0x%x at block %llu\n",
446 			    DEVNAME(sc), b.b_error, dev, b.b_blkno);
447 			goto done;
448 		}
449 
450 		if (flags == B_READ)
451 			bcopy(dma_buf, buf, bufsize);
452 
453 		size -= bufsize;
454 		buf += bufsize;
455 		offset += howmany(bufsize, DEV_BSIZE);
456 	}
457 
458 	rv = 0;
459 
460 done:
461 	if (vp)
462 		vput(vp);
463 
464 	dma_free(dma_buf, dma_bufsize);
465 
466 	return (rv);
467 }
468 
469 int
470 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t size,
471     daddr64_t offset, long flags)
472 {
473 	int			rv = 1;
474 
475 	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
476 	    DEVNAME(sd->sd_sc), dev, md, size, offset, flags);
477 
478 	if (md == NULL) {
479 		printf("%s: sr_meta_rw: invalid metadata pointer\n",
480 		    DEVNAME(sd->sd_sc));
481 		goto done;
482 	}
483 
484 	rv = sr_rw(sd->sd_sc, dev, md, size, offset, flags);
485 
486 done:
487 	return (rv);
488 }
489 
490 int
491 sr_meta_clear(struct sr_discipline *sd)
492 {
493 	struct sr_softc		*sc = sd->sd_sc;
494 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
495 	struct sr_chunk		*ch_entry;
496 	void			*m;
497 	int			rv = 1;
498 
499 	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
500 
501 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
502 		sr_error(sc, "cannot clear foreign metadata");
503 		goto done;
504 	}
505 
506 	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
507 	SLIST_FOREACH(ch_entry, cl, src_link) {
508 		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
509 			/* XXX mark disk offline */
510 			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
511 			    "clear %s\n", ch_entry->src_devname);
512 			rv++;
513 			continue;
514 		}
515 		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
516 	}
517 
518 	bzero(sd->sd_meta, SR_META_SIZE * 512);
519 
520 	free(m, M_DEVBUF);
521 	rv = 0;
522 done:
523 	return (rv);
524 }
525 
/*
 * Initialise the in-memory volume and per-chunk metadata for a new volume
 * of the given RAID level with no_chunk chunks.  Chunk sizes are coerced
 * down to the smallest chunk so all members present a uniform size.
 * No-op if the discipline has no metadata buffer attached.
 */
void
sr_meta_init(struct sr_discipline *sd, int level, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta;
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_meta_chunk	*scm;
	struct sr_chunk		*chunk;
	int			cid = 0;
	u_int64_t		max_chunk_sz = 0, min_chunk_sz = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));

	if (!sm)
		return;

	/* Initialise volume metadata. */
	sm->ssdi.ssd_magic = SR_MAGIC;
	sm->ssdi.ssd_version = SR_META_VERSION;
	sm->ssdi.ssd_vol_flags = sd->sd_meta_flags;
	sm->ssdi.ssd_volid = 0;
	sm->ssdi.ssd_chunk_no = no_chunk;
	sm->ssdi.ssd_level = level;

	sm->ssd_data_offset = SR_DATA_OFFSET;
	sm->ssd_ondisk = 0;

	sr_uuid_generate(&sm->ssdi.ssd_uuid);

	/* Initialise chunk metadata and get min/max chunk sizes. */
	SLIST_FOREACH(chunk, cl, src_link) {
		scm = &chunk->src_meta;
		scm->scmi.scm_size = chunk->src_size;
		/* Chunk ids are assigned in list order. */
		scm->scmi.scm_chunk_id = cid++;
		scm->scm_status = BIOC_SDONLINE;
		scm->scmi.scm_volid = 0;
		strlcpy(scm->scmi.scm_devname, chunk->src_devname,
		    sizeof(scm->scmi.scm_devname));
		bcopy(&sm->ssdi.ssd_uuid, &scm->scmi.scm_uuid,
		    sizeof(scm->scmi.scm_uuid));
		sr_checksum(sc, scm, &scm->scm_checksum,
		    sizeof(scm->scm_checksum));

		/* Seed min with the first chunk's size, then track min/max. */
		if (min_chunk_sz == 0)
			min_chunk_sz = scm->scmi.scm_size;
		min_chunk_sz = MIN(min_chunk_sz, scm->scmi.scm_size);
		max_chunk_sz = MAX(max_chunk_sz, scm->scmi.scm_size);
	}

	/* Equalize chunk sizes. */
	SLIST_FOREACH(chunk, cl, src_link)
		chunk->src_meta.scmi.scm_coerced_size = min_chunk_sz;

	sd->sd_vol.sv_chunk_minsz = min_chunk_sz;
	sd->sd_vol.sv_chunk_maxsz = max_chunk_sz;
}
582 
583 void
584 sr_meta_init_complete(struct sr_discipline *sd)
585 {
586 #ifdef SR_DEBUG
587 	struct sr_softc		*sc = sd->sd_sc;
588 #endif
589 	struct sr_metadata	*sm = sd->sd_meta;
590 
591 	DNPRINTF(SR_D_META, "%s: sr_meta_complete\n", DEVNAME(sc));
592 
593 	/* Complete initialisation of volume metadata. */
594 	strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
595 	snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
596 	    "SR %s", sd->sd_name);
597 	snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
598 	    "%03d", sm->ssdi.ssd_version);
599 }
600 
601 void
602 sr_meta_opt_handler(struct sr_discipline *sd, struct sr_meta_opt_hdr *om)
603 {
604 	if (om->som_type != SR_OPT_BOOT)
605 		panic("unknown optional metadata type");
606 }
607 
/*
 * Deferred metadata flush callback (two void * arguments match the
 * workq callback signature — see sr_startwu_callback above).  Saves
 * dirty metadata at splbio and clears the discipline's must-flush flag.
 */
void
sr_meta_save_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = arg1;
	int			s;

	/* Block I/O interrupts while metadata is being written. */
	s = splbio();

	if (sr_meta_save(arg1, SR_META_DIRTY))
		printf("%s: save metadata failed\n", DEVNAME(sd->sd_sc));

	/* Clear the flag before dropping spl so a new flush can be queued. */
	sd->sd_must_flush = 0;
	splx(s);
}
622 
/*
 * Write the in-memory metadata (volume header, per-chunk records and
 * optional metadata) out to every online chunk.  The on-disk generation
 * counter is bumped first; if a write to a chunk fails, that chunk is
 * marked offline and the whole save restarts so remaining chunks get a
 * consistent generation.  Returns 0 on success, 1 on failure.
 */
int
sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta, *m;
	struct sr_meta_driver	*s;
	struct sr_chunk		*src;
	struct sr_meta_chunk	*cm;
	struct sr_workunit	wu;
	struct sr_meta_opt_hdr	*omh;
	struct sr_meta_opt_item *omi;
	int			i;

	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
	    DEVNAME(sc), sd->sd_meta->ssd_devname);

	if (!sm) {
		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
		goto bad;
	}

	/* meta scratchpad */
	s = &smd[sd->sd_meta_type];
	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (!m) {
		printf("%s: could not allocate metadata scratch area\n",
		    DEVNAME(sc));
		goto bad;
	}

	/* from here on out metadata is updated */
restart:
	sm->ssd_ondisk++;
	sm->ssd_meta_flags = flags;
	bcopy(sm, m, sizeof(*m));

	/* Chunk metadata immediately follows the volume header in m. */
	cm = (struct sr_meta_chunk *)(m + 1);
	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];
		bcopy(&src->src_meta, cm, sizeof(*cm));
		cm++;
	}

	/* Optional metadata is packed after the chunk records. */
	omh = (struct sr_meta_opt_hdr *)(cm);
	SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
		DNPRINTF(SR_D_META, "%s: saving optional metadata type %u with "
		    "length %u\n", DEVNAME(sc), omi->omi_som->som_type,
		    omi->omi_som->som_length);
		/* Checksum is computed over the item with its field zeroed. */
		bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
		sr_checksum(sc, omi->omi_som, &omi->omi_som->som_checksum,
		    omi->omi_som->som_length);
		bcopy(omi->omi_som, omh, omi->omi_som->som_length);
		omh = (struct sr_meta_opt_hdr *)((u_int8_t *)omh +
		    omi->omi_som->som_length);
	}

	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];

		/* skip disks that are offline */
		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
			continue;

		/* calculate metadata checksum for correct chunk */
		m->ssdi.ssd_chunk_id = i;
		sr_checksum(sc, m, &m->ssd_checksum,
		    sizeof(struct sr_meta_invariant));

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
		    "chunkid: %d checksum: ",
		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);

		if (sr_debug & SR_D_META)
			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
		DNPRINTF(SR_D_META, "\n");
		sr_meta_print(m);
#endif

		/* translate and write to disk */
		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
			printf("%s: could not write metadata to %s\n",
			    DEVNAME(sc), src->src_devname);
			/* restart the meta write */
			src->src_meta.scm_status = BIOC_SDOFFLINE;
			/* XXX recalculate volume status */
			goto restart;
		}
	}

	/* not all disciplines have sync */
	if (sd->sd_scsi_sync) {
		/* Issue a cache sync via a fake work unit. */
		bzero(&wu, sizeof(wu));
		wu.swu_fake = 1;
		wu.swu_dis = sd;
		sd->sd_scsi_sync(&wu);
	}
	free(m, M_DEVBUF);
	return (0);
bad:
	return (1);
}
728 
729 int
730 sr_meta_read(struct sr_discipline *sd)
731 {
732 	struct sr_softc		*sc = sd->sd_sc;
733 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
734 	struct sr_metadata	*sm;
735 	struct sr_chunk		*ch_entry;
736 	struct sr_meta_chunk	*cp;
737 	struct sr_meta_driver	*s;
738 	void			*fm = NULL;
739 	int			no_disk = 0, got_meta = 0;
740 
741 	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
742 
743 	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
744 	s = &smd[sd->sd_meta_type];
745 	if (sd->sd_meta_type != SR_META_F_NATIVE)
746 		fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);
747 
748 	cp = (struct sr_meta_chunk *)(sm + 1);
749 	SLIST_FOREACH(ch_entry, cl, src_link) {
750 		/* skip disks that are offline */
751 		if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
752 			DNPRINTF(SR_D_META,
753 			    "%s: %s chunk marked offline, spoofing status\n",
754 			    DEVNAME(sc), ch_entry->src_devname);
755 			cp++; /* adjust chunk pointer to match failure */
756 			continue;
757 		} else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
758 			/* read and translate */
759 			/* XXX mark chunk offline, elsewhere!! */
760 			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
761 			cp++; /* adjust chunk pointer to match failure */
762 			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
763 			    DEVNAME(sc));
764 			continue;
765 		}
766 
767 		if (sm->ssdi.ssd_magic != SR_MAGIC) {
768 			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
769 			    DEVNAME(sc));
770 			continue;
771 		}
772 
773 		/* validate metadata */
774 		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
775 			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
776 			    DEVNAME(sc));
777 			no_disk = -1;
778 			goto done;
779 		}
780 
781 		/* assume first chunk contains metadata */
782 		if (got_meta == 0) {
783 			sr_meta_opt_load(sc, sm, &sd->sd_meta_opt);
784 			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
785 			got_meta = 1;
786 		}
787 
788 		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
789 
790 		no_disk++;
791 		cp++;
792 	}
793 
794 	free(sm, M_DEVBUF);
795 	if (fm)
796 		free(fm, M_DEVBUF);
797 
798 done:
799 	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
800 	    no_disk);
801 	return (no_disk);
802 }
803 
/*
 * Load the optional metadata items that follow the per-chunk records in
 * the metadata block sm, appending each to the list som.  Handles both
 * the old fixed-length layout (som_length == 0 on disk) and the current
 * variable-length layout.  Panics on checksum mismatch or an unknown
 * old-style item type.
 */
void
sr_meta_opt_load(struct sr_softc *sc, struct sr_metadata *sm,
    struct sr_meta_opt_head *som)
{
	struct sr_meta_opt_hdr	*omh;
	struct sr_meta_opt_item *omi;
	u_int8_t		checksum[MD5_DIGEST_LENGTH];
	int			i;

	/* Process optional metadata. */
	omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
	    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {

		omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		SLIST_INSERT_HEAD(som, omi, omi_link);

		if (omh->som_length == 0) {

			/* Load old fixed length optional metadata. */
			DNPRINTF(SR_D_META, "%s: old optional metadata of type "
			    "%u\n", DEVNAME(sc), omh->som_type);

			/* Validate checksum. */
			/* NB: void * arithmetic is a gcc extension. */
			sr_checksum(sc, (void *)omh, &checksum,
			    SR_OLD_META_OPT_SIZE - MD5_DIGEST_LENGTH);
			if (bcmp(&checksum, (void *)omh + SR_OLD_META_OPT_MD5,
			    sizeof(checksum)))
				panic("%s: invalid optional metadata "
				    "checksum", DEVNAME(sc));

			/* Determine correct length. */
			switch (omh->som_type) {
			case SR_OPT_CRYPTO:
				omh->som_length = sizeof(struct sr_meta_crypto);
				break;
			case SR_OPT_BOOT:
				omh->som_length = sizeof(struct sr_meta_boot);
				break;
			case SR_OPT_KEYDISK:
				omh->som_length =
				    sizeof(struct sr_meta_keydisk);
				break;
			default:
				panic("unknown old optional metadata "
				    "type %u\n", omh->som_type);
			}

			/* Copy the old payload in after a fresh new header. */
			omi->omi_som = malloc(omh->som_length, M_DEVBUF,
			    M_WAITOK | M_ZERO);
			bcopy((u_int8_t *)omh + SR_OLD_META_OPT_OFFSET,
			    (u_int8_t *)omi->omi_som + sizeof(*omi->omi_som),
			    omh->som_length - sizeof(*omi->omi_som));
			omi->omi_som->som_type = omh->som_type;
			omi->omi_som->som_length = omh->som_length;

			omh = (struct sr_meta_opt_hdr *)((void *)omh +
			    SR_OLD_META_OPT_SIZE);
		} else {

			/* Load variable length optional metadata. */
			DNPRINTF(SR_D_META, "%s: optional metadata of type %u, "
			    "length %u\n", DEVNAME(sc), omh->som_type,
			    omh->som_length);
			omi->omi_som = malloc(omh->som_length, M_DEVBUF,
			    M_WAITOK | M_ZERO);
			bcopy(omh, omi->omi_som, omh->som_length);

			/*
			 * Validate checksum: save the stored digest, zero the
			 * field, recompute over the item and compare.
			 */
			bcopy(&omi->omi_som->som_checksum, &checksum,
			    MD5_DIGEST_LENGTH);
			bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
			sr_checksum(sc, omi->omi_som,
			    &omi->omi_som->som_checksum, omh->som_length);
			if (bcmp(&checksum, &omi->omi_som->som_checksum,
			    sizeof(checksum)))
				panic("%s: invalid optional metadata checksum",
				    DEVNAME(sc));

			omh = (struct sr_meta_opt_hdr *)((void *)omh +
			    omh->som_length);
		}
	}
}
889 
/*
 * Validate metadata sm read from device dev: translate foreign formats,
 * verify magic and checksum, and migrate older metadata versions (3 and
 * 4) forward to the current version in memory.  Returns 0 if the
 * metadata is valid, 1 otherwise.
 */
int
sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
    void *fm)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_meta_driver	*s;
#ifdef SR_DEBUG
	struct sr_meta_chunk	*mc;
#endif
	u_int8_t		checksum[MD5_DIGEST_LENGTH];
	char			devname[32];
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);

	sr_meta_getdevname(sc, dev, devname, sizeof(devname));

	s = &smd[sd->sd_meta_type];
	if (sd->sd_meta_type != SR_META_F_NATIVE)
		if (s->smd_validate(sd, sm, fm)) {
			sr_error(sc, "invalid foreign metadata");
			goto done;
		}

	/*
	 * at this point all foreign metadata has been translated to the native
	 * format and will be treated just like the native format
	 */

	if (sm->ssdi.ssd_magic != SR_MAGIC) {
		sr_error(sc, "not valid softraid metadata");
		goto done;
	}

	/* Verify metadata checksum. */
	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
		sr_error(sc, "invalid metadata checksum");
		goto done;
	}

	/* Handle changes between versions. */
	if (sm->ssdi.ssd_version == 3) {

		/*
		 * Version 3 - update metadata version and fix up data offset
		 * value since this did not exist in version 3.
		 */
		if (sm->ssd_data_offset == 0)
			sm->ssd_data_offset = SR_META_V3_DATA_OFFSET;

	} else if (sm->ssdi.ssd_version == 4) {

		/*
		 * Version 4 - original metadata format did not store
		 * data offset so fix this up if necessary.
		 */
		if (sm->ssd_data_offset == 0)
			sm->ssd_data_offset = SR_DATA_OFFSET;

	} else if (sm->ssdi.ssd_version == SR_META_VERSION) {

		/*
		 * Version 5 - variable length optional metadata. Migration
		 * from earlier fixed length optional metadata is handled
		 * in sr_meta_read().
		 */

	} else {

		/* Newer (or unknown) versions cannot be handled. */
		sr_error(sc, "cannot read metadata version %u on %s, "
		    "expected version %u or earlier",
		    sm->ssdi.ssd_version, devname, SR_META_VERSION);
		goto done;

	}

	/* Update version number and revision string. */
	sm->ssdi.ssd_version = SR_META_VERSION;
	snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
	    "%03d", SR_META_VERSION);

#ifdef SR_DEBUG
	/* warn if disk changed order */
	mc = (struct sr_meta_chunk *)(sm + 1);
	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
		DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
		    DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
		    devname);
#endif

	/* we have meta data on disk */
	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
	    DEVNAME(sc), devname);

	rv = 0;
done:
	return (rv);
}
990 
991 int
992 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
993     struct sr_boot_chunk_head *bch)
994 {
995 	struct vnode		*vn;
996 	struct disklabel	label;
997 	struct sr_metadata	*md = NULL;
998 	struct sr_discipline	*fake_sd = NULL;
999 	struct sr_boot_chunk	*bc;
1000 	char			devname[32];
1001 	dev_t			chrdev, rawdev;
1002 	int			error, i;
1003 	int			rv = SR_META_NOTCLAIMED;
1004 
1005 	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
1006 
1007 	/*
1008 	 * Use character raw device to avoid SCSI complaints about missing
1009 	 * media on removable media devices.
1010 	 */
1011 	chrdev = blktochr(devno);
1012 	rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART);
1013 	if (cdevvp(rawdev, &vn)) {
1014 		sr_error(sc, "sr_meta_native_bootprobe: cannot allocate vnode");
1015 		goto done;
1016 	}
1017 
1018 	/* open device */
1019 	error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1020 	if (error) {
1021 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
1022 		    "failed\n", DEVNAME(sc));
1023 		vput(vn);
1024 		goto done;
1025 	}
1026 
1027 	/* get disklabel */
1028 	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED,
1029 	    curproc);
1030 	if (error) {
1031 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
1032 		    "failed\n", DEVNAME(sc));
1033 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1034 		vput(vn);
1035 		goto done;
1036 	}
1037 
1038 	/* we are done, close device */
1039 	error = VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1040 	if (error) {
1041 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
1042 		    "failed\n", DEVNAME(sc));
1043 		vput(vn);
1044 		goto done;
1045 	}
1046 	vput(vn);
1047 
1048 	/* Make sure this is a 512-byte/sector device. */
1049 	if (label.d_secsize != DEV_BSIZE) {
1050 		DNPRINTF(SR_D_META, "%s: %s has unsupported sector size (%d)",
1051 		    DEVNAME(sc), devname, label.d_secsize);
1052 		goto done;
1053 	}
1054 
1055 	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
1056 	if (md == NULL) {
1057 		sr_error(sc, "not enough memory for metadata buffer");
1058 		goto done;
1059 	}
1060 
1061 	/* create fake sd to use utility functions */
1062 	fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF,
1063 	    M_ZERO | M_NOWAIT);
1064 	if (fake_sd == NULL) {
1065 		sr_error(sc, "not enough memory for fake discipline");
1066 		goto done;
1067 	}
1068 	fake_sd->sd_sc = sc;
1069 	fake_sd->sd_meta_type = SR_META_F_NATIVE;
1070 
1071 	for (i = 0; i < MAXPARTITIONS; i++) {
1072 		if (label.d_partitions[i].p_fstype != FS_RAID)
1073 			continue;
1074 
1075 		/* open partition */
1076 		rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i);
1077 		if (bdevvp(rawdev, &vn)) {
1078 			sr_error(sc, "sr_meta_native_bootprobe: cannot "
1079 			    "allocate vnode for partition");
1080 			goto done;
1081 		}
1082 		error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1083 		if (error) {
1084 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
1085 			    "open failed, partition %d\n",
1086 			    DEVNAME(sc), i);
1087 			vput(vn);
1088 			continue;
1089 		}
1090 
1091 		if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) {
1092 			sr_error(sc, "native bootprobe could not read native "
1093 			    "metadata");
1094 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1095 			vput(vn);
1096 			continue;
1097 		}
1098 
1099 		/* are we a softraid partition? */
1100 		if (md->ssdi.ssd_magic != SR_MAGIC) {
1101 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1102 			vput(vn);
1103 			continue;
1104 		}
1105 
1106 		sr_meta_getdevname(sc, rawdev, devname, sizeof(devname));
1107 		if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) {
1108 			if (md->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE) {
1109 				DNPRINTF(SR_D_META, "%s: don't save %s\n",
1110 				    DEVNAME(sc), devname);
1111 			} else {
1112 				/* XXX fix M_WAITOK, this is boot time */
1113 				bc = malloc(sizeof(struct sr_boot_chunk),
1114 				    M_DEVBUF, M_WAITOK | M_ZERO);
1115 				bc->sbc_metadata =
1116 				    malloc(sizeof(struct sr_metadata),
1117 				    M_DEVBUF, M_WAITOK | M_ZERO);
1118 				bcopy(md, bc->sbc_metadata,
1119 				    sizeof(struct sr_metadata));
1120 				bc->sbc_mm = rawdev;
1121 				SLIST_INSERT_HEAD(bch, bc, sbc_link);
1122 				rv = SR_META_CLAIMED;
1123 			}
1124 		}
1125 
1126 		/* we are done, close partition */
1127 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1128 		vput(vn);
1129 	}
1130 
1131 done:
1132 	if (fake_sd)
1133 		free(fake_sd, M_DEVBUF);
1134 	if (md)
1135 		free(md, M_DEVBUF);
1136 
1137 	return (rv);
1138 }
1139 
/*
 * Scan all attached disks for softraid metadata and (re)assemble
 * hotspare "volumes" and RAID volumes from what is found.
 *
 * Returns the number of RAID volumes for which assembly was attempted
 * (sr_ioctl_createraid() was called); 0 if none were found or an
 * allocation failed.
 */
int
sr_boot_assembly(struct sr_softc *sc)
{
	struct sr_boot_volume_head bvh;
	struct sr_boot_chunk_head bch, kdh;
	struct sr_boot_volume	*bv, *bv1, *bv2;
	struct sr_boot_chunk	*bc, *bcnext, *bc1, *bc2;
	struct sr_disk_head	sdklist;
	struct sr_disk		*sdk;
	struct disk		*dk;
	struct bioc_createraid	bcr;
	struct sr_meta_chunk	*hm;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*hotspare, *chunk, *last;
	u_int64_t		*ondisk = NULL;
	dev_t			*devs = NULL;
	char			devname[32];
	int			rv = 0, i;

	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));

	/*
	 * sdklist: disks already probed; bvh: volumes being assembled;
	 * bch: chunks not yet assigned; kdh: key disk chunks.
	 */
	SLIST_INIT(&sdklist);
	SLIST_INIT(&bvh);
	SLIST_INIT(&bch);
	SLIST_INIT(&kdh);

	dk = TAILQ_FIRST(&disklist);
	while (dk != TAILQ_END(&disklist)) {

		/* See if this disk has been checked. */
		SLIST_FOREACH(sdk, &sdklist, sdk_link)
			if (sdk->sdk_devno == dk->dk_devno)
				break;

		if (sdk != NULL || dk->dk_devno == NODEV) {
			dk = TAILQ_NEXT(dk, dk_link);
			continue;
		}

		/* Add this disk to the list that we've checked. */
		sdk = malloc(sizeof(struct sr_disk), M_DEVBUF,
		    M_NOWAIT | M_CANFAIL | M_ZERO);
		if (sdk == NULL)
			goto unwind;
		sdk->sdk_devno = dk->dk_devno;
		SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link);

		/* Only check sd(4) and wd(4) devices. */
		if (strncmp(dk->dk_name, "sd", 2) &&
		    strncmp(dk->dk_name, "wd", 2)) {
			dk = TAILQ_NEXT(dk, dk_link);
			continue;
		}

		/* native softraid uses partitions */
		sr_meta_native_bootprobe(sc, dk->dk_devno, &bch);

		/* probe non-native disks if native failed. */

		/* Restart scan since we may have slept. */
		dk = TAILQ_FIRST(&disklist);
	}

	/*
	 * Create a list of volumes and associate chunks with each volume.
	 */
	for (bc = SLIST_FIRST(&bch); bc != SLIST_END(&bch); bc = bcnext) {

		bcnext = SLIST_NEXT(bc, sbc_link);
		SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link);
		bc->sbc_chunk_id = bc->sbc_metadata->ssdi.ssd_chunk_id;

		/* Handle key disks separately. */
		if (bc->sbc_metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) {
			SLIST_INSERT_HEAD(&kdh, bc, sbc_link);
			continue;
		}

		/* Match the chunk to an existing volume by UUID. */
		SLIST_FOREACH(bv, &bvh, sbv_link) {
			if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid,
			    &bv->sbv_uuid,
			    sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) == 0)
				break;
		}

		if (bv == NULL) {
			bv = malloc(sizeof(struct sr_boot_volume),
			    M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO);
			if (bv == NULL) {
				sr_error(sc, "failed to allocate boot volume");
				goto unwind;
			}

			bv->sbv_level = bc->sbc_metadata->ssdi.ssd_level;
			bv->sbv_volid = bc->sbc_metadata->ssdi.ssd_volid;
			bv->sbv_chunk_no = bc->sbc_metadata->ssdi.ssd_chunk_no;
			bcopy(&bc->sbc_metadata->ssdi.ssd_uuid, &bv->sbv_uuid,
			    sizeof(bc->sbc_metadata->ssdi.ssd_uuid));
			SLIST_INIT(&bv->sbv_chunks);

			/* Maintain volume order. */
			bv2 = NULL;
			SLIST_FOREACH(bv1, &bvh, sbv_link) {
				if (bv1->sbv_volid > bv->sbv_volid)
					break;
				bv2 = bv1;
			}
			if (bv2 == NULL) {
				DNPRINTF(SR_D_META, "%s: insert volume %u "
				    "at head\n", DEVNAME(sc), bv->sbv_volid);
				SLIST_INSERT_HEAD(&bvh, bv, sbv_link);
			} else {
				DNPRINTF(SR_D_META, "%s: insert volume %u "
				    "after %u\n", DEVNAME(sc), bv->sbv_volid,
				    bv2->sbv_volid);
				SLIST_INSERT_AFTER(bv2, bv, sbv_link);
			}
		}

		/* Maintain chunk order. */
		bc2 = NULL;
		SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) {
			if (bc1->sbc_chunk_id > bc->sbc_chunk_id)
				break;
			bc2 = bc1;
		}
		if (bc2 == NULL) {
			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
			    "at head\n", DEVNAME(sc), bv->sbv_volid,
			    bc->sbc_chunk_id);
			SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link);
		} else {
			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
			    "after %u\n", DEVNAME(sc), bv->sbv_volid,
			    bc->sbc_chunk_id, bc2->sbc_chunk_id);
			SLIST_INSERT_AFTER(bc2, bc, sbc_link);
		}

		bv->sbv_chunks_found++;
	}

	/* Allocate memory for device and ondisk version arrays. */
	devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF,
	    M_NOWAIT | M_CANFAIL);
	if (devs == NULL) {
		printf("%s: failed to allocate device array\n", DEVNAME(sc));
		goto unwind;
	}
	ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF,
	    M_NOWAIT | M_CANFAIL);
	if (ondisk == NULL) {
		printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
		goto unwind;
	}

	/*
	 * Assemble hotspare "volumes".
	 */
	SLIST_FOREACH(bv, &bvh, sbv_link) {

		/* Check if this is a hotspare "volume". */
		if (bv->sbv_level != SR_HOTSPARE_LEVEL ||
		    bv->sbv_chunk_no != 1)
			continue;

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: assembling hotspare volume ",
		    DEVNAME(sc));
		if (sr_debug & SR_D_META)
			sr_uuid_print(&bv->sbv_uuid, 0);
		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
		    bv->sbv_volid, bv->sbv_chunk_no);
#endif

		/* Create hotspare chunk metadata. */
		hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_NOWAIT | M_CANFAIL | M_ZERO);
		if (hotspare == NULL) {
			printf("%s: failed to allocate hotspare\n",
			    DEVNAME(sc));
			goto unwind;
		}

		bc = SLIST_FIRST(&bv->sbv_chunks);
		sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname));
		hotspare->src_dev_mm = bc->sbc_mm;
		strlcpy(hotspare->src_devname, devname,
		    sizeof(hotspare->src_devname));
		hotspare->src_size = bc->sbc_metadata->ssdi.ssd_size;

		hm = &hotspare->src_meta;
		hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
		hm->scmi.scm_chunk_id = 0;
		hm->scmi.scm_size = bc->sbc_metadata->ssdi.ssd_size;
		hm->scmi.scm_coerced_size = bc->sbc_metadata->ssdi.ssd_size;
		strlcpy(hm->scmi.scm_devname, devname,
		    sizeof(hm->scmi.scm_devname));
		bcopy(&bc->sbc_metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid,
		    sizeof(struct sr_uuid));

		sr_checksum(sc, hm, &hm->scm_checksum,
		    sizeof(struct sr_meta_chunk_invariant));

		hm->scm_status = BIOC_SDHOTSPARE;

		/* Add chunk to hotspare list (kept in insertion order). */
		rw_enter_write(&sc->sc_hs_lock);
		cl = &sc->sc_hotspare_list;
		if (SLIST_EMPTY(cl))
			SLIST_INSERT_HEAD(cl, hotspare, src_link);
		else {
			SLIST_FOREACH(chunk, cl, src_link)
				last = chunk;
			SLIST_INSERT_AFTER(last, hotspare, src_link);
		}
		sc->sc_hotspare_no++;
		rw_exit_write(&sc->sc_hs_lock);

	}

	/*
	 * Assemble RAID volumes.
	 */
	SLIST_FOREACH(bv, &bvh, sbv_link) {

		/* NOTE: this zeroes the pointer variable itself (bc = NULL),
		 * not a structure; bc is reassigned before use below. */
		bzero(&bc, sizeof(bc));

		/* Check if this is a hotspare "volume". */
		if (bv->sbv_level == SR_HOTSPARE_LEVEL &&
		    bv->sbv_chunk_no == 1)
			continue;

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc));
		if (sr_debug & SR_D_META)
			sr_uuid_print(&bv->sbv_uuid, 0);
		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
		    bv->sbv_volid, bv->sbv_chunk_no);
#endif

		/*
		 * If this is a crypto volume, try to find a matching
		 * key disk...
		 */
		bcr.bc_key_disk = NODEV;
		if (bv->sbv_level == 'C') {
			SLIST_FOREACH(bc, &kdh, sbc_link) {
				if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid,
				    &bv->sbv_uuid,
				    sizeof(bc->sbc_metadata->ssdi.ssd_uuid))
				    == 0)
					bcr.bc_key_disk = bc->sbc_mm;
			}
		}

		for (i = 0; i < BIOC_CRMAXLEN; i++) {
			devs[i] = NODEV; /* mark device as illegal */
			ondisk[i] = 0;
		}

		/* For duplicate chunks, keep the newest metadata copy. */
		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
			if (devs[bc->sbc_chunk_id] != NODEV) {
				bv->sbv_chunks_found--;
				sr_meta_getdevname(sc, bc->sbc_mm, devname,
				    sizeof(devname));
				printf("%s: found duplicate chunk %u for "
				    "volume %u on device %s\n", DEVNAME(sc),
				    bc->sbc_chunk_id, bv->sbv_volid, devname);
			}

			if (devs[bc->sbc_chunk_id] == NODEV ||
			    bc->sbc_metadata->ssd_ondisk >
			    ondisk[bc->sbc_chunk_id]) {
				devs[bc->sbc_chunk_id] = bc->sbc_mm;
				ondisk[bc->sbc_chunk_id] =
				    bc->sbc_metadata->ssd_ondisk;
				DNPRINTF(SR_D_META, "%s: using ondisk "
				    "metadata version %llu for chunk %u\n",
				    DEVNAME(sc), ondisk[bc->sbc_chunk_id],
				    bc->sbc_chunk_id);
			}
		}

		if (bv->sbv_chunk_no != bv->sbv_chunks_found) {
			printf("%s: not all chunks were provided; "
			    "attempting to bring volume %d online\n",
			    DEVNAME(sc), bv->sbv_volid);
		}

		bcr.bc_level = bv->sbv_level;
		bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t);
		bcr.bc_dev_list = devs;
		bcr.bc_flags = BIOC_SCDEVT;

		rw_enter_write(&sc->sc_lock);
		bio_status_init(&sc->sc_status, &sc->sc_dev);
		sr_ioctl_createraid(sc, &bcr, 0);
		rw_exit_write(&sc->sc_lock);

		rv++;
	}

	/* done with metadata */
unwind:
	/* Free boot volumes and associated chunks. */
	for (bv1 = SLIST_FIRST(&bvh); bv1 != SLIST_END(&bvh); bv1 = bv2) {
		bv2 = SLIST_NEXT(bv1, sbv_link);
		for (bc1 = SLIST_FIRST(&bv1->sbv_chunks);
		    bc1 != SLIST_END(&bv1->sbv_chunks); bc1 = bc2) {
			bc2 = SLIST_NEXT(bc1, sbc_link);
			if (bc1->sbc_metadata)
				free(bc1->sbc_metadata, M_DEVBUF);
			free(bc1, M_DEVBUF);
		}
		free(bv1, M_DEVBUF);
	}
	/* Free keydisks chunks. */
	for (bc1 = SLIST_FIRST(&kdh); bc1 != SLIST_END(&kdh); bc1 = bc2) {
		bc2 = SLIST_NEXT(bc1, sbc_link);
		if (bc1->sbc_metadata)
			free(bc1->sbc_metadata, M_DEVBUF);
		free(bc1, M_DEVBUF);
	}
	/* Free unallocated chunks. */
	for (bc1 = SLIST_FIRST(&bch); bc1 != SLIST_END(&bch); bc1 = bc2) {
		bc2 = SLIST_NEXT(bc1, sbc_link);
		if (bc1->sbc_metadata)
			free(bc1->sbc_metadata, M_DEVBUF);
		free(bc1, M_DEVBUF);
	}

	while (!SLIST_EMPTY(&sdklist)) {
		sdk = SLIST_FIRST(&sdklist);
		SLIST_REMOVE_HEAD(&sdklist, sdk_link);
		free(sdk, M_DEVBUF);
	}

	if (devs)
		free(devs, M_DEVBUF);
	if (ondisk)
		free(ondisk, M_DEVBUF);

	return (rv);
}
1484 
1485 void
1486 sr_map_root(void)
1487 {
1488 	struct sr_softc		*sc = softraid0;
1489 	struct sr_meta_opt_item	*omi;
1490 	struct sr_meta_boot	*sbm;
1491 	u_char			duid[8];
1492 	int			i, j;
1493 
1494 	if (sc == NULL)
1495 		return;
1496 
1497 	DNPRINTF(SR_D_MISC, "%s: sr_map_root\n", DEVNAME(sc));
1498 	bzero(duid, sizeof(duid));
1499 	if (bcmp(rootduid, duid, sizeof(duid)) == 0) {
1500 		DNPRINTF(SR_D_MISC, "%s: root duid is zero\n", DEVNAME(sc));
1501 		return;
1502 	}
1503 
1504 	for (i = 0; i < SR_MAX_LD; i++) {
1505 		if (sc->sc_dis[i] == NULL)
1506 			continue;
1507 		SLIST_FOREACH(omi, &sc->sc_dis[i]->sd_meta_opt, omi_link) {
1508 			if (omi->omi_som->som_type != SR_OPT_BOOT)
1509 				continue;
1510 			sbm = (struct sr_meta_boot *)omi->omi_som;
1511 			for (j = 0; j < SR_MAX_BOOT_DISKS; j++) {
1512 				if (bcmp(rootduid, sbm->sbm_boot_duid[j],
1513 				    sizeof(rootduid)) == 0) {
1514 					bcopy(sbm->sbm_root_duid, rootduid,
1515 					    sizeof(rootduid));
1516 					DNPRINTF(SR_D_MISC, "%s: root duid "
1517 					    "mapped to %02hx%02hx%02hx%02hx"
1518 					    "%02hx%02hx%02hx%02hx\n",
1519 					    DEVNAME(sc), rootduid[0],
1520 					    rootduid[1], rootduid[2],
1521 					    rootduid[3], rootduid[4],
1522 					    rootduid[5], rootduid[6],
1523 					    rootduid[7]);
1524 					return;
1525 				}
1526 			}
1527 		}
1528 	}
1529 }
1530 
1531 int
1532 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
1533 {
1534 	struct disklabel	label;
1535 	char			*devname;
1536 	int			error, part;
1537 	daddr64_t		size;
1538 
1539 	DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
1540 	   DEVNAME(sc), ch_entry->src_devname);
1541 
1542 	devname = ch_entry->src_devname;
1543 	part = DISKPART(ch_entry->src_dev_mm);
1544 
1545 	/* get disklabel */
1546 	error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD,
1547 	    NOCRED, curproc);
1548 	if (error) {
1549 		DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
1550 		    DEVNAME(sc), devname);
1551 		goto unwind;
1552 	}
1553 	bcopy(label.d_uid, ch_entry->src_duid, sizeof(ch_entry->src_duid));
1554 
1555 	/* Make sure this is a 512-byte/sector device. */
1556 	if (label.d_secsize != DEV_BSIZE) {
1557 		sr_error(sc, "%s has unsupported sector size (%d)",
1558 		    devname, label.d_secsize);
1559 		goto unwind;
1560 	}
1561 
1562 	/* make sure the partition is of the right type */
1563 	if (label.d_partitions[part].p_fstype != FS_RAID) {
1564 		DNPRINTF(SR_D_META,
1565 		    "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc),
1566 		    devname,
1567 		    label.d_partitions[part].p_fstype);
1568 		goto unwind;
1569 	}
1570 
1571 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
1572 	if (size <= 0) {
1573 		DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
1574 		    devname);
1575 		goto unwind;
1576 	}
1577 	ch_entry->src_size = size;
1578 
1579 	DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc),
1580 	    devname, size);
1581 
1582 	return (SR_META_F_NATIVE);
1583 unwind:
1584 	DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
1585 	    devname ? devname : "nodev");
1586 	return (SR_META_F_INVALID);
1587 }
1588 
/*
 * Read and cross-check native metadata from all chunks of a discipline:
 * every chunk must carry the same volume UUID, and chunks whose on-disk
 * metadata generation is stale are marked offline.  Returns 0 on
 * success, 1 on failure.
 */
int
sr_meta_native_attach(struct sr_discipline *sd, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_metadata	*md = NULL;
	struct sr_chunk		*ch_entry, *ch_next;
	struct sr_uuid		uuid;
	u_int64_t		version = 0;
	int			sr, not_sr, rv = 1, d, expected = -1, old_meta = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));

	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (md == NULL) {
		sr_error(sc, "not enough memory for metadata buffer");
		goto bad;
	}

	bzero(&uuid, sizeof uuid);

	/*
	 * First pass: count softraid vs non-softraid chunks, take UUID,
	 * expected chunk count and metadata version from the first chunk,
	 * and detect chunks carrying a different metadata generation.
	 */
	sr = not_sr = d = 0;
	SLIST_FOREACH(ch_entry, cl, src_link) {
		if (ch_entry->src_dev_mm == NODEV)
			continue;

		if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
			sr_error(sc, "could not read native metadata");
			goto bad;
		}

		if (md->ssdi.ssd_magic == SR_MAGIC) {
			sr++;
			ch_entry->src_meta.scmi.scm_chunk_id =
			    md->ssdi.ssd_chunk_id;
			if (d == 0) {
				bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
				expected = md->ssdi.ssd_chunk_no;
				version = md->ssd_ondisk;
				d++;
				continue;
			} else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
			    sizeof uuid)) {
				sr_error(sc, "not part of the same volume");
				goto bad;
			}
			if (md->ssd_ondisk != version) {
				old_meta++;
				/* track the newest generation seen */
				version = MAX(md->ssd_ondisk, version);
			}
		} else
			not_sr++;
	}

	if (sr && not_sr) {
		sr_error(sc, "not all chunks are of the native metadata "
		    "format");
		goto bad;
	}

	/* mixed metadata versions; mark bad disks offline */
	if (old_meta) {
		d = 0;
		for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl);
		    ch_entry = ch_next, d++) {
			ch_next = SLIST_NEXT(ch_entry, src_link);

			/* XXX do we want to read this again? */
			if (ch_entry->src_dev_mm == NODEV)
				panic("src_dev_mm == NODEV");
			if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md,
			    NULL))
				sr_warn(sc, "could not read native metadata");
			if (md->ssd_ondisk != version)
				sd->sd_vol.sv_chunks[d]->src_meta.scm_status =
				    BIOC_SDOFFLINE;
		}
	}

	/* Missing chunks are tolerated here; only a debug note is made. */
	if (expected != sr && !force && expected != -1) {
		DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying "
		    "anyway\n", DEVNAME(sc));
	}

	rv = 0;
bad:
	if (md)
		free(md, M_DEVBUF);
	return (rv);
}
1679 
1680 int
1681 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
1682     struct sr_metadata *md, void *fm)
1683 {
1684 #ifdef SR_DEBUG
1685 	struct sr_softc		*sc = sd->sd_sc;
1686 #endif
1687 	DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
1688 	    DEVNAME(sc), dev, md);
1689 
1690 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1691 	    B_READ));
1692 }
1693 
1694 int
1695 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
1696     struct sr_metadata *md, void *fm)
1697 {
1698 #ifdef SR_DEBUG
1699 	struct sr_softc		*sc = sd->sd_sc;
1700 #endif
1701 	DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
1702 	    DEVNAME(sc), dev, md);
1703 
1704 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1705 	    B_WRITE));
1706 }
1707 
1708 void
1709 sr_hotplug_register(struct sr_discipline *sd, void *func)
1710 {
1711 	struct sr_hotplug_list	*mhe;
1712 
1713 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n",
1714 	    DEVNAME(sd->sd_sc), func);
1715 
1716 	/* make sure we aren't on the list yet */
1717 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1718 		if (mhe->sh_hotplug == func)
1719 			return;
1720 
1721 	mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF,
1722 	    M_WAITOK | M_ZERO);
1723 	mhe->sh_hotplug = func;
1724 	mhe->sh_sd = sd;
1725 	SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link);
1726 }
1727 
1728 void
1729 sr_hotplug_unregister(struct sr_discipline *sd, void *func)
1730 {
1731 	struct sr_hotplug_list	*mhe;
1732 
1733 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n",
1734 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func);
1735 
1736 	/* make sure we are on the list yet */
1737 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1738 		if (mhe->sh_hotplug == func) {
1739 			SLIST_REMOVE(&sr_hotplug_callbacks, mhe,
1740 			    sr_hotplug_list, shl_link);
1741 			free(mhe, M_DEVBUF);
1742 			if (SLIST_EMPTY(&sr_hotplug_callbacks))
1743 				SLIST_INIT(&sr_hotplug_callbacks);
1744 			return;
1745 		}
1746 }
1747 
1748 void
1749 sr_disk_attach(struct disk *diskp, int action)
1750 {
1751 	struct sr_hotplug_list	*mhe;
1752 
1753 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1754 		if (mhe->sh_sd->sd_ready)
1755 			mhe->sh_hotplug(mhe->sh_sd, diskp, action);
1756 }
1757 
/*
 * Autoconf match function: softraid is a pseudo-device, so it always
 * matches.
 */
int
sr_match(struct device *parent, void *match, void *aux)
{
	return (1);
}
1763 
/*
 * Autoconf attach function: initialize locks and lists, register with
 * bio(4) and the sensor framework, attach the virtual SCSI bus, hook
 * disk hotplug and shutdown, then assemble any volumes found on disk.
 */
void
sr_attach(struct device *parent, struct device *self, void *aux)
{
	struct sr_softc		*sc = (void *)self;
	struct scsibus_attach_args saa;

	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));

	/* Remember the first softc for sr_map_root(). */
	if (softraid0 == NULL)
		softraid0 = sc;

	rw_init(&sc->sc_lock, "sr_lock");
	rw_init(&sc->sc_hs_lock, "sr_hs_lock");

	SLIST_INIT(&sr_hotplug_callbacks);
	SLIST_INIT(&sc->sc_hotspare_list);

#if NBIO > 0
	if (bio_register(&sc->sc_dev, sr_bio_ioctl) != 0)
		printf("%s: controller registration failed", DEVNAME(sc));
#endif /* NBIO > 0 */

#ifndef SMALL_KERNEL
	strlcpy(sc->sc_sensordev.xname, DEVNAME(sc),
	    sizeof(sc->sc_sensordev.xname));
	sensordev_install(&sc->sc_sensordev);
#endif /* SMALL_KERNEL */

	printf("\n");

	/* Set up the virtual SCSI adapter this driver presents. */
	sc->sc_link.adapter_softc = sc;
	sc->sc_link.adapter = &sr_switch;
	sc->sc_link.adapter_target = SR_MAX_LD;
	sc->sc_link.adapter_buswidth = SR_MAX_LD;
	sc->sc_link.luns = 1;

	bzero(&saa, sizeof(saa));
	saa.saa_sc_link = &sc->sc_link;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);

	softraid_disk_attach = sr_disk_attach;

	sc->sc_shutdownhook = shutdownhook_establish(sr_shutdownhook, sc);

	/* Bring up any volumes whose metadata is already on disk. */
	sr_boot_assembly(sc);
}
1812 
1813 int
1814 sr_detach(struct device *self, int flags)
1815 {
1816 	struct sr_softc		*sc = (void *)self;
1817 	int			rv;
1818 
1819 	DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc));
1820 
1821 	if (sc->sc_shutdownhook)
1822 		shutdownhook_disestablish(sc->sc_shutdownhook);
1823 
1824 	softraid_disk_attach = NULL;
1825 
1826 	sr_shutdown(sc);
1827 
1828 #ifndef SMALL_KERNEL
1829 	if (sc->sc_sensor_task != NULL)
1830 		sensor_task_unregister(sc->sc_sensor_task);
1831 	sensordev_deinstall(&sc->sc_sensordev);
1832 #endif /* SMALL_KERNEL */
1833 
1834 	if (sc->sc_scsibus != NULL) {
1835 		rv = config_detach((struct device *)sc->sc_scsibus, flags);
1836 		if (rv != 0)
1837 			return (rv);
1838 		sc->sc_scsibus = NULL;
1839 	}
1840 
1841 	return (rv);
1842 }
1843 
/*
 * Record an informational message in the current bio status.  The
 * caller must hold sc_lock for writing (asserted below).
 */
void
sr_info(struct sr_softc *sc, const char *fmt, ...)
{
	va_list			ap;

	rw_assert_wrlock(&sc->sc_lock);

	va_start(ap, fmt);
	bio_status(&sc->sc_status, 0, BIO_MSG_INFO, fmt, &ap);
	va_end(ap);
}
1855 
/*
 * Record a warning message in the current bio status.  The caller must
 * hold sc_lock for writing (asserted below).
 */
void
sr_warn(struct sr_softc *sc, const char *fmt, ...)
{
	va_list			ap;

	rw_assert_wrlock(&sc->sc_lock);

	va_start(ap, fmt);
	bio_status(&sc->sc_status, 1, BIO_MSG_WARN, fmt, &ap);
	va_end(ap);
}
1867 
/*
 * Record an error message in the current bio status.  The caller must
 * hold sc_lock for writing (asserted below).
 */
void
sr_error(struct sr_softc *sc, const char *fmt, ...)
{
	va_list			ap;

	rw_assert_wrlock(&sc->sc_lock);

	va_start(ap, fmt);
	bio_status(&sc->sc_status, 1, BIO_MSG_ERROR, fmt, &ap);
	va_end(ap);
}
1879 
1880 void
1881 sr_minphys(struct buf *bp, struct scsi_link *sl)
1882 {
1883 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
1884 
1885 	/* XXX currently using SR_MAXFER = MAXPHYS */
1886 	if (bp->b_bcount > SR_MAXFER)
1887 		bp->b_bcount = SR_MAXFER;
1888 	minphys(bp);
1889 }
1890 
1891 void
1892 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
1893 {
1894 	size_t			copy_cnt;
1895 
1896 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
1897 	    xs, size);
1898 
1899 	if (xs->datalen) {
1900 		copy_cnt = MIN(size, xs->datalen);
1901 		bcopy(v, xs->data, copy_cnt);
1902 	}
1903 }
1904 
1905 int
1906 sr_ccb_alloc(struct sr_discipline *sd)
1907 {
1908 	struct sr_ccb		*ccb;
1909 	int			i;
1910 
1911 	if (!sd)
1912 		return (1);
1913 
1914 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
1915 
1916 	if (sd->sd_ccb)
1917 		return (1);
1918 
1919 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
1920 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
1921 	TAILQ_INIT(&sd->sd_ccb_freeq);
1922 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
1923 		ccb = &sd->sd_ccb[i];
1924 		ccb->ccb_dis = sd;
1925 		sr_ccb_put(ccb);
1926 	}
1927 
1928 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
1929 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
1930 
1931 	return (0);
1932 }
1933 
1934 void
1935 sr_ccb_free(struct sr_discipline *sd)
1936 {
1937 	struct sr_ccb		*ccb;
1938 
1939 	if (!sd)
1940 		return;
1941 
1942 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
1943 
1944 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
1945 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1946 
1947 	if (sd->sd_ccb)
1948 		free(sd->sd_ccb, M_DEVBUF);
1949 }
1950 
1951 struct sr_ccb *
1952 sr_ccb_get(struct sr_discipline *sd)
1953 {
1954 	struct sr_ccb		*ccb;
1955 	int			s;
1956 
1957 	s = splbio();
1958 
1959 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
1960 	if (ccb) {
1961 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1962 		ccb->ccb_state = SR_CCB_INPROGRESS;
1963 	}
1964 
1965 	splx(s);
1966 
1967 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
1968 	    ccb);
1969 
1970 	return (ccb);
1971 }
1972 
1973 void
1974 sr_ccb_put(struct sr_ccb *ccb)
1975 {
1976 	struct sr_discipline	*sd = ccb->ccb_dis;
1977 	int			s;
1978 
1979 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
1980 	    ccb);
1981 
1982 	s = splbio();
1983 
1984 	ccb->ccb_wu = NULL;
1985 	ccb->ccb_state = SR_CCB_FREE;
1986 	ccb->ccb_target = -1;
1987 	ccb->ccb_opaque = NULL;
1988 
1989 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
1990 
1991 	splx(s);
1992 }
1993 
1994 int
1995 sr_wu_alloc(struct sr_discipline *sd)
1996 {
1997 	struct sr_workunit	*wu;
1998 	int			i, no_wu;
1999 
2000 	if (!sd)
2001 		return (1);
2002 
2003 	DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
2004 	    sd, sd->sd_max_wu);
2005 
2006 	if (sd->sd_wu)
2007 		return (1);
2008 
2009 	no_wu = sd->sd_max_wu;
2010 	sd->sd_wu_pending = no_wu;
2011 
2012 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
2013 	    M_DEVBUF, M_WAITOK | M_ZERO);
2014 	TAILQ_INIT(&sd->sd_wu_freeq);
2015 	TAILQ_INIT(&sd->sd_wu_pendq);
2016 	TAILQ_INIT(&sd->sd_wu_defq);
2017 	for (i = 0; i < no_wu; i++) {
2018 		wu = &sd->sd_wu[i];
2019 		wu->swu_dis = sd;
2020 		sr_wu_put(sd, wu);
2021 	}
2022 
2023 	return (0);
2024 }
2025 
2026 void
2027 sr_wu_free(struct sr_discipline *sd)
2028 {
2029 	struct sr_workunit	*wu;
2030 
2031 	if (!sd)
2032 		return;
2033 
2034 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
2035 
2036 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
2037 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
2038 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
2039 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
2040 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
2041 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
2042 
2043 	if (sd->sd_wu)
2044 		free(sd->sd_wu, M_DEVBUF);
2045 }
2046 
/*
 * Return a work unit to its discipline's free queue: release any ccbs
 * still attached to it, reset its state and requeue it.  Used both by
 * sr_wu_alloc() (initial population) and as the scsi iopool put hook.
 */
void
sr_wu_put(void *xsd, void *xwu)
{
	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
	struct sr_workunit	*wu = (struct sr_workunit *)xwu;
	struct sr_ccb		*ccb;

	int			s;

	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);

	/* ccb queue manipulation must happen at splbio. */
	s = splbio();
	if (wu->swu_cb_active == 1)
		panic("%s: sr_wu_put got active wu", DEVNAME(sd->sd_sc));
	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
		sr_ccb_put(ccb);
	}
	splx(s);

	/* Reset the wu; the bzero also clears swu_ccb, so re-init it. */
	bzero(wu, sizeof(*wu));
	TAILQ_INIT(&wu->swu_ccb);
	wu->swu_dis = sd;

	mtx_enter(&sd->sd_wu_mtx);
	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
	sd->sd_wu_pending--;
	mtx_leave(&sd->sd_wu_mtx);
}
2076 
2077 void *
2078 sr_wu_get(void *xsd)
2079 {
2080 	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
2081 	struct sr_workunit	*wu;
2082 
2083 	mtx_enter(&sd->sd_wu_mtx);
2084 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
2085 	if (wu) {
2086 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
2087 		sd->sd_wu_pending++;
2088 	}
2089 	mtx_leave(&sd->sd_wu_mtx);
2090 
2091 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
2092 
2093 	return (wu);
2094 }
2095 
/*
 * Complete a SCSI transfer on behalf of a discipline by handing it back
 * to the SCSI midlayer.
 */
void
sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs)
{
	DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs);

	scsi_done(xs);
}
2103 
/*
 * Entry point for SCSI commands from the midlayer.  Looks up the
 * discipline for the target, recycles the work unit supplied via the
 * iopool (xs->io) and dispatches the command to the matching discipline
 * handler.  Failures are reported back through xs via the stuffup path;
 * non-READ/WRITE commands complete immediately via the complete path,
 * while READ/WRITE dispatch completes asynchronously.
 */
void
sr_scsi_cmd(struct scsi_xfer *xs)
{
	int			s;
	struct scsi_link	*link = xs->sc_link;
	struct sr_softc		*sc = link->adapter_softc;
	struct sr_workunit	*wu = NULL;
	struct sr_discipline	*sd;
	struct sr_ccb		*ccb;

	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: target %d xs: %p "
	    "flags: %#x\n", DEVNAME(sc), link->target, xs, xs->flags);

	sd = sc->sc_dis[link->target];
	if (sd == NULL) {
		printf("%s: sr_scsi_cmd NULL discipline\n", DEVNAME(sc));
		goto stuffup;
	}

	if (sd->sd_deleted) {
		printf("%s: %s device is being deleted, failing io\n",
		    DEVNAME(sc), sd->sd_meta->ssd_devname);
		goto stuffup;
	}

	wu = xs->io;
	/* scsi layer *can* re-send wu without calling sr_wu_put(). */
	s = splbio();
	if (wu->swu_cb_active == 1)
		panic("%s: sr_scsi_cmd got active wu", DEVNAME(sd->sd_sc));
	/* Release any ccbs left attached from a previous use of this wu. */
	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
		sr_ccb_put(ccb);
	}
	splx(s);

	/* Reset the work unit for this command. */
	bzero(wu, sizeof(*wu));
	TAILQ_INIT(&wu->swu_ccb);
	wu->swu_state = SR_WU_INPROGRESS;
	wu->swu_dis = sd;
	wu->swu_xs = xs;

	switch (xs->cmd->opcode) {
	case READ_COMMAND:
	case READ_BIG:
	case READ_16:
	case WRITE_COMMAND:
	case WRITE_BIG:
	case WRITE_16:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
		    DEVNAME(sc), xs->cmd->opcode);
		if (sd->sd_scsi_rw(wu))
			goto stuffup;
		break;

	case SYNCHRONIZE_CACHE:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
		    DEVNAME(sc));
		if (sd->sd_scsi_sync(wu))
			goto stuffup;
		goto complete;

	case TEST_UNIT_READY:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
		    DEVNAME(sc));
		if (sd->sd_scsi_tur(wu))
			goto stuffup;
		goto complete;

	case START_STOP:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
		    DEVNAME(sc));
		if (sd->sd_scsi_start_stop(wu))
			goto stuffup;
		goto complete;

	case INQUIRY:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
		    DEVNAME(sc));
		if (sd->sd_scsi_inquiry(wu))
			goto stuffup;
		goto complete;

	case READ_CAPACITY:
	case READ_CAPACITY_16:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
		    DEVNAME(sc), xs->cmd->opcode);
		if (sd->sd_scsi_read_cap(wu))
			goto stuffup;
		goto complete;

	case REQUEST_SENSE:
		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
		    DEVNAME(sc));
		if (sd->sd_scsi_req_sense(wu))
			goto stuffup;
		goto complete;

	default:
		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
		    DEVNAME(sc), xs->cmd->opcode);
		/* XXX might need to add generic function to handle others */
		goto stuffup;
	}

	return;
stuffup:
	/* Report queued sense data, if any, otherwise a generic error. */
	if (sd && sd->sd_scsi_sense.error_code) {
		xs->error = XS_SENSE;
		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
	} else {
		xs->error = XS_DRIVER_STUFFUP;
	}
complete:
	sr_scsi_done(sd, xs);
}
2221 
2222 int
2223 sr_scsi_probe(struct scsi_link *link)
2224 {
2225 	struct sr_softc		*sc = link->adapter_softc;
2226 	struct sr_discipline	*sd;
2227 
2228 	KASSERT(link->target < SR_MAX_LD && link->lun == 0);
2229 
2230 	sd = sc->sc_dis[link->target];
2231 	if (sd == NULL)
2232 		return (ENODEV);
2233 
2234 	link->pool = &sd->sd_iopool;
2235 	if (sd->sd_openings)
2236 		link->openings = sd->sd_openings(sd);
2237 	else
2238 		link->openings = sd->sd_max_wu;
2239 
2240 	return (0);
2241 }
2242 
2243 int
2244 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
2245 {
2246 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
2247 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
2248 
2249 	/* Pass bio ioctls through to bio handler. */
2250 	if (IOCGROUP(cmd) == 'B')
2251 		return (sr_bio_ioctl(link->adapter_softc, cmd, addr));
2252 
2253 	switch (cmd) {
2254 	case DIOCGCACHE:
2255 	case DIOCSCACHE:
2256 		return (EOPNOTSUPP);
2257 	default:
2258 		return (ENOTTY);
2259 	}
2260 }
2261 
/*
 * bio(4) ioctl dispatcher.  Runs with sc_lock held for the duration of
 * the operation.  The outcome is conveyed to userland in the bio_status
 * structure copied into *bio; the function itself always returns 0 so
 * that the copied-out status is what userland acts on.
 */
int
sr_bio_ioctl(struct device *dev, u_long cmd, caddr_t addr)
{
	struct sr_softc		*sc = (struct sr_softc *)dev;
	struct bio		*bio = (struct bio *)addr;
	int			rv = 0;

	DNPRINTF(SR_D_IOCTL, "%s: sr_bio_ioctl ", DEVNAME(sc));

	rw_enter_write(&sc->sc_lock);

	bio_status_init(&sc->sc_status, &sc->sc_dev);

	switch (cmd) {
	case BIOCINQ:
		DNPRINTF(SR_D_IOCTL, "inq\n");
		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
		break;

	case BIOCVOL:
		DNPRINTF(SR_D_IOCTL, "vol\n");
		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
		break;

	case BIOCDISK:
		DNPRINTF(SR_D_IOCTL, "disk\n");
		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
		break;

	case BIOCALARM:
		DNPRINTF(SR_D_IOCTL, "alarm\n");
		/* Alarms are not implemented; reported as success. */
		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
		break;

	case BIOCBLINK:
		DNPRINTF(SR_D_IOCTL, "blink\n");
		/* Drive blinking is not implemented; reported as success. */
		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
		break;

	case BIOCSETSTATE:
		DNPRINTF(SR_D_IOCTL, "setstate\n");
		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
		break;

	case BIOCCREATERAID:
		DNPRINTF(SR_D_IOCTL, "createraid\n");
		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
		break;

	case BIOCDELETERAID:
		DNPRINTF(SR_D_IOCTL, "deleteraid\n");
		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
		break;

	case BIOCDISCIPLINE:
		DNPRINTF(SR_D_IOCTL, "discipline\n");
		rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr);
		break;

	case BIOCINSTALLBOOT:
		DNPRINTF(SR_D_IOCTL, "installboot\n");
		rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr);
		break;

	default:
		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
		rv = ENOTTY;
	}

	/* Map the handler result onto the bio status copied to userland. */
	sc->sc_status.bs_status = (rv ? BIO_STATUS_ERROR : BIO_STATUS_SUCCESS);

	bcopy(&sc->sc_status, &bio->bio_status, sizeof(struct bio_status));

	rw_exit_write(&sc->sc_lock);

	return (0);
}
2339 
2340 int
2341 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
2342 {
2343 	int			i, vol, disk;
2344 
2345 	for (i = 0, vol = 0, disk = 0; i < SR_MAX_LD; i++)
2346 		/* XXX this will not work when we stagger disciplines */
2347 		if (sc->sc_dis[i]) {
2348 			vol++;
2349 			disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
2350 		}
2351 
2352 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
2353 	bi->bi_novol = vol + sc->sc_hotspare_no;
2354 	bi->bi_nodisk = disk + sc->sc_hotspare_no;
2355 
2356 	return (0);
2357 }
2358 
/*
 * BIOCVOL: fill in bioc_vol for the volume identified by bv->bv_volid.
 * Volume ids enumerate configured disciplines first, then the chunks on
 * the global hotspare list (each hotspare is presented as a one-disk
 * pseudo-volume with level -1).  Returns 0 on success, EINVAL if no
 * such volume exists.
 */
int
sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
{
	int			i, vol, rv = EINVAL;
	struct sr_discipline	*sd;
	struct sr_chunk		*hotspare;
	daddr64_t		rb, sz;

	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
		/* XXX this will not work when we stagger disciplines */
		if (sc->sc_dis[i])
			vol++;
		if (vol != bv->bv_volid)
			continue;

		if (sc->sc_dis[i] == NULL)
			goto done;

		sd = sc->sc_dis[i];
		bv->bv_status = sd->sd_vol_status;
		bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
		bv->bv_level = sd->sd_meta->ssdi.ssd_level;
		bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;

#ifdef CRYPTO
		/* A CRYPTO volume with a key disk shows one extra disk. */
		if (sd->sd_meta->ssdi.ssd_level == 'C' &&
		    sd->mds.mdd_crypto.key_disk != NULL)
			bv->bv_nodisk++;
#endif

		if (bv->bv_status == BIOC_SVREBUILD) {
			/*
			 * Express rebuild progress as a percentage; the
			 * formula deliberately rounds down so that an
			 * in-progress rebuild never reports 100%.
			 */
			sz = sd->sd_meta->ssdi.ssd_size;
			rb = sd->sd_meta->ssd_rebuild;
			if (rb > 0)
				bv->bv_percent = 100 -
				    ((sz * 100 - rb * 100) / sz) - 1;
			else
				bv->bv_percent = 0;
		}
		strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
		    sizeof(bv->bv_dev));
		strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
		    sizeof(bv->bv_vendor));
		rv = 0;
		goto done;
	}

	/* Check hotspares list. */
	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
		vol++;
		if (vol != bv->bv_volid)
			continue;

		bv->bv_status = BIOC_SVONLINE;
		bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
		bv->bv_level = -1;	/* Hotspare. */
		bv->bv_nodisk = 1;
		strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname,
		    sizeof(bv->bv_dev));
		strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname,
		    sizeof(bv->bv_vendor));
		rv = 0;
		goto done;
	}

done:
	return (rv);
}
2427 
/*
 * BIOCDISK: fill in bioc_disk for disk bd->bd_diskid of the volume
 * identified by bd->bd_volid.  Volume ids follow the same enumeration
 * as sr_ioctl_vol(): configured disciplines first, then hotspares
 * (which have exactly one disk, id 0).  Returns 0 on success, EINVAL
 * if the volume or disk does not exist.
 */
int
sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
{
	int			i, vol, rv = EINVAL, id;
	struct sr_chunk		*src, *hotspare;

	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
		/* XXX this will not work when we stagger disciplines */
		if (sc->sc_dis[i])
			vol++;
		if (vol != bd->bd_volid)
			continue;

		if (sc->sc_dis[i] == NULL)
			goto done;

		id = bd->bd_diskid;

		if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
			src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
#ifdef CRYPTO
		/* The key disk of a CRYPTO volume is one past the chunks. */
		else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no &&
		    sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' &&
		    sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL)
			src = sc->sc_dis[i]->mds.mdd_crypto.key_disk;
#endif
		else
			break;

		bd->bd_status = src->src_meta.scm_status;
		bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
		bd->bd_channel = vol;
		bd->bd_target = id;
		strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
		    sizeof(bd->bd_vendor));
		rv = 0;
		goto done;
	}

	/* Check hotspares list. */
	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
		vol++;
		if (vol != bd->bd_volid)
			continue;

		if (bd->bd_diskid != 0)
			break;

		bd->bd_status = hotspare->src_meta.scm_status;
		bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
		bd->bd_channel = vol;
		bd->bd_target = bd->bd_diskid;
		strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname,
		    sizeof(bd->bd_vendor));
		rv = 0;
		goto done;
	}

done:
	return (rv);
}
2489 
/*
 * BIOCSETSTATE: change the state of a chunk.  BIOC_SSHOTSPARE turns the
 * device in bs->bs_other_id into a global hotspare; BIOC_SSOFFLINE takes
 * a chunk of volume bs->bs_volid offline; BIOC_SSREBUILD initiates a
 * rebuild of that volume onto the given device.  Returns 0 on success,
 * EINVAL (or an errno from the sub-operation) on failure.
 */
int
sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
{
	int			rv = EINVAL;
	int			i, vol, found, c;
	struct sr_discipline	*sd = NULL;
	struct sr_chunk		*ch_entry;
	struct sr_chunk_head	*cl;

	if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED)
		goto done;

	/* Hotspares are global, not tied to a particular volume. */
	if (bs->bs_status == BIOC_SSHOTSPARE) {
		rv = sr_hotspare(sc, (dev_t)bs->bs_other_id);
		goto done;
	}

	/* Locate the discipline for the requested volume id. */
	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
		/* XXX this will not work when we stagger disciplines */
		if (sc->sc_dis[i])
			vol++;
		if (vol != bs->bs_volid)
			continue;
		sd = sc->sc_dis[i];
		break;
	}
	if (sd == NULL)
		goto done;

	switch (bs->bs_status) {
	case BIOC_SSOFFLINE:
		/* Take chunk offline */
		found = c = 0;
		cl = &sd->sd_vol.sv_chunk_list;
		SLIST_FOREACH(ch_entry, cl, src_link) {
			if (ch_entry->src_dev_mm == bs->bs_other_id) {
				found = 1;
				break;
			}
			c++;
		}
		if (found == 0) {
			sr_error(sc, "chunk not part of array");
			goto done;
		}

		/* XXX: check current state first */
		sd->sd_set_chunk_state(sd, c, BIOC_SDOFFLINE);

		if (sr_meta_save(sd, SR_META_DIRTY)) {
			sr_error(sc, "could not save metadata for %s",
			    sd->sd_meta->ssd_devname);
			goto done;
		}
		rv = 0;
		break;

	case BIOC_SDSCRUB:
		/* Scrubbing is not implemented; silently ignored. */
		break;

	case BIOC_SSREBUILD:
		rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0);
		break;

	default:
		sr_error(sc, "unsupported state request %d", bs->bs_status);
	}

done:
	return (rv);
}
2561 
2562 int
2563 sr_chunk_in_use(struct sr_softc *sc, dev_t dev)
2564 {
2565 	struct sr_discipline	*sd;
2566 	struct sr_chunk		*chunk;
2567 	int			i, c;
2568 
2569 	DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev);
2570 
2571 	if (dev == NODEV)
2572 		return BIOC_SDINVALID;
2573 
2574 	/* See if chunk is already in use. */
2575 	for (i = 0; i < SR_MAX_LD; i++) {
2576 		if (sc->sc_dis[i] == NULL)
2577 			continue;
2578 		sd = sc->sc_dis[i];
2579 		for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) {
2580 			chunk = sd->sd_vol.sv_chunks[c];
2581 			if (chunk->src_dev_mm == dev)
2582 				return chunk->src_meta.scm_status;
2583 		}
2584 	}
2585 
2586 	/* Check hotspares list. */
2587 	SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link)
2588 		if (chunk->src_dev_mm == dev)
2589 			return chunk->src_meta.scm_status;
2590 
2591 	return BIOC_SDINVALID;
2592 }
2593 
2594 int
2595 sr_hotspare(struct sr_softc *sc, dev_t dev)
2596 {
2597 	struct sr_discipline	*sd = NULL;
2598 	struct sr_metadata	*sm = NULL;
2599 	struct sr_meta_chunk    *hm;
2600 	struct sr_chunk_head	*cl;
2601 	struct sr_chunk		*chunk, *last, *hotspare = NULL;
2602 	struct sr_uuid		uuid;
2603 	struct disklabel	label;
2604 	struct vnode		*vn;
2605 	daddr64_t		size;
2606 	char			devname[32];
2607 	int			rv = EINVAL;
2608 	int			c, part, open = 0;
2609 
2610 	/*
2611 	 * Add device to global hotspares list.
2612 	 */
2613 
2614 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2615 
2616 	/* Make sure chunk is not already in use. */
2617 	c = sr_chunk_in_use(sc, dev);
2618 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2619 		if (c == BIOC_SDHOTSPARE)
2620 			sr_error(sc, "%s is already a hotspare", devname);
2621 		else
2622 			sr_error(sc, "%s is already in use", devname);
2623 		goto done;
2624 	}
2625 
2626 	/* XXX - See if there is an existing degraded volume... */
2627 
2628 	/* Open device. */
2629 	if (bdevvp(dev, &vn)) {
2630 		sr_error(sc, "sr_hotspare: cannot allocate vnode");
2631 		goto done;
2632 	}
2633 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2634 		DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
2635 		    DEVNAME(sc), devname);
2636 		vput(vn);
2637 		goto fail;
2638 	}
2639 	open = 1; /* close dev on error */
2640 
2641 	/* Get partition details. */
2642 	part = DISKPART(dev);
2643 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2644 	    NOCRED, curproc)) {
2645 		DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n",
2646 		    DEVNAME(sc));
2647 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2648 		vput(vn);
2649 		goto fail;
2650 	}
2651 	if (label.d_secsize != DEV_BSIZE) {
2652 		sr_error(sc, "%s has unsupported sector size (%d)",
2653 		    devname, label.d_secsize);
2654 		goto fail;
2655 	}
2656 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2657 		sr_error(sc, "%s partition not of type RAID (%d)",
2658 		    devname, label.d_partitions[part].p_fstype);
2659 		goto fail;
2660 	}
2661 
2662 	/* Calculate partition size. */
2663 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2664 
2665 	/*
2666 	 * Create and populate chunk metadata.
2667 	 */
2668 
2669 	sr_uuid_generate(&uuid);
2670 	hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
2671 
2672 	hotspare->src_dev_mm = dev;
2673 	hotspare->src_vn = vn;
2674 	strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname));
2675 	hotspare->src_size = size;
2676 
2677 	hm = &hotspare->src_meta;
2678 	hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
2679 	hm->scmi.scm_chunk_id = 0;
2680 	hm->scmi.scm_size = size;
2681 	hm->scmi.scm_coerced_size = size;
2682 	strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname));
2683 	bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid));
2684 
2685 	sr_checksum(sc, hm, &hm->scm_checksum,
2686 	    sizeof(struct sr_meta_chunk_invariant));
2687 
2688 	hm->scm_status = BIOC_SDHOTSPARE;
2689 
2690 	/*
2691 	 * Create and populate our own discipline and metadata.
2692 	 */
2693 
2694 	sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO);
2695 	sm->ssdi.ssd_magic = SR_MAGIC;
2696 	sm->ssdi.ssd_version = SR_META_VERSION;
2697 	sm->ssd_ondisk = 0;
2698 	sm->ssdi.ssd_vol_flags = 0;
2699 	bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid));
2700 	sm->ssdi.ssd_chunk_no = 1;
2701 	sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID;
2702 	sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL;
2703 	sm->ssdi.ssd_size = size;
2704 	strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
2705 	snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
2706 	    "SR %s", "HOTSPARE");
2707 	snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
2708 	    "%03d", SR_META_VERSION);
2709 
2710 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
2711 	sd->sd_sc = sc;
2712 	sd->sd_meta = sm;
2713 	sd->sd_meta_type = SR_META_F_NATIVE;
2714 	sd->sd_vol_status = BIOC_SVONLINE;
2715 	strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name));
2716 	SLIST_INIT(&sd->sd_meta_opt);
2717 
2718 	/* Add chunk to volume. */
2719 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF,
2720 	    M_WAITOK | M_ZERO);
2721 	sd->sd_vol.sv_chunks[0] = hotspare;
2722 	SLIST_INIT(&sd->sd_vol.sv_chunk_list);
2723 	SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link);
2724 
2725 	/* Save metadata. */
2726 	if (sr_meta_save(sd, SR_META_DIRTY)) {
2727 		sr_error(sc, "could not save metadata to %s", devname);
2728 		goto fail;
2729 	}
2730 
2731 	/*
2732 	 * Add chunk to hotspare list.
2733 	 */
2734 	rw_enter_write(&sc->sc_hs_lock);
2735 	cl = &sc->sc_hotspare_list;
2736 	if (SLIST_EMPTY(cl))
2737 		SLIST_INSERT_HEAD(cl, hotspare, src_link);
2738 	else {
2739 		SLIST_FOREACH(chunk, cl, src_link)
2740 			last = chunk;
2741 		SLIST_INSERT_AFTER(last, hotspare, src_link);
2742 	}
2743 	sc->sc_hotspare_no++;
2744 	rw_exit_write(&sc->sc_hs_lock);
2745 
2746 	rv = 0;
2747 	goto done;
2748 
2749 fail:
2750 	if (hotspare)
2751 		free(hotspare, M_DEVBUF);
2752 
2753 done:
2754 	if (sd && sd->sd_vol.sv_chunks)
2755 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
2756 	if (sd)
2757 		free(sd, M_DEVBUF);
2758 	if (sm)
2759 		free(sm, M_DEVBUF);
2760 	if (open) {
2761 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2762 		vput(vn);
2763 	}
2764 
2765 	return (rv);
2766 }
2767 
/*
 * Workq callback: arg1 is the discipline to attempt a hotspare rebuild
 * on; arg2 is unused.
 */
void
sr_hotspare_rebuild_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = arg1;

	sr_hotspare_rebuild(sd);
}
2773 
/*
 * Attempt to rebuild a degraded volume onto a suitable hotspare: find
 * the offline chunk, pick the first hotspare large enough to replace
 * it, wait for outstanding I/O on the failed chunk to drain and then
 * hand the hotspare to sr_rebuild_init().  On success the hotspare is
 * removed from the global hotspare list.
 */
void
sr_hotspare_rebuild(struct sr_discipline *sd)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*hotspare, *chunk = NULL;
	struct sr_workunit	*wu;
	struct sr_ccb		*ccb;
	int			i, s, chunk_no, busy;

	/*
	 * Attempt to locate a hotspare and initiate rebuild.
	 */

	/* Find the offline chunk that needs replacing. */
	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
		if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
		    BIOC_SDOFFLINE) {
			chunk_no = i;
			chunk = sd->sd_vol.sv_chunks[i];
			break;
		}
	}

	if (chunk == NULL) {
		printf("%s: no offline chunk found on %s!\n",
		    DEVNAME(sc), sd->sd_meta->ssd_devname);
		return;
	}

	/* See if we have a suitable hotspare... */
	rw_enter_write(&sc->sc_hs_lock);
	cl = &sc->sc_hotspare_list;
	SLIST_FOREACH(hotspare, cl, src_link)
		if (hotspare->src_size >= chunk->src_size)
			break;

	if (hotspare != NULL) {

		printf("%s: %s volume degraded, will attempt to "
		    "rebuild on hotspare %s\n", DEVNAME(sc),
		    sd->sd_meta->ssd_devname, hotspare->src_devname);

		/*
		 * Ensure that all pending I/O completes on the failed chunk
		 * before trying to initiate a rebuild.
		 */
		i = 0;
		do {
			busy = 0;

			/* Scan pending and deferred queues for ccbs that
			 * still target the failed chunk. */
			s = splbio();
			TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
					if (ccb->ccb_target == chunk_no)
						busy = 1;
				}
			}
			TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) {
				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
					if (ccb->ccb_target == chunk_no)
						busy = 1;
				}
			}
			splx(s);

			/* Retry once a second, for at most 120 seconds. */
			if (busy) {
				tsleep(sd, PRIBIO, "sr_hotspare", hz);
				i++;
			}

		} while (busy && i < 120);

		DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to "
		    "complete on failed chunk %s\n", DEVNAME(sc),
		    i, chunk->src_devname);

		if (busy) {
			printf("%s: pending I/O failed to complete on "
			    "failed chunk %s, hotspare rebuild aborted...\n",
			    DEVNAME(sc), chunk->src_devname);
			goto done;
		}

		s = splbio();
		rw_enter_write(&sc->sc_lock);
		bio_status_init(&sc->sc_status, &sc->sc_dev);
		if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) {

			/* Remove hotspare from available list. */
			sc->sc_hotspare_no--;
			SLIST_REMOVE(cl, hotspare, sr_chunk, src_link);
			free(hotspare, M_DEVBUF);

		}
		rw_exit_write(&sc->sc_lock);
		splx(s);
	}
done:
	rw_exit_write(&sc->sc_hs_lock);
}
2874 
2875 int
2876 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare)
2877 {
2878 	struct sr_softc		*sc = sd->sd_sc;
2879 	struct sr_chunk		*chunk = NULL;
2880 	struct sr_meta_chunk	*meta;
2881 	struct disklabel	label;
2882 	struct vnode		*vn;
2883 	daddr64_t		size, csize;
2884 	char			devname[32];
2885 	int			rv = EINVAL, open = 0;
2886 	int			cid, i, part, status;
2887 
2888 	/*
2889 	 * Attempt to initiate a rebuild onto the specified device.
2890 	 */
2891 
2892 	if (!(sd->sd_capabilities & SR_CAP_REBUILD)) {
2893 		sr_error(sc, "discipline does not support rebuild");
2894 		goto done;
2895 	}
2896 
2897 	/* make sure volume is in the right state */
2898 	if (sd->sd_vol_status == BIOC_SVREBUILD) {
2899 		sr_error(sc, "rebuild already in progress");
2900 		goto done;
2901 	}
2902 	if (sd->sd_vol_status != BIOC_SVDEGRADED) {
2903 		sr_error(sc, "volume not degraded");
2904 		goto done;
2905 	}
2906 
2907 	/* Find first offline chunk. */
2908 	for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) {
2909 		if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status ==
2910 		    BIOC_SDOFFLINE) {
2911 			chunk = sd->sd_vol.sv_chunks[cid];
2912 			break;
2913 		}
2914 	}
2915 	if (chunk == NULL) {
2916 		sr_error(sc, "no offline chunks available to rebuild");
2917 		goto done;
2918 	}
2919 
2920 	/* Get coerced size from another online chunk. */
2921 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
2922 		if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
2923 		    BIOC_SDONLINE) {
2924 			meta = &sd->sd_vol.sv_chunks[i]->src_meta;
2925 			csize = meta->scmi.scm_coerced_size;
2926 			break;
2927 		}
2928 	}
2929 
2930 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2931 	if (bdevvp(dev, &vn)) {
2932 		printf("%s: sr_rebuild_init: can't allocate vnode\n",
2933 		    DEVNAME(sc));
2934 		goto done;
2935 	}
2936 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2937 		DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
2938 		    "open %s\n", DEVNAME(sc), devname);
2939 		vput(vn);
2940 		goto done;
2941 	}
2942 	open = 1; /* close dev on error */
2943 
2944 	/* Get disklabel and check partition. */
2945 	part = DISKPART(dev);
2946 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2947 	    NOCRED, curproc)) {
2948 		DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n",
2949 		    DEVNAME(sc));
2950 		goto done;
2951 	}
2952 	if (label.d_secsize != DEV_BSIZE) {
2953 		sr_error(sc, "%s has unsupported sector size (%d)",
2954 		    devname, label.d_secsize);
2955 		goto done;
2956 	}
2957 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2958 		sr_error(sc, "%s partition not of type RAID (%d)",
2959 		    devname, label.d_partitions[part].p_fstype);
2960 		goto done;
2961 	}
2962 
2963 	/* Is the partition large enough? */
2964 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2965 	if (size < csize) {
2966 		sr_error(sc, "%s partition too small, at least %llu bytes "
2967 		    "required", devname, csize << DEV_BSHIFT);
2968 		goto done;
2969 	} else if (size > csize)
2970 		sr_warn(sc, "%s partition too large, wasting %llu bytes",
2971 		    devname, (size - csize) << DEV_BSHIFT);
2972 
2973 	/* Ensure that this chunk is not already in use. */
2974 	status = sr_chunk_in_use(sc, dev);
2975 	if (status != BIOC_SDINVALID && status != BIOC_SDOFFLINE &&
2976 	    !(hotspare && status == BIOC_SDHOTSPARE)) {
2977 		sr_error(sc, "%s is already in use", devname);
2978 		goto done;
2979 	}
2980 
2981 	/* Reset rebuild counter since we rebuilding onto a new chunk. */
2982 	sd->sd_meta->ssd_rebuild = 0;
2983 
2984 	open = 0; /* leave dev open from here on out */
2985 
2986 	/* Fix up chunk. */
2987 	bcopy(label.d_uid, chunk->src_duid, sizeof(chunk->src_duid));
2988 	chunk->src_dev_mm = dev;
2989 	chunk->src_vn = vn;
2990 
2991 	/* Reconstruct metadata. */
2992 	meta = &chunk->src_meta;
2993 	meta->scmi.scm_volid = sd->sd_meta->ssdi.ssd_volid;
2994 	meta->scmi.scm_chunk_id = cid;
2995 	strlcpy(meta->scmi.scm_devname, devname,
2996 	    sizeof(meta->scmi.scm_devname));
2997 	meta->scmi.scm_size = size;
2998 	meta->scmi.scm_coerced_size = csize;
2999 	bcopy(&sd->sd_meta->ssdi.ssd_uuid, &meta->scmi.scm_uuid,
3000 	    sizeof(meta->scmi.scm_uuid));
3001 	sr_checksum(sc, meta, &meta->scm_checksum,
3002 	    sizeof(struct sr_meta_chunk_invariant));
3003 
3004 	sd->sd_set_chunk_state(sd, cid, BIOC_SDREBUILD);
3005 
3006 	if (sr_meta_save(sd, SR_META_DIRTY)) {
3007 		sr_error(sc, "could not save metadata to %s", devname);
3008 		open = 1;
3009 		goto done;
3010 	}
3011 
3012 	sr_warn(sc, "rebuild of %s started on %s",
3013 	    sd->sd_meta->ssd_devname, devname);
3014 
3015 	sd->sd_reb_abort = 0;
3016 	kthread_create_deferred(sr_rebuild, sd);
3017 
3018 	rv = 0;
3019 done:
3020 	if (open) {
3021 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
3022 		vput(vn);
3023 	}
3024 
3025 	return (rv);
3026 }
3027 
3028 void
3029 sr_roam_chunks(struct sr_discipline *sd)
3030 {
3031 	struct sr_softc		*sc = sd->sd_sc;
3032 	struct sr_chunk		*chunk;
3033 	struct sr_meta_chunk	*meta;
3034 	int			roamed = 0;
3035 
3036 	/* Have any chunks roamed? */
3037 	SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) {
3038 		meta = &chunk->src_meta;
3039 		if (strncmp(meta->scmi.scm_devname, chunk->src_devname,
3040 		    sizeof(meta->scmi.scm_devname))) {
3041 
3042 			printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
3043 			    meta->scmi.scm_devname, chunk->src_devname);
3044 
3045 			strlcpy(meta->scmi.scm_devname, chunk->src_devname,
3046 			    sizeof(meta->scmi.scm_devname));
3047 
3048 			roamed++;
3049 		}
3050 	}
3051 
3052 	if (roamed)
3053 		sr_meta_save(sd, SR_META_DIRTY);
3054 }
3055 
3056 int
3057 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
3058 {
3059 	struct sr_meta_opt_item *omi;
3060 	struct sr_chunk_head	*cl;
3061 	struct sr_discipline	*sd = NULL;
3062 	struct sr_chunk		*ch_entry;
3063 	struct scsi_link	*link;
3064 	struct device		*dev;
3065 	char			*uuid, devname[32];
3066 	dev_t			*dt;
3067 	int			i, no_chunk, rv = EINVAL, target, vol;
3068 	int			no_meta;
3069 
3070 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
3071 	    DEVNAME(sc), user);
3072 
3073 	/* user input */
3074 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
3075 		goto unwind;
3076 
3077 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
3078 	if (user) {
3079 		if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0)
3080 			goto unwind;
3081 	} else
3082 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
3083 
3084 	/* Initialise discipline. */
3085 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
3086 	sd->sd_sc = sc;
3087 	SLIST_INIT(&sd->sd_meta_opt);
3088 	sd->sd_workq = workq_create("srdis", 1, IPL_BIO);
3089 	if (sd->sd_workq == NULL) {
3090 		sr_error(sc, "could not create discipline workq");
3091 		goto unwind;
3092 	}
3093 	if (sr_discipline_init(sd, bc->bc_level)) {
3094 		sr_error(sc, "could not initialize discipline");
3095 		goto unwind;
3096 	}
3097 
3098 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
3099 	cl = &sd->sd_vol.sv_chunk_list;
3100 	SLIST_INIT(cl);
3101 
3102 	/* Ensure that chunks are not already in use. */
3103 	for (i = 0; i < no_chunk; i++) {
3104 		if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) {
3105 			sr_meta_getdevname(sc, dt[i], devname, sizeof(devname));
3106 			sr_error(sc, "chunk %s already in use", devname);
3107 			goto unwind;
3108 		}
3109 	}
3110 
3111 	sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
3112 	if (sd->sd_meta_type == SR_META_F_INVALID) {
3113 		sr_error(sc, "invalid metadata format");
3114 		goto unwind;
3115 	}
3116 
3117 	if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE))
3118 		goto unwind;
3119 
3120 	/* force the raid volume by clearing metadata region */
3121 	if (bc->bc_flags & BIOC_SCFORCE) {
3122 		/* make sure disk isn't up and running */
3123 		if (sr_meta_read(sd))
3124 			if (sr_already_assembled(sd)) {
3125 				uuid = sr_uuid_format(
3126 				    &sd->sd_meta->ssdi.ssd_uuid);
3127 				sr_error(sc, "disk %s is currently in use; "
3128 				    "cannot force create", uuid);
3129 				free(uuid, M_DEVBUF);
3130 				goto unwind;
3131 			}
3132 
3133 		if (sr_meta_clear(sd)) {
3134 			sr_error(sc, "failed to clear metadata");
3135 			goto unwind;
3136 		}
3137 	}
3138 
3139 	no_meta = sr_meta_read(sd);
3140 	if (no_meta == -1) {
3141 
3142 		/* Corrupt metadata on one or more chunks. */
3143 		sr_error(sc, "one of the chunks has corrupt metadata; "
3144 		    "aborting assembly");
3145 		goto unwind;
3146 
3147 	} else if (no_meta == 0) {
3148 
3149 		/* Initialise volume and chunk metadata. */
3150 		sr_meta_init(sd, bc->bc_level, no_chunk);
3151 		sd->sd_vol_status = BIOC_SVONLINE;
3152 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
3153 		if (sd->sd_create) {
3154 			if ((i = sd->sd_create(sd, bc, no_chunk,
3155 			    sd->sd_vol.sv_chunk_minsz))) {
3156 				rv = i;
3157 				goto unwind;
3158 			}
3159 		}
3160 		sr_meta_init_complete(sd);
3161 
3162 		DNPRINTF(SR_D_IOCTL,
3163 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
3164 		    DEVNAME(sc), sd->sd_meta->ssdi.ssd_size);
3165 
3166 		/* Warn if we've wasted chunk space due to coercing. */
3167 		if ((sd->sd_capabilities & SR_CAP_NON_COERCED) == 0 &&
3168 		    sd->sd_vol.sv_chunk_minsz != sd->sd_vol.sv_chunk_maxsz)
3169 			sr_warn(sc, "chunk sizes are not equal; up to %llu "
3170 			    "blocks wasted per chunk",
3171 			    sd->sd_vol.sv_chunk_maxsz -
3172 			    sd->sd_vol.sv_chunk_minsz);
3173 
3174 	} else {
3175 
3176 		/* Ensure metadata level matches requested assembly level. */
3177 		if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) {
3178 			sr_error(sc, "volume level does not match metadata "
3179 			    "level");
3180 			goto unwind;
3181 		}
3182 
3183 		if (sr_already_assembled(sd)) {
3184 			uuid = sr_uuid_format(&sd->sd_meta->ssdi.ssd_uuid);
3185 			sr_error(sc, "disk %s already assembled", uuid);
3186 			free(uuid, M_DEVBUF);
3187 			goto unwind;
3188 		}
3189 
3190 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
3191 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
3192 			    "metadata\n", DEVNAME(sc));
3193 			goto unwind;
3194 		}
3195 
3196 		if (no_meta != no_chunk)
3197 			sr_warn(sc, "trying to bring up %s degraded",
3198 			    sd->sd_meta->ssd_devname);
3199 
3200 		if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
3201 			sr_warn(sc, "%s was not shutdown properly",
3202 			    sd->sd_meta->ssd_devname);
3203 
3204 		SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link)
3205 			if (sd->sd_meta_opt_handler == NULL ||
3206 			    sd->sd_meta_opt_handler(sd, omi->omi_som) != 0)
3207 				sr_meta_opt_handler(sd, omi->omi_som);
3208 
3209 		if (sd->sd_assemble) {
3210 			if ((i = sd->sd_assemble(sd, bc, no_chunk))) {
3211 				rv = i;
3212 				goto unwind;
3213 			}
3214 		}
3215 
3216 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
3217 		    DEVNAME(sc));
3218 
3219 	}
3220 
3221 	/* Metadata MUST be fully populated by this point. */
3222 
3223 	/* Allocate all resources. */
3224 	if ((rv = sd->sd_alloc_resources(sd)))
3225 		goto unwind;
3226 
3227 	/* Adjust flags if necessary. */
3228 	if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) &&
3229 	    (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) !=
3230 	    (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) {
3231 		sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE;
3232 		sd->sd_meta->ssdi.ssd_vol_flags |=
3233 		    bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
3234 	}
3235 
3236 	if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) {
3237 
3238 		/* Initialise volume state. */
3239 		sd->sd_set_vol_state(sd);
3240 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3241 			sr_error(sc, "%s is offline, will not be brought "
3242 			    "online", sd->sd_meta->ssd_devname);
3243 			goto unwind;
3244 		}
3245 
3246 		/* Setup SCSI iopool. */
3247 		mtx_init(&sd->sd_wu_mtx, IPL_BIO);
3248 		scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put);
3249 
3250 		/*
3251 		 * All checks passed - return ENXIO if volume cannot be created.
3252 		 */
3253 		rv = ENXIO;
3254 
3255 		/*
3256 		 * Find a free target.
3257 		 *
3258 		 * XXX: We reserve sd_target == 0 to indicate the
3259 		 * discipline is not linked into sc->sc_dis, so begin
3260 		 * the search with target = 1.
3261 		 */
3262 		for (target = 1; target < SR_MAX_LD; target++)
3263 			if (sc->sc_dis[target] == NULL)
3264 				break;
3265 		if (target == SR_MAX_LD) {
3266 			sr_error(sc, "no free target for %s",
3267 			    sd->sd_meta->ssd_devname);
3268 			goto unwind;
3269 		}
3270 
3271 		/* Clear sense data. */
3272 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
3273 
3274 		/* Attach discipline and get midlayer to probe it. */
3275 		sd->sd_target = target;
3276 		sc->sc_dis[target] = sd;
3277 		if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) {
3278 			sr_error(sc, "scsi_probe_lun failed");
3279 			sc->sc_dis[target] = NULL;
3280 			sd->sd_target = 0;
3281 			goto unwind;
3282 		}
3283 
3284 		link = scsi_get_link(sc->sc_scsibus, target, 0);
3285 		dev = link->device_softc;
3286 		DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n",
3287 		    DEVNAME(sc), dev->dv_xname, sd->sd_target);
3288 
3289 		for (i = 0, vol = -1; i <= sd->sd_target; i++)
3290 			if (sc->sc_dis[i])
3291 				vol++;
3292 
3293 		rv = 0;
3294 
3295 		if (sd->sd_meta->ssd_devname[0] != '\0' &&
3296 		    strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
3297 		    sizeof(dev->dv_xname)))
3298 			sr_warn(sc, "volume %s is roaming, it used to be %s, "
3299 			    "updating metadata", dev->dv_xname,
3300 			    sd->sd_meta->ssd_devname);
3301 
3302 		/* Populate remaining volume metadata. */
3303 		sd->sd_meta->ssdi.ssd_volid = vol;
3304 		strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
3305 		    sizeof(sd->sd_meta->ssd_devname));
3306 
3307 		sr_info(sc, "%s volume attached as %s",
3308 		    sd->sd_meta->ssdi.ssd_product, sd->sd_meta->ssd_devname);
3309 
3310 		/* Update device name on any roaming chunks. */
3311 		sr_roam_chunks(sd);
3312 
3313 #ifndef SMALL_KERNEL
3314 		if (sr_sensors_create(sd))
3315 			sr_warn(sc, "unable to create sensor for %s",
3316 			    dev->dv_xname);
3317 #endif /* SMALL_KERNEL */
3318 	} else {
3319 		/* This volume does not attach as a system disk. */
3320 		ch_entry = SLIST_FIRST(cl); /* XXX */
3321 		strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname,
3322 		    sizeof(sd->sd_meta->ssd_devname));
3323 
3324 		if (sd->sd_start_discipline(sd))
3325 			goto unwind;
3326 	}
3327 
3328 	/* Save current metadata to disk. */
3329 	rv = sr_meta_save(sd, SR_META_DIRTY);
3330 
3331 	if (sd->sd_vol_status == BIOC_SVREBUILD)
3332 		kthread_create_deferred(sr_rebuild, sd);
3333 
3334 	sd->sd_ready = 1;
3335 
3336 	return (rv);
3337 
3338 unwind:
3339 	sr_discipline_shutdown(sd, 0);
3340 
3341 	if (rv == EAGAIN)
3342 		rv = 0;
3343 
3344 	return (rv);
3345 }
3346 
3347 int
3348 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
3349 {
3350 	struct sr_discipline	*sd = NULL;
3351 	int			rv = 1;
3352 	int			i;
3353 
3354 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
3355 	    dr->bd_dev);
3356 
3357 	for (i = 0; i < SR_MAX_LD; i++)
3358 		if (sc->sc_dis[i]) {
3359 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3360 			    dr->bd_dev,
3361 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3362 				sd = sc->sc_dis[i];
3363 				break;
3364 			}
3365 		}
3366 
3367 	if (sd == NULL) {
3368 		sr_error(sc, "volume %s not found", dr->bd_dev);
3369 		goto bad;
3370 	}
3371 
3372 	sd->sd_deleted = 1;
3373 	sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE;
3374 	sr_discipline_shutdown(sd, 1);
3375 
3376 	rv = 0;
3377 bad:
3378 	return (rv);
3379 }
3380 
3381 int
3382 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd)
3383 {
3384 	struct sr_discipline	*sd = NULL;
3385 	int			i, rv = 1;
3386 
3387 	/* Dispatch a discipline specific ioctl. */
3388 
3389 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc),
3390 	    bd->bd_dev);
3391 
3392 	for (i = 0; i < SR_MAX_LD; i++)
3393 		if (sc->sc_dis[i]) {
3394 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3395 			    bd->bd_dev,
3396 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3397 				sd = sc->sc_dis[i];
3398 				break;
3399 			}
3400 		}
3401 
3402 	if (sd && sd->sd_ioctl_handler)
3403 		rv = sd->sd_ioctl_handler(sd, bd);
3404 
3405 	return (rv);
3406 }
3407 
3408 int
3409 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb)
3410 {
3411 	void			*bootblk = NULL, *bootldr = NULL;
3412 	struct sr_discipline	*sd = NULL;
3413 	struct sr_chunk		*chunk;
3414 	struct sr_meta_opt_item *omi;
3415 	struct sr_meta_boot	*sbm;
3416 	struct disk		*dk;
3417 	u_int32_t		bbs, bls;
3418 	u_char			duid[8];
3419 	int			rv = EINVAL;
3420 	int			i;
3421 
3422 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc),
3423 	    bb->bb_dev);
3424 
3425 	for (i = 0; i < SR_MAX_LD; i++)
3426 		if (sc->sc_dis[i]) {
3427 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3428 			    bb->bb_dev,
3429 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3430 				sd = sc->sc_dis[i];
3431 				break;
3432 			}
3433 		}
3434 
3435 	if (sd == NULL)
3436 		goto done;
3437 
3438 	bzero(duid, sizeof(duid));
3439 	TAILQ_FOREACH(dk, &disklist,  dk_link)
3440 		if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev)))
3441 			break;
3442 	if (dk == NULL || dk->dk_label == NULL ||
3443 	    (dk->dk_flags & DKF_LABELVALID) == 0 ||
3444 	    bcmp(dk->dk_label->d_uid, &duid, sizeof(duid)) == 0) {
3445 		sr_error(sc, "failed to get DUID for softraid volume");
3446 		goto done;
3447 	}
3448 	bcopy(dk->dk_label->d_uid, duid, sizeof(duid));
3449 
3450 	/* Ensure that boot storage area is large enough. */
3451 	if (sd->sd_meta->ssd_data_offset < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) {
3452 		sr_error(sc, "insufficient boot storage");
3453 		goto done;
3454 	}
3455 
3456 	if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512)
3457 		goto done;
3458 
3459 	if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512)
3460 		goto done;
3461 
3462 	/* Copy in boot block. */
3463 	bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE;
3464 	bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO);
3465 	if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0)
3466 		goto done;
3467 
3468 	/* Copy in boot loader. */
3469 	bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE;
3470 	bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO);
3471 	if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0)
3472 		goto done;
3473 
3474 	/* Create or update optional meta for bootable volumes. */
3475 	SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link)
3476 		if (omi->omi_som->som_type == SR_OPT_BOOT)
3477 			break;
3478 	if (omi == NULL) {
3479 		omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
3480 		    M_WAITOK | M_ZERO);
3481 		omi->omi_som = malloc(sizeof(struct sr_meta_crypto), M_DEVBUF,
3482 		    M_WAITOK | M_ZERO);
3483 		omi->omi_som->som_type = SR_OPT_BOOT;
3484 		omi->omi_som->som_length = sizeof(struct sr_meta_boot);
3485 		SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link);
3486 		sd->sd_meta->ssdi.ssd_opt_no++;
3487 	}
3488 	sbm = (struct sr_meta_boot *)omi->omi_som;
3489 
3490 	bcopy(duid, sbm->sbm_root_duid, sizeof(sbm->sbm_root_duid));
3491 	bzero(&sbm->sbm_boot_duid, sizeof(sbm->sbm_boot_duid));
3492 	sbm->sbm_bootblk_size = bbs;
3493 	sbm->sbm_bootldr_size = bls;
3494 
3495 	DNPRINTF(SR_D_IOCTL, "sr_ioctl_installboot: root duid is "
3496 	    "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx\n",
3497 	    sbm->sbm_root_duid[0], sbm->sbm_root_duid[1],
3498 	    sbm->sbm_root_duid[2], sbm->sbm_root_duid[3],
3499 	    sbm->sbm_root_duid[4], sbm->sbm_root_duid[5],
3500 	    sbm->sbm_root_duid[6], sbm->sbm_root_duid[7]);
3501 
3502 	/* Save boot block and boot loader to each chunk. */
3503 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
3504 
3505 		chunk = sd->sd_vol.sv_chunks[i];
3506 		if (chunk->src_meta.scm_status != BIOC_SDONLINE &&
3507 		    chunk->src_meta.scm_status != BIOC_SDREBUILD)
3508 			continue;
3509 
3510 		if (i < SR_MAX_BOOT_DISKS)
3511 			bcopy(chunk->src_duid, &sbm->sbm_boot_duid[i],
3512 			    sizeof(sbm->sbm_boot_duid[i]));
3513 
3514 		/* Save boot blocks. */
3515 		DNPRINTF(SR_D_IOCTL,
3516 		    "sr_ioctl_installboot: saving boot block to %s "
3517 		    "(%u bytes)\n", chunk->src_devname, bbs);
3518 
3519 		if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs,
3520 		    SR_BOOT_BLOCKS_OFFSET, B_WRITE)) {
3521 			sr_error(sc, "failed to write boot block", DEVNAME(sc));
3522 			goto done;
3523 		}
3524 
3525 		/* Save boot loader.*/
3526 		DNPRINTF(SR_D_IOCTL,
3527 		    "sr_ioctl_installboot: saving boot loader to %s "
3528 		    "(%u bytes)\n", chunk->src_devname, bls);
3529 
3530 		if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls,
3531 		    SR_BOOT_LOADER_OFFSET, B_WRITE)) {
3532 			sr_error(sc, "failed to write boot loader");
3533 			goto done;
3534 		}
3535 
3536 	}
3537 
3538 	/* XXX - Install boot block on disk - MD code. */
3539 
3540 	/* Mark volume as bootable and save metadata. */
3541 	sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE;
3542 	if (sr_meta_save(sd, SR_META_DIRTY)) {
3543 		sr_error(sc, "could not save metadata to %s",
3544 		    chunk->src_devname);
3545 		goto done;
3546 	}
3547 
3548 	rv = 0;
3549 
3550 done:
3551 	if (bootblk)
3552 		free(bootblk, M_DEVBUF);
3553 	if (bootldr)
3554 		free(bootldr, M_DEVBUF);
3555 
3556 	return (rv);
3557 }
3558 
/*
 * Close and release every chunk on the list cl, then reinitialise the
 * (now empty) list head.  Safe to call with a NULL list pointer.
 */
void
sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
{
	struct sr_chunk		*ch_entry, *ch_next;

	DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));

	if (!cl)
		return;

	/* Safe walk: the current entry is freed inside the loop body. */
	for (ch_entry = SLIST_FIRST(cl);
	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
		ch_next = SLIST_NEXT(ch_entry, src_link);

		DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
		    DEVNAME(sc), ch_entry->src_devname);
		if (ch_entry->src_vn) {
			/*
			 * XXX - explicitly lock the vnode until we can resolve
			 * the problem introduced by vnode aliasing... specfs
			 * has no locking, whereas ufs/ffs does!
			 */
			vn_lock(ch_entry->src_vn, LK_EXCLUSIVE |
			    LK_RETRY, curproc);
			VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED,
			    curproc);
			/* vput() drops both the lock and the reference. */
			vput(ch_entry->src_vn);
		}
		free(ch_entry, M_DEVBUF);
	}
	SLIST_INIT(cl);
}
3591 
/*
 * Release everything a discipline owns: discipline-specific resources,
 * the chunk pointer array, volume/foreign metadata and all optional
 * metadata items; then unlink the discipline from the controller's
 * table and free the structure itself.  NULL is a no-op.
 */
void
sr_discipline_free(struct sr_discipline *sd)
{
	struct sr_softc		*sc;
	struct sr_meta_opt_head *som;
	struct sr_meta_opt_item	*omi, *omi_next;

	if (!sd)
		return;

	sc = sd->sd_sc;

	DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
	    DEVNAME(sc),
	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
	if (sd->sd_free_resources)
		sd->sd_free_resources(sd);
	if (sd->sd_vol.sv_chunks)
		free(sd->sd_vol.sv_chunks, M_DEVBUF);
	if (sd->sd_meta)
		free(sd->sd_meta, M_DEVBUF);
	if (sd->sd_meta_foreign)
		free(sd->sd_meta_foreign, M_DEVBUF);

	/* Safe walk over optional metadata: entries are freed as we go. */
	som = &sd->sd_meta_opt;
	for (omi = SLIST_FIRST(som); omi != SLIST_END(som); omi = omi_next) {
		omi_next = SLIST_NEXT(omi, omi_link);
		if (omi->omi_som)
			free(omi->omi_som, M_DEVBUF);
		free(omi, M_DEVBUF);
	}

	/* sd_target == 0 means the discipline was never linked in. */
	if (sd->sd_target != 0) {
		KASSERT(sc->sc_dis[sd->sd_target] == sd);
		sc->sc_dis[sd->sd_target] = NULL;
	}

	/* Zero before freeing - the discipline may hold sensitive data. */
	explicit_bzero(sd, sizeof *sd);
	free(sd, M_DEVBUF);
}
3632 
3633 void
3634 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save)
3635 {
3636 	struct sr_softc		*sc;
3637 	int			s;
3638 
3639 	if (!sd)
3640 		return;
3641 	sc = sd->sd_sc;
3642 
3643 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
3644 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
3645 
3646 	/* If rebuilding, abort rebuild and drain I/O. */
3647 	if (sd->sd_reb_active) {
3648 		sd->sd_reb_abort = 1;
3649 		while (sd->sd_reb_active)
3650 			tsleep(sd, PWAIT, "sr_shutdown", 1);
3651 	}
3652 
3653 	if (meta_save)
3654 		sr_meta_save(sd, 0);
3655 
3656 	s = splbio();
3657 
3658 	sd->sd_ready = 0;
3659 
3660 	/* make sure there isn't a sync pending and yield */
3661 	wakeup(sd);
3662 	while (sd->sd_sync || sd->sd_must_flush)
3663 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
3664 		    EWOULDBLOCK)
3665 			break;
3666 
3667 #ifndef SMALL_KERNEL
3668 	sr_sensors_delete(sd);
3669 #endif /* SMALL_KERNEL */
3670 
3671 	if (sd->sd_target != 0)
3672 		scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0, DETACH_FORCE);
3673 
3674 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
3675 
3676 	if (sd->sd_workq)
3677 		workq_destroy(sd->sd_workq);
3678 
3679 	if (sd)
3680 		sr_discipline_free(sd);
3681 
3682 	splx(s);
3683 }
3684 
3685 int
3686 sr_discipline_init(struct sr_discipline *sd, int level)
3687 {
3688 	int			rv = 1;
3689 
3690 	/* Initialise discipline function pointers with defaults. */
3691 	sd->sd_alloc_resources = NULL;
3692 	sd->sd_assemble = NULL;
3693 	sd->sd_create = NULL;
3694 	sd->sd_free_resources = NULL;
3695 	sd->sd_ioctl_handler = NULL;
3696 	sd->sd_openings = NULL;
3697 	sd->sd_meta_opt_handler = NULL;
3698 	sd->sd_scsi_inquiry = sr_raid_inquiry;
3699 	sd->sd_scsi_read_cap = sr_raid_read_cap;
3700 	sd->sd_scsi_tur = sr_raid_tur;
3701 	sd->sd_scsi_req_sense = sr_raid_request_sense;
3702 	sd->sd_scsi_start_stop = sr_raid_start_stop;
3703 	sd->sd_scsi_sync = sr_raid_sync;
3704 	sd->sd_scsi_rw = NULL;
3705 	sd->sd_set_chunk_state = sr_set_chunk_state;
3706 	sd->sd_set_vol_state = sr_set_vol_state;
3707 	sd->sd_start_discipline = NULL;
3708 
3709 	switch (level) {
3710 	case 0:
3711 		sr_raid0_discipline_init(sd);
3712 		break;
3713 	case 1:
3714 		sr_raid1_discipline_init(sd);
3715 		break;
3716 	case 4:
3717 		sr_raidp_discipline_init(sd, SR_MD_RAID4);
3718 		break;
3719 	case 5:
3720 		sr_raidp_discipline_init(sd, SR_MD_RAID5);
3721 		break;
3722 	case 6:
3723 		sr_raid6_discipline_init(sd);
3724 		break;
3725 #ifdef AOE
3726 	/* AOE target. */
3727 	case 'A':
3728 		sr_aoe_server_discipline_init(sd);
3729 		break;
3730 	/* AOE initiator. */
3731 	case 'a':
3732 		sr_aoe_discipline_init(sd);
3733 		break;
3734 #endif
3735 #ifdef CRYPTO
3736 	case 'C':
3737 		sr_crypto_discipline_init(sd);
3738 		break;
3739 #endif
3740 	case 'c':
3741 		sr_concat_discipline_init(sd);
3742 		break;
3743 	default:
3744 		goto bad;
3745 	}
3746 
3747 	rv = 0;
3748 bad:
3749 	return (rv);
3750 }
3751 
3752 int
3753 sr_raid_inquiry(struct sr_workunit *wu)
3754 {
3755 	struct sr_discipline	*sd = wu->swu_dis;
3756 	struct scsi_xfer	*xs = wu->swu_xs;
3757 	struct scsi_inquiry	*cdb = (struct scsi_inquiry *)xs->cmd;
3758 	struct scsi_inquiry_data inq;
3759 
3760 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
3761 
3762 	if (xs->cmdlen != sizeof(*cdb))
3763 		return (EINVAL);
3764 
3765 	if (ISSET(cdb->flags, SI_EVPD))
3766 		return (EOPNOTSUPP);
3767 
3768 	bzero(&inq, sizeof(inq));
3769 	inq.device = T_DIRECT;
3770 	inq.dev_qual2 = 0;
3771 	inq.version = 2;
3772 	inq.response_format = 2;
3773 	inq.additional_length = 32;
3774 	inq.flags |= SID_CmdQue;
3775 	strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
3776 	    sizeof(inq.vendor));
3777 	strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
3778 	    sizeof(inq.product));
3779 	strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
3780 	    sizeof(inq.revision));
3781 	sr_copy_internal_data(xs, &inq, sizeof(inq));
3782 
3783 	return (0);
3784 }
3785 
3786 int
3787 sr_raid_read_cap(struct sr_workunit *wu)
3788 {
3789 	struct sr_discipline	*sd = wu->swu_dis;
3790 	struct scsi_xfer	*xs = wu->swu_xs;
3791 	struct scsi_read_cap_data rcd;
3792 	struct scsi_read_cap_data_16 rcd16;
3793 	daddr64_t		addr;
3794 	int			rv = 1;
3795 
3796 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
3797 
3798 	addr = sd->sd_meta->ssdi.ssd_size - 1;
3799 	if (xs->cmd->opcode == READ_CAPACITY) {
3800 		bzero(&rcd, sizeof(rcd));
3801 		if (addr > 0xffffffffllu)
3802 			_lto4b(0xffffffff, rcd.addr);
3803 		else
3804 			_lto4b(addr, rcd.addr);
3805 		_lto4b(512, rcd.length);
3806 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
3807 		rv = 0;
3808 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
3809 		bzero(&rcd16, sizeof(rcd16));
3810 		_lto8b(addr, rcd16.addr);
3811 		_lto4b(512, rcd16.length);
3812 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
3813 		rv = 0;
3814 	}
3815 
3816 	return (rv);
3817 }
3818 
3819 int
3820 sr_raid_tur(struct sr_workunit *wu)
3821 {
3822 	struct sr_discipline	*sd = wu->swu_dis;
3823 
3824 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
3825 
3826 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3827 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3828 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
3829 		sd->sd_scsi_sense.add_sense_code = 0x04;
3830 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
3831 		sd->sd_scsi_sense.extra_len = 4;
3832 		return (1);
3833 	} else if (sd->sd_vol_status == BIOC_SVINVALID) {
3834 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3835 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
3836 		sd->sd_scsi_sense.add_sense_code = 0x05;
3837 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
3838 		sd->sd_scsi_sense.extra_len = 4;
3839 		return (1);
3840 	}
3841 
3842 	return (0);
3843 }
3844 
/*
 * REQUEST SENSE emulation: return the most recent sense data recorded
 * on the discipline to the caller, then clear it so it is reported
 * only once.
 */
int
sr_raid_request_sense(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;

	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
	    DEVNAME(sd->sd_sc));

	/* use latest sense data */
	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));

	/* clear sense data */
	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));

	return (0);
}
3862 
3863 int
3864 sr_raid_start_stop(struct sr_workunit *wu)
3865 {
3866 	struct scsi_xfer	*xs = wu->swu_xs;
3867 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
3868 
3869 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
3870 	    DEVNAME(wu->swu_dis->sd_sc));
3871 
3872 	if (!ss)
3873 		return (1);
3874 
3875 	/*
3876 	 * do nothing!
3877 	 * a softraid discipline should always reflect correct status
3878 	 */
3879 	return (0);
3880 }
3881 
/*
 * SYNCHRONIZE CACHE emulation: wait at splbio for all outstanding
 * work units on the discipline to drain.  Returns 0 once drained, or
 * 1 if a 15 second tsleep() times out first.
 */
int
sr_raid_sync(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	int			s, rv = 0, ios;

	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));

	/* when doing a fake sync don't count the wu */
	ios = wu->swu_fake ? 0 : 1;

	s = splbio();
	sd->sd_sync = 1;

	/* Sleep until only this wu (when real) remains pending. */
	while (sd->sd_wu_pending > ios)
		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
			    DEVNAME(sd->sd_sc));
			rv = 1;
			break;
		}

	sd->sd_sync = 0;
	splx(s);

	/* Wake anyone waiting on the sync, e.g. sr_discipline_shutdown(). */
	wakeup(&sd->sd_sync);

	return (rv);
}
3911 
/*
 * Workq callback that issues all individual I/Os (ccbs) belonging to
 * a work unit.  arg1 is the discipline, arg2 the work unit.
 */
void
sr_startwu_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = arg1;
	struct sr_workunit	*wu = arg2;
	struct sr_ccb		*ccb;
	int			s;

	s = splbio();
	/* Guard against the callback running twice for the same wu. */
	if (wu->swu_cb_active == 1)
		panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc));
	wu->swu_cb_active = 1;

	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
		VOP_STRATEGY(&ccb->ccb_buf);

	wu->swu_cb_active = 0;
	splx(s);
}
3931 
3932 void
3933 sr_raid_startwu(struct sr_workunit *wu)
3934 {
3935 	struct sr_discipline	*sd = wu->swu_dis;
3936 
3937 	splassert(IPL_BIO);
3938 
3939 	if (wu->swu_state == SR_WU_RESTART)
3940 		/*
3941 		 * no need to put the wu on the pending queue since we
3942 		 * are restarting the io
3943 		 */
3944 		 ;
3945 	else
3946 		/* move wu to pending queue */
3947 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
3948 
3949 	/* start all individual ios */
3950 	workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback,
3951 	    sd, wu);
3952 }
3953 
/*
 * Transition chunk c of the volume to new_state, validating that the
 * transition is legal (currently only ONLINE -> OFFLINE).  An illegal
 * transition panics.  On a real state change the volume state is
 * recalculated and an asynchronous metadata save is scheduled.
 */
void
sr_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		if (new_state == BIOC_SDOFFLINE)
			break;
		else
			goto die;
		break;

	case BIOC_SDOFFLINE:
		goto die;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	/* Schedule a metadata save to record the new chunk state. */
	sd->sd_must_flush = 1;
	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
done:
	splx(s);
}
4001 
/*
 * Recompute the volume state from the states of its chunks (for this
 * generic version: ONLINE only when every chunk is online, otherwise
 * OFFLINE) and validate the transition.  Illegal transitions - or an
 * out-of-range chunk state - panic.
 */
void
sr_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	/* Histogram the chunk states. */
	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	/* Volume is online only if every chunk is online. */
	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else
		new_state = BIOC_SVOFFLINE;

	DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE)
			break;
		else
			goto die;
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	default:
die:
		panic("%s: %s: invalid volume state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}
4059 
4060 void
4061 sr_checksum_print(u_int8_t *md5)
4062 {
4063 	int			i;
4064 
4065 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
4066 		printf("%02x", md5[i]);
4067 }
4068 
4069 void
4070 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
4071 {
4072 	MD5_CTX			ctx;
4073 
4074 	DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
4075 	    md5, len);
4076 
4077 	MD5Init(&ctx);
4078 	MD5Update(&ctx, src, len);
4079 	MD5Final(md5, &ctx);
4080 }
4081 
4082 void
4083 sr_uuid_generate(struct sr_uuid *uuid)
4084 {
4085 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
4086 	/* UUID version 4: random */
4087 	uuid->sui_id[6] &= 0x0f;
4088 	uuid->sui_id[6] |= 0x40;
4089 	/* RFC4122 variant */
4090 	uuid->sui_id[8] &= 0x3f;
4091 	uuid->sui_id[8] |= 0x80;
4092 }
4093 
4094 char *
4095 sr_uuid_format(struct sr_uuid *uuid)
4096 {
4097 	char *uuidstr;
4098 
4099 	uuidstr = malloc(37, M_DEVBUF, M_WAITOK);
4100 
4101 	snprintf(uuidstr, 37,
4102 	    "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
4103 	    "%02x%02x%02x%02x%02x%02x",
4104 	    uuid->sui_id[0], uuid->sui_id[1],
4105 	    uuid->sui_id[2], uuid->sui_id[3],
4106 	    uuid->sui_id[4], uuid->sui_id[5],
4107 	    uuid->sui_id[6], uuid->sui_id[7],
4108 	    uuid->sui_id[8], uuid->sui_id[9],
4109 	    uuid->sui_id[10], uuid->sui_id[11],
4110 	    uuid->sui_id[12], uuid->sui_id[13],
4111 	    uuid->sui_id[14], uuid->sui_id[15]);
4112 
4113 	return uuidstr;
4114 }
4115 
4116 void
4117 sr_uuid_print(struct sr_uuid *uuid, int cr)
4118 {
4119 	char *uuidstr;
4120 
4121 	uuidstr = sr_uuid_format(uuid);
4122 	printf("%s%s", uuidstr, (cr ? "\n" : ""));
4123 	free(uuidstr, M_DEVBUF);
4124 }
4125 
4126 int
4127 sr_already_assembled(struct sr_discipline *sd)
4128 {
4129 	struct sr_softc		*sc = sd->sd_sc;
4130 	int			i;
4131 
4132 	for (i = 0; i < SR_MAX_LD; i++)
4133 		if (sc->sc_dis[i])
4134 			if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
4135 			    &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
4136 			    sizeof(sd->sd_meta->ssdi.ssd_uuid)))
4137 				return (1);
4138 
4139 	return (0);
4140 }
4141 
/*
 * Validate a strip size in bytes: it must be a non-zero power of two
 * and a multiple of 512.  Returns log2 of the size on success, -1 on
 * failure.
 */
int32_t
sr_validate_stripsize(u_int32_t b)
{
	int			s = 0;

	/*
	 * Reject zero explicitly - previously b == 0 passed the modulo
	 * test and then spun forever in the shift loop below.
	 */
	if (b == 0 || b % 512)
		return (-1);

	/* Count trailing zero bits. */
	while ((b & 1) == 0) {
		b >>= 1;
		s++;
	}

	/* only multiple of twos */
	b >>= 1;
	if (b)
		return (-1);

	return (s);
}
4162 
/*
 * Shutdown hook wrapper: arg is the softc registered with the hook.
 */
void
sr_shutdownhook(void *arg)
{
	struct sr_softc		*sc = arg;

	sr_shutdown(sc);
}
4168 
4169 void
4170 sr_shutdown(struct sr_softc *sc)
4171 {
4172 	int			i;
4173 
4174 	DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc));
4175 
4176 	/* XXX this will not work when we stagger disciplines */
4177 	for (i = 0; i < SR_MAX_LD; i++)
4178 		if (sc->sc_dis[i])
4179 			sr_discipline_shutdown(sc->sc_dis[i], 1);
4180 }
4181 
/*
 * Validate a SCSI read/write work unit before it is issued: the
 * volume must be online, the transfer non-empty and the CDB a known
 * 6/10/16-byte READ/WRITE variant.  On success the start LBA is
 * returned via blk and swu_blk_start/swu_blk_end are filled in.
 * Returns 0 if the I/O is acceptable, 1 otherwise (sense data is set
 * for out-of-bounds requests).  func names the caller for messages.
 */
int
sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	int			rv = 1;

	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
	    xs->cmd->opcode);

	if (sd->sd_meta->ssd_data_offset == 0)
		panic("invalid data offset");

	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
		    DEVNAME(sd->sd_sc), func);
		goto bad;
	}

	if (xs->datalen == 0) {
		printf("%s: %s: illegal block count for %s\n",
		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
		goto bad;
	}

	/* Decode the LBA according to the CDB length. */
	if (xs->cmdlen == 10)
		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
	else if (xs->cmdlen == 16)
		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
	else if (xs->cmdlen == 6)
		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
	else {
		printf("%s: %s: illegal cmdlen for %s\n",
		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
		goto bad;
	}

	wu->swu_blk_start = *blk;
	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;

	/*
	 * NOTE(review): this rejects only blk_end > ssd_size; if ssd_size
	 * is the total block count, blk_end == ssd_size would be one past
	 * the last valid block - confirm the ssd_size semantics.
	 */
	if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
		    "end: %lld length: %d\n",
		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
		    wu->swu_blk_end, xs->datalen);

		/* ILLEGAL REQUEST, LBA out of range. */
		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
		    SSD_ERRCODE_VALID;
		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
		sd->sd_scsi_sense.add_sense_code = 0x21;
		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
		sd->sd_scsi_sense.extra_len = 4;
		goto bad;
	}

	rv = 0;
bad:
	return (rv);
}
4241 
4242 int
4243 sr_check_io_collision(struct sr_workunit *wu)
4244 {
4245 	struct sr_discipline	*sd = wu->swu_dis;
4246 	struct sr_workunit	*wup;
4247 
4248 	splassert(IPL_BIO);
4249 
4250 	/* walk queue backwards and fill in collider if we have one */
4251 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
4252 		if (wu->swu_blk_end < wup->swu_blk_start ||
4253 		    wup->swu_blk_end < wu->swu_blk_start)
4254 			continue;
4255 
4256 		/* we have an LBA collision, defer wu */
4257 		wu->swu_state = SR_WU_DEFERRED;
4258 		if (wup->swu_collider)
4259 			/* wu is on deferred queue, append to last wu */
4260 			while (wup->swu_collider)
4261 				wup = wup->swu_collider;
4262 
4263 		wup->swu_collider = wu;
4264 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
4265 		sd->sd_wu_collisions++;
4266 		goto queued;
4267 	}
4268 
4269 	return (0);
4270 queued:
4271 	return (1);
4272 }
4273 
4274 void
4275 sr_rebuild(void *arg)
4276 {
4277 	struct sr_discipline	*sd = arg;
4278 	struct sr_softc		*sc = sd->sd_sc;
4279 
4280 	if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc,
4281 	    DEVNAME(sc)) != 0)
4282 		printf("%s: unable to start background operation\n",
4283 		    DEVNAME(sc));
4284 }
4285 
/*
 * Kernel thread body that rebuilds a degraded volume chunk by chunk.
 * The volume is walked in SR_REBUILD_IO_SIZE blocks; each block is read
 * through the discipline's normal I/O path and written back, with the
 * write work unit "collided" onto the read so it starts automatically
 * when the read completes.  Progress is checkpointed in the metadata so
 * a rebuild can resume after a reboot.
 */
void
sr_rebuild_thread(void *arg)
{
	struct sr_discipline	*sd = arg;
	struct sr_softc		*sc = sd->sd_sc;
	daddr64_t		whole_blk, partial_blk, blk, sz, lba;
	daddr64_t		psz, rb, restart;
	struct sr_workunit	*wu_r, *wu_w;
	struct scsi_xfer	xs_r, xs_w;
	struct scsi_rw_16	*cr, *cw;
	int			c, s, slept, percent = 0, old_percent = -1;
	u_int8_t		*buf;

	/* Number of full rebuild I/Os, plus a possible short trailing one. */
	whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
	partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE;

	/* Resume from the block recorded at the last metadata checkpoint. */
	restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE;
	if (restart > whole_blk) {
		printf("%s: bogus rebuild restart offset, starting from 0\n",
		    DEVNAME(sc));
		restart = 0;
	}
	if (restart) {
		/*
		 * XXX there is a hole here; there is a posibility that we
		 * had a restart however the chunk that was supposed to
		 * be rebuilt is no longer valid; we can reach this situation
		 * when a rebuild is in progress and the box crashes and
		 * on reboot the rebuild chunk is different (like zero'd or
		 * replaced).  We need to check the uuid of the chunk that is
		 * being rebuilt to assert this.
		 */
		psz = sd->sd_meta->ssdi.ssd_size;
		rb = sd->sd_meta->ssd_rebuild;
		if (rb > 0)
			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
		else
			percent = 0;
		printf("%s: resuming rebuild on %s at %d%%\n",
		    DEVNAME(sc), sd->sd_meta->ssd_devname, percent);
	}

	sd->sd_reb_active = 1;

	/* currently this is 64k therefore we can use dma_alloc */
	buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK);
	for (blk = restart; blk <= whole_blk; blk++) {
		lba = blk * SR_REBUILD_IO_SIZE;
		sz = SR_REBUILD_IO_SIZE;
		if (blk == whole_blk) {
			/* Last iteration covers only the partial tail. */
			if (partial_blk == 0)
				break;
			sz = partial_blk;
		}

		/* get some wu */
		if ((wu_r = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
			panic("%s: rebuild exhausted wu_r", DEVNAME(sc));
		if ((wu_w = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
			panic("%s: rebuild exhausted wu_w", DEVNAME(sc));

		/* setup read io */
		bzero(&xs_r, sizeof xs_r);
		xs_r.error = XS_NOERROR;
		xs_r.flags = SCSI_DATA_IN;
		xs_r.datalen = sz << DEV_BSHIFT;
		xs_r.data = buf;
		xs_r.cmdlen = sizeof(*cr);
		xs_r.cmd = &xs_r.cmdstore;
		cr = (struct scsi_rw_16 *)xs_r.cmd;
		cr->opcode = READ_16;
		_lto4b(sz, cr->length);
		_lto8b(lba, cr->addr);
		wu_r->swu_flags |= SR_WUF_REBUILD;
		wu_r->swu_xs = &xs_r;
		if (sd->sd_scsi_rw(wu_r)) {
			printf("%s: could not create read io\n",
			    DEVNAME(sc));
			goto fail;
		}

		/* setup write io */
		bzero(&xs_w, sizeof xs_w);
		xs_w.error = XS_NOERROR;
		xs_w.flags = SCSI_DATA_OUT;
		xs_w.datalen = sz << DEV_BSHIFT;
		xs_w.data = buf;
		xs_w.cmdlen = sizeof(*cw);
		xs_w.cmd = &xs_w.cmdstore;
		cw = (struct scsi_rw_16 *)xs_w.cmd;
		cw->opcode = WRITE_16;
		_lto4b(sz, cw->length);
		_lto8b(lba, cw->addr);
		wu_w->swu_flags |= SR_WUF_REBUILD;
		wu_w->swu_xs = &xs_w;
		if (sd->sd_scsi_rw(wu_w)) {
			printf("%s: could not create write io\n",
			    DEVNAME(sc));
			goto fail;
		}

		/*
		 * collide with the read io so that we get automatically
		 * started when the read is done
		 */
		wu_w->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu_w;
		s = splbio();
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);

		/* schedule io */
		if (sr_check_io_collision(wu_r))
			goto queued;

		sr_raid_startwu(wu_r);
queued:
		splx(s);

		/*
		 * wait for read completion
		 * NOTE(review): this sleeps on wu_w, the write WU that is
		 * collided behind the read, so waking here presumably means
		 * both I/Os finished — confirm against the completion path.
		 */
		slept = 0;
		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
			tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
			slept = 1;
		}
		/* yield if we didn't sleep */
		if (slept == 0)
			tsleep(sc, PWAIT, "sr_yield", 1);

		scsi_io_put(&sd->sd_iopool, wu_r);
		scsi_io_put(&sd->sd_iopool, wu_w);

		/* Checkpoint progress for resume after crash/reboot. */
		sd->sd_meta->ssd_rebuild = lba;

		/* save metadata every percent */
		psz = sd->sd_meta->ssdi.ssd_size;
		rb = sd->sd_meta->ssd_rebuild;
		if (rb > 0)
			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
		else
			percent = 0;
		if (percent != old_percent && blk != whole_blk) {
			if (sr_meta_save(sd, SR_META_DIRTY))
				printf("%s: could not save metadata to %s\n",
				    DEVNAME(sc), sd->sd_meta->ssd_devname);
			old_percent = percent;
		}

		if (sd->sd_reb_abort)
			goto abort;
	}

	/* all done */
	sd->sd_meta->ssd_rebuild = 0;
	for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
		    BIOC_SDREBUILD) {
			sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE);
			break;
		}

abort:
	if (sr_meta_save(sd, SR_META_DIRTY))
		printf("%s: could not save metadata to %s\n",
		    DEVNAME(sc), sd->sd_meta->ssd_devname);
fail:
	dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT);
	sd->sd_reb_active = 0;
	kthread_exit(0);
}
4455 
4456 #ifndef SMALL_KERNEL
4457 int
4458 sr_sensors_create(struct sr_discipline *sd)
4459 {
4460 	struct sr_softc		*sc = sd->sd_sc;
4461 	int			rv = 1;
4462 
4463 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
4464 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
4465 
4466 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
4467 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
4468 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
4469 	    sizeof(sd->sd_vol.sv_sensor.desc));
4470 
4471 	sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4472 	sd->sd_vol.sv_sensor_attached = 1;
4473 
4474 	if (sc->sc_sensor_task == NULL) {
4475 		sc->sc_sensor_task = sensor_task_register(sc,
4476 		    sr_sensors_refresh, 10);
4477 		if (sc->sc_sensor_task == NULL)
4478 			goto bad;
4479 	}
4480 
4481 	rv = 0;
4482 bad:
4483 	return (rv);
4484 }
4485 
4486 void
4487 sr_sensors_delete(struct sr_discipline *sd)
4488 {
4489 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc));
4490 
4491 	if (sd->sd_vol.sv_sensor_attached)
4492 		sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4493 }
4494 
4495 void
4496 sr_sensors_refresh(void *arg)
4497 {
4498 	struct sr_softc		*sc = arg;
4499 	struct sr_volume	*sv;
4500 	struct sr_discipline	*sd;
4501 	int			i, vol;
4502 
4503 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
4504 
4505 	for (i = 0, vol = -1; i < SR_MAX_LD; i++) {
4506 		/* XXX this will not work when we stagger disciplines */
4507 		if (!sc->sc_dis[i])
4508 			continue;
4509 
4510 		sd = sc->sc_dis[i];
4511 		sv = &sd->sd_vol;
4512 
4513 		switch(sd->sd_vol_status) {
4514 		case BIOC_SVOFFLINE:
4515 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
4516 			sv->sv_sensor.status = SENSOR_S_CRIT;
4517 			break;
4518 
4519 		case BIOC_SVDEGRADED:
4520 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
4521 			sv->sv_sensor.status = SENSOR_S_WARN;
4522 			break;
4523 
4524 		case BIOC_SVSCRUB:
4525 		case BIOC_SVONLINE:
4526 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
4527 			sv->sv_sensor.status = SENSOR_S_OK;
4528 			break;
4529 
4530 		default:
4531 			sv->sv_sensor.value = 0; /* unknown */
4532 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
4533 		}
4534 	}
4535 }
4536 #endif /* SMALL_KERNEL */
4537 
#ifdef SR_FANCY_STATS
void				sr_print_stats(void);

/*
 * Debug helper: print pending-I/O and collision counters for every
 * attached softraid volume.
 *
 * Fixes: `sc` was read uninitialized when the cd_devs scan found no
 * softc (undefined behavior on `if (!sc)`) — now initialized to NULL.
 * The unused local `vol` has been removed.
 */
void
sr_print_stats(void)
{
	struct sr_softc		*sc = NULL;
	struct sr_discipline	*sd;
	int			i;

	for (i = 0; i < softraid_cd.cd_ndevs; i++)
		if (softraid_cd.cd_devs[i]) {
			sc = softraid_cd.cd_devs[i];
			/* we'll only have one softc */
			break;
		}

	if (!sc) {
		printf("no softraid softc found\n");
		return;
	}

	for (i = 0; i < SR_MAX_LD; i++) {
		/* XXX this will not work when we stagger disciplines */
		if (!sc->sc_dis[i])
			continue;

		sd = sc->sc_dis[i];
		printf("%s: ios pending: %d  collisions %llu\n",
		    sd->sd_meta->ssd_devname,
		    sd->sd_wu_pending,
		    sd->sd_wu_collisions);
	}
}
#endif /* SR_FANCY_STATS */
4573 
4574 #ifdef SR_DEBUG
4575 void
4576 sr_meta_print(struct sr_metadata *m)
4577 {
4578 	int			i;
4579 	struct sr_meta_chunk	*mc;
4580 	struct sr_meta_opt_hdr	*omh;
4581 
4582 	if (!(sr_debug & SR_D_META))
4583 		return;
4584 
4585 	printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
4586 	printf("\tssd_version %d\n", m->ssdi.ssd_version);
4587 	printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags);
4588 	printf("\tssd_uuid ");
4589 	sr_uuid_print(&m->ssdi.ssd_uuid, 1);
4590 	printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
4591 	printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
4592 	printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
4593 	printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
4594 	printf("\tssd_level %d\n", m->ssdi.ssd_level);
4595 	printf("\tssd_size %lld\n", m->ssdi.ssd_size);
4596 	printf("\tssd_devname %s\n", m->ssd_devname);
4597 	printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
4598 	printf("\tssd_product %s\n", m->ssdi.ssd_product);
4599 	printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
4600 	printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
4601 	printf("\tssd_checksum ");
4602 	sr_checksum_print(m->ssd_checksum);
4603 	printf("\n");
4604 	printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
4605 	printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
4606 
4607 	mc = (struct sr_meta_chunk *)(m + 1);
4608 	for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
4609 		printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
4610 		printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
4611 		printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
4612 		printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
4613 		printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
4614 		printf("\t\tscm_uuid ");
4615 		sr_uuid_print(&mc->scmi.scm_uuid, 1);
4616 		printf("\t\tscm_checksum ");
4617 		sr_checksum_print(mc->scm_checksum);
4618 		printf("\n");
4619 		printf("\t\tscm_status %d\n", mc->scm_status);
4620 	}
4621 
4622 	omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(m + 1) +
4623 	    sizeof(struct sr_meta_chunk) * m->ssdi.ssd_chunk_no);
4624 	for (i = 0; i < m->ssdi.ssd_opt_no; i++) {
4625 		printf("\t\t\tsom_type %d\n", omh->som_type);
4626 		printf("\t\t\tsom_checksum ");
4627 		sr_checksum_print(omh->som_checksum);
4628 		printf("\n");
4629 		omh = (struct sr_meta_opt_hdr *)((void *)omh +
4630 		    omh->som_length);
4631 	}
4632 }
4633 
4634 void
4635 sr_dump_mem(u_int8_t *p, int len)
4636 {
4637 	int			i;
4638 
4639 	for (i = 0; i < len; i++)
4640 		printf("%02x ", *p++);
4641 	printf("\n");
4642 }
4643 
4644 #endif /* SR_DEBUG */
4645