xref: /openbsd-src/sys/dev/softraid.c (revision 7d464165e831b6257b4befbd30d2fbb433d300de)
1 /* $OpenBSD: softraid.c,v 1.227 2011/04/14 02:11:23 marco Exp $ */
2 /*
3  * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bio.h"
21 
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/buf.h>
25 #include <sys/device.h>
26 #include <sys/ioctl.h>
27 #include <sys/proc.h>
28 #include <sys/malloc.h>
29 #include <sys/pool.h>
30 #include <sys/kernel.h>
31 #include <sys/disk.h>
32 #include <sys/rwlock.h>
33 #include <sys/queue.h>
34 #include <sys/fcntl.h>
35 #include <sys/disklabel.h>
36 #include <sys/mount.h>
37 #include <sys/sensors.h>
38 #include <sys/stat.h>
39 #include <sys/conf.h>
40 #include <sys/uio.h>
41 #include <sys/workq.h>
42 #include <sys/kthread.h>
43 #include <sys/dkio.h>
44 
45 #ifdef AOE
46 #include <sys/mbuf.h>
47 #include <net/if_aoe.h>
48 #endif /* AOE */
49 
50 #include <crypto/cryptodev.h>
51 
52 #include <scsi/scsi_all.h>
53 #include <scsi/scsiconf.h>
54 #include <scsi/scsi_disk.h>
55 
56 #include <dev/softraidvar.h>
57 #include <dev/rndvar.h>
58 
59 /* #define SR_FANCY_STATS */
60 
/*
 * Debug category mask, only built when SR_DEBUG is defined (which also
 * switches on SR_FANCY_STATS).  The initial value is 0; each SR_D_* term
 * below is commented out and can be uncommented to OR that category into
 * the initial mask.  The mask is tested directly in this file, e.g.
 * "sr_debug & SR_D_META" in sr_meta_save().
 */
61 #ifdef SR_DEBUG
62 #define SR_FANCY_STATS
63 uint32_t	sr_debug = 0
64 		    /* | SR_D_CMD */
65 		    /* | SR_D_MISC */
66 		    /* | SR_D_INTR */
67 		    /* | SR_D_IOCTL */
68 		    /* | SR_D_CCB */
69 		    /* | SR_D_WU */
70 		    /* | SR_D_META */
71 		    /* | SR_D_DIS */
72 		    /* | SR_D_STATE */
73 		;
74 #endif
75 
/* device match/attach/detach entry points referenced by softraid_ca */
76 int		sr_match(struct device *, void *, void *);
77 void		sr_attach(struct device *, struct device *, void *);
78 int		sr_detach(struct device *, int);
79 
/* attachment glue: softc size followed by the three entry points above */
80 struct cfattach softraid_ca = {
81 	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
82 };
83 
/* driver description: name "softraid", class DV_DULL */
84 struct cfdriver softraid_cd = {
85 	NULL, "softraid", DV_DULL
86 };
87 
/*
 * Forward declarations for functions defined in this file.
 *
 * NOTE(review): several functions used further down (sr_meta_getdevname,
 * sr_meta_save, sr_meta_read, sr_meta_validate, sr_meta_native_read,
 * sr_checksum) have no prototype in this list; presumably they are declared
 * in <dev/softraidvar.h> -- confirm.
 */
88 /* scsi & discipline */
89 void			sr_scsi_cmd(struct scsi_xfer *);
90 void			sr_minphys(struct buf *bp, struct scsi_link *sl);
91 void			sr_copy_internal_data(struct scsi_xfer *,
92 			    void *, size_t);
93 int			sr_scsi_ioctl(struct scsi_link *, u_long,
94 			    caddr_t, int);
95 int			sr_ioctl(struct device *, u_long, caddr_t);
96 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
97 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
98 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
99 int			sr_ioctl_setstate(struct sr_softc *,
100 			    struct bioc_setstate *);
101 int			sr_ioctl_createraid(struct sr_softc *,
102 			    struct bioc_createraid *, int);
103 int			sr_ioctl_deleteraid(struct sr_softc *,
104 			    struct bioc_deleteraid *);
105 int			sr_ioctl_discipline(struct sr_softc *,
106 			    struct bioc_discipline *);
107 int			sr_ioctl_installboot(struct sr_softc *,
108 			    struct bioc_installboot *);
109 void			sr_chunks_unwind(struct sr_softc *,
110 			    struct sr_chunk_head *);
111 void			sr_discipline_free(struct sr_discipline *);
112 void			sr_discipline_shutdown(struct sr_discipline *);
113 int			sr_discipline_init(struct sr_discipline *, int);
114 
115 /* utility functions */
116 void			sr_shutdown(void *);
117 void			sr_uuid_get(struct sr_uuid *);
118 void			sr_uuid_print(struct sr_uuid *, int);
119 void			sr_checksum_print(u_int8_t *);
120 int			sr_boot_assembly(struct sr_softc *);
121 int			sr_already_assembled(struct sr_discipline *);
122 int			sr_hotspare(struct sr_softc *, dev_t);
123 void			sr_hotspare_rebuild(struct sr_discipline *);
124 int			sr_rebuild_init(struct sr_discipline *, dev_t, int);
125 void			sr_rebuild(void *);
126 void			sr_rebuild_thread(void *);
127 void			sr_roam_chunks(struct sr_discipline *);
128 int			sr_chunk_in_use(struct sr_softc *, dev_t);
129 void			sr_startwu_callback(void *, void *);
/* sr_rw: synchronous transfer of a byte range to or from a block device */
130 int			sr_rw(struct sr_softc *, dev_t, char *, size_t,
131 			    daddr64_t, long);
132 
133 /* don't include these on RAMDISK */
134 #ifndef SMALL_KERNEL
135 void			sr_sensors_refresh(void *);
136 int			sr_sensors_create(struct sr_discipline *);
137 void			sr_sensors_delete(struct sr_discipline *);
138 #endif
139 
140 /* metadata */
141 int			sr_meta_probe(struct sr_discipline *, dev_t *, int);
142 int			sr_meta_attach(struct sr_discipline *, int, int);
143 int			sr_meta_rw(struct sr_discipline *, dev_t, void *,
144 			    size_t, daddr64_t, long);
145 int			sr_meta_clear(struct sr_discipline *);
146 void			sr_meta_chunks_create(struct sr_softc *,
147 			    struct sr_chunk_head *);
148 void			sr_meta_init(struct sr_discipline *,
149 			    struct sr_chunk_head *);
150 void			sr_meta_opt_load(struct sr_discipline *,
151 			    struct sr_meta_opt *);
152 
153 /* hotplug magic */
154 void			sr_disk_attach(struct disk *, int);
155 
/*
 * One registered hotplug callback: the function to call and the discipline
 * it is associated with, linked on a singly-linked list.
 */
156 struct sr_hotplug_list {
157 	void			(*sh_hotplug)(struct sr_discipline *,
158 				    struct disk *, int);
159 	struct sr_discipline	*sh_sd;
160 
161 	SLIST_ENTRY(sr_hotplug_list) shl_link;
162 };
163 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);
164 
/* head of the callback list, and a hook pointer that is defined elsewhere */
165 struct			sr_hotplug_list_head	sr_hotplug_callbacks;
166 extern void		(*softraid_disk_attach)(struct disk *, int);
167 
168 /* scsi glue */
/*
 * Adapter entry points handed to the scsi layer: command handler, minphys
 * routine, two unused (NULL) slots and the ioctl handler.
 */
169 struct scsi_adapter sr_switch = {
170 	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
171 };
172 
173 /* native metadata format */
174 int			sr_meta_native_bootprobe(struct sr_softc *, dev_t,
175 			    struct sr_metadata_list_head *);
/* return values of sr_meta_native_bootprobe() */
176 #define SR_META_NOTCLAIMED	(0)
177 #define SR_META_CLAIMED		(1)
178 int			sr_meta_native_probe(struct sr_softc *,
179 			   struct sr_chunk *);
180 int			sr_meta_native_attach(struct sr_discipline *, int);
181 int			sr_meta_native_write(struct sr_discipline *, dev_t,
182 			    struct sr_metadata *,void *);
183 
/* sr_meta_print() expands to nothing unless SR_DEBUG is defined */
184 #ifdef SR_DEBUG
185 void			sr_meta_print(struct sr_metadata *);
186 #else
187 #define			sr_meta_print(m)
188 #endif
189 
190 /* the metadata driver should remain stateless */
/*
 * Table of metadata drivers.  Code in this file indexes it with
 * sd->sd_meta_type and scans it until an entry with a NULL smd_probe is
 * reached.  Only the native format is present; its detach and validate
 * hooks are NULL, as are all hooks of the terminating entry.
 */
191 struct sr_meta_driver {
192 	daddr64_t		smd_offset;	/* metadata location */
193 	u_int32_t		smd_size;	/* size of metadata */
194 
195 	int			(*smd_probe)(struct sr_softc *,
196 				   struct sr_chunk *);
197 	int			(*smd_attach)(struct sr_discipline *, int);
198 	int			(*smd_detach)(struct sr_discipline *);
199 	int			(*smd_read)(struct sr_discipline *, dev_t,
200 				    struct sr_metadata *, void *);
201 	int			(*smd_write)(struct sr_discipline *, dev_t,
202 				    struct sr_metadata *, void *);
203 	int			(*smd_validate)(struct sr_discipline *,
204 				    struct sr_metadata *, void *);
205 } smd[] = {
	/* native softraid metadata */
206 	{ SR_META_OFFSET, SR_META_SIZE * 512,
207 	  sr_meta_native_probe, sr_meta_native_attach, NULL,
208 	  sr_meta_native_read, sr_meta_native_write, NULL },
	/* terminator: remaining members are implicitly zero */
209 	{ 0, 0, NULL, NULL, NULL, NULL }
210 };
211 
212 int
213 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
214 {
215 	struct sr_softc		*sc = sd->sd_sc;
216 	struct sr_chunk_head	*cl;
217 	struct sr_chunk		*ch_entry, *chunk1, *chunk2;
218 	int			rv = 1, i = 0;
219 
220 	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc));
221 
222 	/* in memory copy of metadata */
223 	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
224 	if (!sd->sd_meta) {
225 		printf("%s: could not allocate memory for metadata\n",
226 		    DEVNAME(sc));
227 		goto bad;
228 	}
229 
230 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
231 		/* in memory copy of foreign metadata */
232 		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
233 		    M_DEVBUF, M_ZERO | M_NOWAIT);
234 		if (!sd->sd_meta_foreign) {
235 			/* unwind frees sd_meta */
236 			printf("%s: could not allocate memory for foreign "
237 			    "metadata\n", DEVNAME(sc));
238 			goto bad;
239 		}
240 	}
241 
242 	/* we have a valid list now create an array index */
243 	cl = &sd->sd_vol.sv_chunk_list;
244 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no,
245 	    M_DEVBUF, M_WAITOK | M_ZERO);
246 
247 	/* fill out chunk array */
248 	i = 0;
249 	SLIST_FOREACH(ch_entry, cl, src_link)
250 		sd->sd_vol.sv_chunks[i++] = ch_entry;
251 
252 	/* attach metadata */
253 	if (smd[sd->sd_meta_type].smd_attach(sd, force))
254 		goto bad;
255 
256 	/* Force chunks into correct order now that metadata is attached. */
257 	SLIST_FOREACH(ch_entry, cl, src_link)
258 		SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link);
259 	for (i = 0; i < chunk_no; i++) {
260 		ch_entry = sd->sd_vol.sv_chunks[i];
261 		chunk2 = NULL;
262 		SLIST_FOREACH(chunk1, cl, src_link) {
263 			if (chunk1->src_meta.scmi.scm_chunk_id >
264 			    ch_entry->src_meta.scmi.scm_chunk_id)
265 				break;
266 			chunk2 = chunk1;
267 		}
268 		if (chunk2 == NULL)
269 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
270 		else
271 			SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
272 	}
273 	i = 0;
274 	SLIST_FOREACH(ch_entry, cl, src_link)
275 		sd->sd_vol.sv_chunks[i++] = ch_entry;
276 
277 	rv = 0;
278 bad:
279 	return (rv);
280 }
281 
282 int
283 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
284 {
285 	struct sr_softc		*sc = sd->sd_sc;
286 	struct vnode		*vn;
287 	struct sr_chunk		*ch_entry, *ch_prev = NULL;
288 	struct sr_chunk_head	*cl;
289 	char			devname[32];
290 	int			i, d, type, found, prevf, error;
291 	dev_t			dev;
292 
293 	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
294 
295 	if (no_chunk == 0)
296 		goto unwind;
297 
298 
299 	cl = &sd->sd_vol.sv_chunk_list;
300 
301 	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
302 		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
303 		    M_WAITOK | M_ZERO);
304 		/* keep disks in user supplied order */
305 		if (ch_prev)
306 			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
307 		else
308 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
309 		ch_prev = ch_entry;
310 		dev = dt[d];
311 		ch_entry->src_dev_mm = dev;
312 
313 		if (dev == NODEV) {
314 			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
315 			continue;
316 		} else {
317 			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
318 			if (bdevvp(dev, &vn)) {
319 				printf("%s:, sr_meta_probe: can't allocate "
320 				    "vnode\n", DEVNAME(sc));
321 				goto unwind;
322 			}
323 
324 			/*
325 			 * XXX leaving dev open for now; move this to attach
326 			 * and figure out the open/close dance for unwind.
327 			 */
328 			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
329 			if (error) {
330 				DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
331 				    "open %s\n", DEVNAME(sc), devname);
332 				vput(vn);
333 				goto unwind;
334 			}
335 
336 			strlcpy(ch_entry->src_devname, devname,
337 			    sizeof(ch_entry->src_devname));
338 			ch_entry->src_vn = vn;
339 		}
340 
341 		/* determine if this is a device we understand */
342 		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
343 			type = smd[i].smd_probe(sc, ch_entry);
344 			if (type == SR_META_F_INVALID)
345 				continue;
346 			else {
347 				found = type;
348 				break;
349 			}
350 		}
351 
352 		if (found == SR_META_F_INVALID)
353 			goto unwind;
354 		if (prevf == SR_META_F_INVALID)
355 			prevf = found;
356 		if (prevf != found) {
357 			DNPRINTF(SR_D_META, "%s: prevf != found\n",
358 			    DEVNAME(sc));
359 			goto unwind;
360 		}
361 	}
362 
363 	return (prevf);
364 unwind:
365 	return (SR_META_F_INVALID);
366 }
367 
368 void
369 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
370 {
371 	int			maj, unit, part;
372 	char			*name;
373 
374 	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
375 	    DEVNAME(sc), buf, size);
376 
377 	if (!buf)
378 		return;
379 
380 	maj = major(dev);
381 	part = DISKPART(dev);
382 	unit = DISKUNIT(dev);
383 
384 	name = findblkname(maj);
385 	if (name == NULL)
386 		return;
387 
388 	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
389 }
390 
391 int
392 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr64_t offset,
393     long flags)
394 {
395 	struct vnode		*vp;
396 	struct buf		b;
397 	size_t			bufsize, dma_bufsize;
398 	int			rv = 1;
399 	char			*dma_buf;
400 
401 	DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %d, %llu 0x%x)\n",
402 	    DEVNAME(sc), dev, buf, size, offset, flags);
403 
404 	dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size;
405 	dma_buf = dma_alloc(dma_bufsize, PR_WAITOK);
406 
407 	if (bdevvp(dev, &vp)) {
408 		printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc));
409 		goto done;
410 	}
411 
412 	while (size > 0) {
413 		DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %d, offset %llu)\n",
414 		    DEVNAME(sc), dma_buf, size, offset);
415 
416 		bufsize = (size > MAXPHYS) ? MAXPHYS : size;
417 		if (flags == B_WRITE)
418 			bcopy(buf, dma_buf, bufsize);
419 
420 		bzero(&b, sizeof(b));
421 		b.b_flags = flags | B_PHYS;
422 		b.b_proc = curproc;
423 		b.b_dev = dev;
424 		b.b_iodone = NULL;
425 		b.b_error = 0;
426 		b.b_blkno = offset;
427 		b.b_data = dma_buf;
428 		b.b_bcount = bufsize;
429 		b.b_bufsize = bufsize;
430 		b.b_resid = bufsize;
431 		b.b_vp = vp;
432 
433 		if ((b.b_flags & B_READ) == 0)
434 			vp->v_numoutput++;
435 
436 		LIST_INIT(&b.b_dep);
437 		VOP_STRATEGY(&b);
438 		biowait(&b);
439 
440 		if (b.b_flags & B_ERROR) {
441 			printf("%s: I/O error %d on dev 0x%x at block %llu\n",
442 			    DEVNAME(sc), b.b_error, dev, b.b_blkno);
443 			goto done;
444 		}
445 
446 		if (flags == B_READ)
447 			bcopy(dma_buf, buf, bufsize);
448 
449 		size -= bufsize;
450 		buf += bufsize;
451 		offset += howmany(bufsize, DEV_BSIZE);
452 	}
453 
454 	rv = 0;
455 
456 done:
457 	if (vp)
458 		vput(vp);
459 
460 	dma_free(dma_buf, dma_bufsize);
461 
462 	return (rv);
463 }
464 
465 int
466 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t size,
467     daddr64_t offset, long flags)
468 {
469 	int			rv = 1;
470 
471 	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
472 	    DEVNAME(sd->sd_sc), dev, md, size, offset, flags);
473 
474 	if (md == NULL) {
475 		printf("%s: sr_meta_rw: invalid metadata pointer\n",
476 		    DEVNAME(sd->sd_sc));
477 		goto done;
478 	}
479 
480 	rv = sr_rw(sd->sd_sc, dev, md, size, offset, flags);
481 
482 done:
483 	return (rv);
484 }
485 
486 int
487 sr_meta_clear(struct sr_discipline *sd)
488 {
489 	struct sr_softc		*sc = sd->sd_sc;
490 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
491 	struct sr_chunk		*ch_entry;
492 	void			*m;
493 	int			rv = 1;
494 
495 	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
496 
497 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
498 		printf("%s: sr_meta_clear can not clear foreign metadata\n",
499 		    DEVNAME(sc));
500 		goto done;
501 	}
502 
503 	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
504 	SLIST_FOREACH(ch_entry, cl, src_link) {
505 		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
506 			/* XXX mark disk offline */
507 			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
508 			    "clear %s\n", ch_entry->src_devname);
509 			rv++;
510 			continue;
511 		}
512 		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
513 	}
514 
515 	bzero(sd->sd_meta, SR_META_SIZE * 512);
516 
517 	free(m, M_DEVBUF);
518 	rv = 0;
519 done:
520 	return (rv);
521 }
522 
523 void
524 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
525 {
526 	struct sr_chunk		*ch_entry;
527 	struct sr_uuid		uuid;
528 	int			cid = 0;
529 	char			*name;
530 	u_int64_t		max_chunk_sz = 0, min_chunk_sz;
531 
532 	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));
533 
534 	sr_uuid_get(&uuid);
535 
536 	/* fill out stuff and get largest chunk size while looping */
537 	SLIST_FOREACH(ch_entry, cl, src_link) {
538 		name = ch_entry->src_devname;
539 		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
540 		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
541 		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
542 		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
543 		    sizeof(ch_entry->src_meta.scmi.scm_devname));
544 		bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid,
545 		    sizeof(ch_entry->src_meta.scmi.scm_uuid));
546 
547 		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
548 			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
549 	}
550 
551 	/* get smallest chunk size */
552 	min_chunk_sz = max_chunk_sz;
553 	SLIST_FOREACH(ch_entry, cl, src_link)
554 		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
555 			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;
556 
557 	/* equalize all sizes */
558 	SLIST_FOREACH(ch_entry, cl, src_link)
559 		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;
560 
561 	/* whine if chunks are not the same size */
562 	if (min_chunk_sz != max_chunk_sz)
563 		printf("%s: chunk sizes are not equal; up to %llu blocks "
564 		    "wasted per chunk\n",
565 		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
566 }
567 
568 void
569 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
570 {
571 	struct sr_softc		*sc = sd->sd_sc;
572 	struct sr_metadata	*sm = sd->sd_meta;
573 	struct sr_meta_chunk	*im_sc;
574 	int			i, chunk_no;
575 
576 	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
577 
578 	if (!sm)
579 		return;
580 
581 	/* initial metadata */
582 	sm->ssdi.ssd_magic = SR_MAGIC;
583 	sm->ssdi.ssd_version = SR_META_VERSION;
584 	sm->ssd_ondisk = 0;
585 	sm->ssdi.ssd_vol_flags = sd->sd_meta_flags;
586 	sm->ssd_data_offset = SR_DATA_OFFSET;
587 
588 	/* get uuid from chunk 0 */
589 	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
590 	    &sm->ssdi.ssd_uuid,
591 	    sizeof(struct sr_uuid));
592 
593 	/* volume is filled in createraid */
594 
595 	/* add missing chunk bits */
596 	chunk_no = sm->ssdi.ssd_chunk_no;
597 	for (i = 0; i < chunk_no; i++) {
598 		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
599 		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
600 		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
601 		    sizeof(struct sr_meta_chunk_invariant));
602 	}
603 }
604 
605 void
606 sr_meta_opt_load(struct sr_discipline *sd, struct sr_meta_opt *om)
607 {
608 	if (om->somi.som_type == SR_OPT_BOOT) {
609 
610 
611 	} else
612 		panic("unknown optional metadata type");
613 }
614 
615 void
616 sr_meta_save_callback(void *arg1, void *arg2)
617 {
618 	struct sr_discipline	*sd = arg1;
619 	int			s;
620 
621 	s = splbio();
622 
623 	if (sr_meta_save(arg1, SR_META_DIRTY))
624 		printf("%s: save metadata failed\n",
625 		    DEVNAME(sd->sd_sc));
626 
627 	sd->sd_must_flush = 0;
628 	splx(s);
629 }
630 
631 int
632 sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
633 {
634 	struct sr_softc		*sc = sd->sd_sc;
635 	struct sr_metadata	*sm = sd->sd_meta, *m;
636 	struct sr_meta_driver	*s;
637 	struct sr_chunk		*src;
638 	struct sr_meta_chunk	*cm;
639 	struct sr_workunit	wu;
640 	struct sr_meta_opt_item *omi;
641 	struct sr_meta_opt	*om;
642 	int			i;
643 
644 	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
645 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
646 
647 	if (!sm) {
648 		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
649 		goto bad;
650 	}
651 
652 	/* meta scratchpad */
653 	s = &smd[sd->sd_meta_type];
654 	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
655 	if (!m) {
656 		printf("%s: could not allocate metadata scratch area\n",
657 		    DEVNAME(sc));
658 		goto bad;
659 	}
660 
661 	/* from here on out metadata is updated */
662 restart:
663 	sm->ssd_ondisk++;
664 	sm->ssd_meta_flags = flags;
665 	bcopy(sm, m, sizeof(*m));
666 
667 	/* Chunk metadata. */
668 	cm = (struct sr_meta_chunk *)(m + 1);
669 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
670 		src = sd->sd_vol.sv_chunks[i];
671 		bcopy(&src->src_meta, cm, sizeof(*cm));
672 		cm++;
673 	}
674 
675 	/* Optional metadata. */
676 	om = (struct sr_meta_opt *)(cm);
677 	SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
678 		bcopy(&omi->omi_om, om, sizeof(*om));
679 		sr_checksum(sc, om, &om->som_checksum,
680 		    sizeof(struct sr_meta_opt_invariant));
681 		om++;
682 	}
683 
684 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
685 		src = sd->sd_vol.sv_chunks[i];
686 
687 		/* skip disks that are offline */
688 		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
689 			continue;
690 
691 		/* calculate metadata checksum for correct chunk */
692 		m->ssdi.ssd_chunk_id = i;
693 		sr_checksum(sc, m, &m->ssd_checksum,
694 		    sizeof(struct sr_meta_invariant));
695 
696 #ifdef SR_DEBUG
697 		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
698 		    "chunkid: %d checksum: ",
699 		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
700 		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
701 
702 		if (sr_debug & SR_D_META)
703 			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
704 		DNPRINTF(SR_D_META, "\n");
705 		sr_meta_print(m);
706 #endif
707 
708 		/* translate and write to disk */
709 		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
710 			printf("%s: could not write metadata to %s\n",
711 			    DEVNAME(sc), src->src_devname);
712 			/* restart the meta write */
713 			src->src_meta.scm_status = BIOC_SDOFFLINE;
714 			/* XXX recalculate volume status */
715 			goto restart;
716 		}
717 	}
718 
719 	/* not all disciplines have sync */
720 	if (sd->sd_scsi_sync) {
721 		bzero(&wu, sizeof(wu));
722 		wu.swu_fake = 1;
723 		wu.swu_dis = sd;
724 		sd->sd_scsi_sync(&wu);
725 	}
726 	free(m, M_DEVBUF);
727 	return (0);
728 bad:
729 	return (1);
730 }
731 
732 int
733 sr_meta_read(struct sr_discipline *sd)
734 {
735 #ifdef SR_DEBUG
736 	struct sr_softc		*sc = sd->sd_sc;
737 #endif
738 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
739 	struct sr_metadata	*sm;
740 	struct sr_chunk		*ch_entry;
741 	struct sr_meta_chunk	*cp;
742 	struct sr_meta_driver	*s;
743 	struct sr_meta_opt_item *omi;
744 	struct sr_meta_opt	*om;
745 	void			*fm = NULL;
746 	int			i, no_disk = 0, got_meta = 0;
747 
748 	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
749 
750 	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
751 	s = &smd[sd->sd_meta_type];
752 	if (sd->sd_meta_type != SR_META_F_NATIVE)
753 		fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);
754 
755 	cp = (struct sr_meta_chunk *)(sm + 1);
756 	SLIST_FOREACH(ch_entry, cl, src_link) {
757 		/* skip disks that are offline */
758 		if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
759 			DNPRINTF(SR_D_META,
760 			    "%s: %s chunk marked offline, spoofing status\n",
761 			    DEVNAME(sc), ch_entry->src_devname);
762 			cp++; /* adjust chunk pointer to match failure */
763 			continue;
764 		} else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
765 			/* read and translate */
766 			/* XXX mark chunk offline, elsewhere!! */
767 			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
768 			cp++; /* adjust chunk pointer to match failure */
769 			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
770 			    DEVNAME(sc));
771 			continue;
772 		}
773 
774 		if (sm->ssdi.ssd_magic != SR_MAGIC) {
775 			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
776 			    DEVNAME(sc));
777 			continue;
778 		}
779 
780 		/* validate metadata */
781 		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
782 			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
783 			    DEVNAME(sc));
784 			no_disk = -1;
785 			goto done;
786 		}
787 
788 		/* assume first chunk contains metadata */
789 		if (got_meta == 0) {
790 			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
791 			got_meta = 1;
792 		}
793 
794 		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
795 
796 		/* Process optional metadata. */
797 		om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
798 		    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
799 		for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
800 
801 			omi = malloc(sizeof(struct sr_meta_opt_item),
802 			    M_DEVBUF, M_WAITOK | M_ZERO);
803 			bcopy(om, &omi->omi_om, sizeof(struct sr_meta_opt));
804 			SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link);
805 
806 			/* See if discipline wants to handle it. */
807 			if (sd->sd_meta_opt_load &&
808 			    sd->sd_meta_opt_load(sd, &omi->omi_om) == 0)
809 				continue;
810 			else
811 				sr_meta_opt_load(sd, &omi->omi_om);
812 
813 			om++;
814 		}
815 
816 		cp++;
817 		no_disk++;
818 	}
819 
820 	free(sm, M_DEVBUF);
821 	if (fm)
822 		free(fm, M_DEVBUF);
823 
824 done:
825 	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
826 	    no_disk);
827 	return (no_disk);
828 }
829 
830 int
831 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
832     void *fm)
833 {
834 	struct sr_softc		*sc = sd->sd_sc;
835 	struct sr_meta_driver	*s;
836 #ifdef SR_DEBUG
837 	struct sr_meta_chunk	*mc;
838 #endif
839 	char			devname[32];
840 	int			rv = 1;
841 	u_int8_t		checksum[MD5_DIGEST_LENGTH];
842 
843 	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
844 
845 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
846 
847 	s = &smd[sd->sd_meta_type];
848 	if (sd->sd_meta_type != SR_META_F_NATIVE)
849 		if (s->smd_validate(sd, sm, fm)) {
850 			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
851 			goto done;
852 		}
853 
854 	/*
855 	 * at this point all foreign metadata has been translated to the native
856 	 * format and will be treated just like the native format
857 	 */
858 
859 	if (sm->ssdi.ssd_magic != SR_MAGIC) {
860 		printf("%s: not valid softraid metadata\n", DEVNAME(sc));
861 		goto done;
862 	}
863 
864 	/* Verify metadata checksum. */
865 	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
866 	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
867 		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
868 		goto done;
869 	}
870 
871 	/* Handle changes between versions. */
872 	if (sm->ssdi.ssd_version == 3) {
873 
874 		/*
875 		 * Version 3 - update metadata version and fix up data offset
876 		 * value since this did not exist in version 3.
877 		 */
878 		sm->ssdi.ssd_version = SR_META_VERSION;
879 		snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
880 		    "%03d", SR_META_VERSION);
881 		if (sm->ssd_data_offset == 0)
882 			sm->ssd_data_offset = SR_META_V3_DATA_OFFSET;
883 
884 	} else if (sm->ssdi.ssd_version == SR_META_VERSION) {
885 
886 		/*
887 		 * Version 4 - original metadata format did not store
888 		 * data offset so fix this up if necessary.
889 		 */
890 		if (sm->ssd_data_offset == 0)
891 			sm->ssd_data_offset = SR_DATA_OFFSET;
892 
893 	} else {
894 
895 		printf("%s: %s can not read metadata version %u, expected %u\n",
896 		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
897 		    SR_META_VERSION);
898 		goto done;
899 
900 	}
901 
902 #ifdef SR_DEBUG
903 	/* warn if disk changed order */
904 	mc = (struct sr_meta_chunk *)(sm + 1);
905 	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
906 	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
907 		DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
908 		    DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
909 		    devname);
910 #endif
911 
912 	/* we have meta data on disk */
913 	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
914 	    DEVNAME(sc), devname);
915 
916 	rv = 0;
917 done:
918 	return (rv);
919 }
920 
921 int
922 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
923     struct sr_metadata_list_head *mlh)
924 {
925 	struct vnode		*vn;
926 	struct disklabel	label;
927 	struct sr_metadata	*md = NULL;
928 	struct sr_discipline	*fake_sd = NULL;
929 	struct sr_metadata_list *mle;
930 	char			devname[32];
931 	dev_t			chrdev, rawdev;
932 	int			error, i;
933 	int			rv = SR_META_NOTCLAIMED;
934 
935 	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
936 
937 	/*
938 	 * Use character raw device to avoid SCSI complaints about missing
939 	 * media on removable media devices.
940 	 */
941 	chrdev = blktochr(devno);
942 	rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART);
943 	if (cdevvp(rawdev, &vn)) {
944 		printf("%s:, sr_meta_native_bootprobe: can't allocate vnode\n",
945 		    DEVNAME(sc));
946 		goto done;
947 	}
948 
949 	/* open device */
950 	error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
951 	if (error) {
952 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
953 		    "failed\n", DEVNAME(sc));
954 		vput(vn);
955 		goto done;
956 	}
957 
958 	/* get disklabel */
959 	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED,
960 	    curproc);
961 	if (error) {
962 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
963 		    "failed\n", DEVNAME(sc));
964 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
965 		vput(vn);
966 		goto done;
967 	}
968 
969 	/* we are done, close device */
970 	error = VOP_CLOSE(vn, FREAD, NOCRED, curproc);
971 	if (error) {
972 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
973 		    "failed\n", DEVNAME(sc));
974 		vput(vn);
975 		goto done;
976 	}
977 	vput(vn);
978 
979 	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
980 	if (md == NULL) {
981 		printf("%s: not enough memory for metadata buffer\n",
982 		    DEVNAME(sc));
983 		goto done;
984 	}
985 
986 	/* create fake sd to use utility functions */
987 	fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF,
988 	    M_ZERO | M_NOWAIT);
989 	if (fake_sd == NULL) {
990 		printf("%s: not enough memory for fake discipline\n",
991 		    DEVNAME(sc));
992 		goto done;
993 	}
994 	fake_sd->sd_sc = sc;
995 	fake_sd->sd_meta_type = SR_META_F_NATIVE;
996 
997 	for (i = 0; i < MAXPARTITIONS; i++) {
998 		if (label.d_partitions[i].p_fstype != FS_RAID)
999 			continue;
1000 
1001 		/* open partition */
1002 		rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i);
1003 		if (bdevvp(rawdev, &vn)) {
1004 			printf("%s:, sr_meta_native_bootprobe: can't allocate "
1005 			    "vnode for partition\n", DEVNAME(sc));
1006 			goto done;
1007 		}
1008 		error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
1009 		if (error) {
1010 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
1011 			    "open failed, partition %d\n",
1012 			    DEVNAME(sc), i);
1013 			vput(vn);
1014 			continue;
1015 		}
1016 
1017 		if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) {
1018 			printf("%s: native bootprobe could not read native "
1019 			    "metadata\n", DEVNAME(sc));
1020 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1021 			vput(vn);
1022 			continue;
1023 		}
1024 
1025 		/* are we a softraid partition? */
1026 		if (md->ssdi.ssd_magic != SR_MAGIC) {
1027 			VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1028 			vput(vn);
1029 			continue;
1030 		}
1031 
1032 		sr_meta_getdevname(sc, rawdev, devname, sizeof(devname));
1033 		if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) {
1034 			if (md->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE) {
1035 				DNPRINTF(SR_D_META, "%s: don't save %s\n",
1036 				    DEVNAME(sc), devname);
1037 			} else {
1038 				/* XXX fix M_WAITOK, this is boot time */
1039 				mle = malloc(sizeof(*mle), M_DEVBUF,
1040 				    M_WAITOK | M_ZERO);
1041 				bcopy(md, &mle->sml_metadata,
1042 				    SR_META_SIZE * 512);
1043 				mle->sml_mm = rawdev;
1044 				SLIST_INSERT_HEAD(mlh, mle, sml_link);
1045 				rv = SR_META_CLAIMED;
1046 			}
1047 		}
1048 
1049 		/* we are done, close partition */
1050 		VOP_CLOSE(vn, FREAD, NOCRED, curproc);
1051 		vput(vn);
1052 	}
1053 
1054 done:
1055 	if (fake_sd)
1056 		free(fake_sd, M_DEVBUF);
1057 	if (md)
1058 		free(md, M_DEVBUF);
1059 
1060 	return (rv);
1061 }
1062 
1063 int
1064 sr_boot_assembly(struct sr_softc *sc)
1065 {
1066 	struct disk		*dk;
1067 	struct sr_disk_head	sdklist;
1068 	struct sr_disk		*sdk;
1069 	struct bioc_createraid	bc;
1070 	struct sr_metadata_list_head mlh, kdh;
1071 	struct sr_metadata_list *mle, *mlenext, *mle1, *mle2;
1072 	struct sr_metadata	*metadata;
1073 	struct sr_boot_volume_head bvh;
1074 	struct sr_boot_volume	*vol, *vp1, *vp2;
1075 	struct sr_meta_chunk	*hm;
1076 	struct sr_chunk_head	*cl;
1077 	struct sr_chunk		*hotspare, *chunk, *last;
1078 	u_int32_t		chunk_id;
1079 	u_int64_t		*ondisk = NULL;
1080 	dev_t			*devs = NULL;
1081 	char			devname[32];
1082 	int			rv = 0, i;
1083 
1084 	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
1085 
1086 	SLIST_INIT(&sdklist);
1087 	SLIST_INIT(&mlh);
1088 
1089 	dk = TAILQ_FIRST(&disklist);
1090 	while (dk != TAILQ_END(&disklist)) {
1091 
1092 		/* See if this disk has been checked. */
1093 		SLIST_FOREACH(sdk, &sdklist, sdk_link)
1094 			if (sdk->sdk_devno == dk->dk_devno)
1095 				break;
1096 
1097 		if (sdk != NULL) {
1098 			dk = TAILQ_NEXT(dk, dk_link);
1099 			continue;
1100 		}
1101 
1102 		/* Add this disk to the list that we've checked. */
1103 		sdk = malloc(sizeof(struct sr_disk), M_DEVBUF,
1104 		    M_NOWAIT | M_CANFAIL | M_ZERO);
1105 		if (sdk == NULL)
1106 			goto unwind;
1107 		sdk->sdk_devno = dk->dk_devno;
1108 		SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link);
1109 
1110 		/* Only check sd(4) and wd(4) devices. */
1111 		if (strncmp(dk->dk_name, "sd", 2) &&
1112 		    strncmp(dk->dk_name, "wd", 2)) {
1113 			dk = TAILQ_NEXT(dk, dk_link);
1114 			continue;
1115 		}
1116 
1117 		/* native softraid uses partitions */
1118 		sr_meta_native_bootprobe(sc, dk->dk_devno, &mlh);
1119 
1120 		/* probe non-native disks if native failed. */
1121 
1122 		/* Restart scan since we may have slept. */
1123 		dk = TAILQ_FIRST(&disklist);
1124 	}
1125 
1126 	/*
1127 	 * Create a list of volumes and associate chunks with each volume.
1128 	 */
1129 
1130 	SLIST_INIT(&bvh);
1131 	SLIST_INIT(&kdh);
1132 
1133 	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) {
1134 
1135 		mlenext = SLIST_NEXT(mle, sml_link);
1136 		SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link);
1137 
1138 		metadata = (struct sr_metadata *)&mle->sml_metadata;
1139 		mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id;
1140 
1141 		/* Handle key disks separately. */
1142 		if (metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) {
1143 			SLIST_INSERT_HEAD(&kdh, mle, sml_link);
1144 			continue;
1145 		}
1146 
1147 		SLIST_FOREACH(vol, &bvh, sbv_link) {
1148 			if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
1149 			    sizeof(metadata->ssdi.ssd_uuid)) == 0)
1150 				break;
1151 		}
1152 
1153 		if (vol == NULL) {
1154 			vol = malloc(sizeof(struct sr_boot_volume),
1155 			    M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO);
1156 			if (vol == NULL) {
1157 				printf("%s: failed to allocate boot volume!\n",
1158 				    DEVNAME(sc));
1159 				goto unwind;
1160 			}
1161 
1162 			vol->sbv_level = metadata->ssdi.ssd_level;
1163 			vol->sbv_volid = metadata->ssdi.ssd_volid;
1164 			vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no;
1165 			bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
1166 			    sizeof(metadata->ssdi.ssd_uuid));
1167 			SLIST_INIT(&vol->sml);
1168 
1169 			/* Maintain volume order. */
1170 			vp2 = NULL;
1171 			SLIST_FOREACH(vp1, &bvh, sbv_link) {
1172 				if (vp1->sbv_volid > vol->sbv_volid)
1173 					break;
1174 				vp2 = vp1;
1175 			}
1176 			if (vp2 == NULL) {
1177 				DNPRINTF(SR_D_META, "%s: insert volume %u "
1178 				    "at head\n", DEVNAME(sc), vol->sbv_volid);
1179 				SLIST_INSERT_HEAD(&bvh, vol, sbv_link);
1180 			} else {
1181 				DNPRINTF(SR_D_META, "%s: insert volume %u "
1182 				    "after %u\n", DEVNAME(sc), vol->sbv_volid,
1183 				    vp2->sbv_volid);
1184 				SLIST_INSERT_AFTER(vp2, vol, sbv_link);
1185 			}
1186 		}
1187 
1188 		/* Maintain chunk order. */
1189 		mle2 = NULL;
1190 		SLIST_FOREACH(mle1, &vol->sml, sml_link) {
1191 			if (mle1->sml_chunk_id > mle->sml_chunk_id)
1192 				break;
1193 			mle2 = mle1;
1194 		}
1195 		if (mle2 == NULL) {
1196 			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
1197 			    "at head\n", DEVNAME(sc), vol->sbv_volid,
1198 			    mle->sml_chunk_id);
1199 			SLIST_INSERT_HEAD(&vol->sml, mle, sml_link);
1200 		} else {
1201 			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
1202 			    "after %u\n", DEVNAME(sc), vol->sbv_volid,
1203 			    mle->sml_chunk_id, mle2->sml_chunk_id);
1204 			SLIST_INSERT_AFTER(mle2, mle, sml_link);
1205 		}
1206 
1207 		vol->sbv_dev_no++;
1208 	}
1209 
1210 	/* Allocate memory for device and ondisk version arrays. */
1211 	devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF,
1212 	    M_NOWAIT | M_CANFAIL);
1213 	if (devs == NULL) {
1214 		printf("%s: failed to allocate device array\n", DEVNAME(sc));
1215 		goto unwind;
1216 	}
1217 	ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF,
1218 	    M_NOWAIT | M_CANFAIL);
1219 	if (ondisk == NULL) {
1220 		printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
1221 		goto unwind;
1222 	}
1223 
1224 	/*
1225 	 * Assemble hotspare "volumes".
1226 	 */
1227 	SLIST_FOREACH(vol, &bvh, sbv_link) {
1228 
1229 		/* Check if this is a hotspare "volume". */
1230 		if (vol->sbv_level != SR_HOTSPARE_LEVEL ||
1231 		    vol->sbv_chunk_no != 1)
1232 			continue;
1233 
1234 #ifdef SR_DEBUG
1235 		DNPRINTF(SR_D_META, "%s: assembling hotspare volume ",
1236 		    DEVNAME(sc));
1237 		if (sr_debug & SR_D_META)
1238 			sr_uuid_print(&vol->sbv_uuid, 0);
1239 		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
1240 		    vol->sbv_volid, vol->sbv_chunk_no);
1241 #endif
1242 
1243 		/* Create hotspare chunk metadata. */
1244 		hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF,
1245 		    M_NOWAIT | M_CANFAIL | M_ZERO);
1246 		if (hotspare == NULL) {
1247 			printf("%s: failed to allocate hotspare\n",
1248 			    DEVNAME(sc));
1249 			goto unwind;
1250 		}
1251 
1252 		mle = SLIST_FIRST(&vol->sml);
1253 		sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname));
1254 		hotspare->src_dev_mm = mle->sml_mm;
1255 		strlcpy(hotspare->src_devname, devname,
1256 		    sizeof(hotspare->src_devname));
1257 		hotspare->src_size = metadata->ssdi.ssd_size;
1258 
1259 		hm = &hotspare->src_meta;
1260 		hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
1261 		hm->scmi.scm_chunk_id = 0;
1262 		hm->scmi.scm_size = metadata->ssdi.ssd_size;
1263 		hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size;
1264 		strlcpy(hm->scmi.scm_devname, devname,
1265 		    sizeof(hm->scmi.scm_devname));
1266 		bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid,
1267 		    sizeof(struct sr_uuid));
1268 
1269 		sr_checksum(sc, hm, &hm->scm_checksum,
1270 		    sizeof(struct sr_meta_chunk_invariant));
1271 
1272 		hm->scm_status = BIOC_SDHOTSPARE;
1273 
1274 		/* Add chunk to hotspare list. */
1275 		rw_enter_write(&sc->sc_hs_lock);
1276 		cl = &sc->sc_hotspare_list;
1277 		if (SLIST_EMPTY(cl))
1278 			SLIST_INSERT_HEAD(cl, hotspare, src_link);
1279 		else {
1280 			SLIST_FOREACH(chunk, cl, src_link)
1281 				last = chunk;
1282 			SLIST_INSERT_AFTER(last, hotspare, src_link);
1283 		}
1284 		sc->sc_hotspare_no++;
1285 		rw_exit_write(&sc->sc_hs_lock);
1286 
1287 	}
1288 
1289 	/*
1290 	 * Assemble RAID volumes.
1291 	 */
1292 	SLIST_FOREACH(vol, &bvh, sbv_link) {
1293 
1294 		bzero(&bc, sizeof(bc));
1295 
1296 		/* Check if this is a hotspare "volume". */
1297 		if (vol->sbv_level == SR_HOTSPARE_LEVEL &&
1298 		    vol->sbv_chunk_no == 1)
1299 			continue;
1300 
1301 #ifdef SR_DEBUG
1302 		DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc));
1303 		if (sr_debug & SR_D_META)
1304 			sr_uuid_print(&vol->sbv_uuid, 0);
1305 		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
1306 		    vol->sbv_volid, vol->sbv_chunk_no);
1307 #endif
1308 
1309 		/*
1310 		 * If this is a crypto volume, try to find a matching
1311 		 * key disk...
1312 		 */
1313 		bc.bc_key_disk = NODEV;
1314 		if (vol->sbv_level == 'C') {
1315 			SLIST_FOREACH(mle, &kdh, sml_link) {
1316 				metadata =
1317 				    (struct sr_metadata *)&mle->sml_metadata;
1318 				if (bcmp(&metadata->ssdi.ssd_uuid,
1319 				    &vol->sbv_uuid,
1320 				    sizeof(metadata->ssdi.ssd_uuid)) == 0) {
1321 					bc.bc_key_disk = mle->sml_mm;
1322 				}
1323 			}
1324 		}
1325 
1326 		for (i = 0; i < BIOC_CRMAXLEN; i++) {
1327 			devs[i] = NODEV; /* mark device as illegal */
1328 			ondisk[i] = 0;
1329 		}
1330 
1331 		SLIST_FOREACH(mle, &vol->sml, sml_link) {
1332 			metadata = (struct sr_metadata *)&mle->sml_metadata;
1333 			chunk_id = metadata->ssdi.ssd_chunk_id;
1334 
1335 			if (devs[chunk_id] != NODEV) {
1336 				vol->sbv_dev_no--;
1337 				sr_meta_getdevname(sc, mle->sml_mm, devname,
1338 				    sizeof(devname));
1339 				printf("%s: found duplicate chunk %u for "
1340 				    "volume %u on device %s\n", DEVNAME(sc),
1341 				    chunk_id, vol->sbv_volid, devname);
1342 			}
1343 
1344 			if (devs[chunk_id] == NODEV ||
1345 			    metadata->ssd_ondisk > ondisk[chunk_id]) {
1346 				devs[chunk_id] = mle->sml_mm;
1347 				ondisk[chunk_id] = metadata->ssd_ondisk;
1348 				DNPRINTF(SR_D_META, "%s: using ondisk "
1349 				    "metadata version %llu for chunk %u\n",
1350 				    DEVNAME(sc), ondisk[chunk_id], chunk_id);
1351 			}
1352 		}
1353 
1354 		if (vol->sbv_chunk_no != vol->sbv_dev_no) {
1355 			printf("%s: not all chunks were provided; "
1356 			    "attempting to bring volume %d online\n",
1357 			    DEVNAME(sc), vol->sbv_volid);
1358 		}
1359 
1360 		bc.bc_level = vol->sbv_level;
1361 		bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t);
1362 		bc.bc_dev_list = devs;
1363 		bc.bc_flags = BIOC_SCDEVT;
1364 
1365 		rw_enter_write(&sc->sc_lock);
1366 		sr_ioctl_createraid(sc, &bc, 0);
1367 		rw_exit_write(&sc->sc_lock);
1368 
1369 		rv++;
1370 	}
1371 
1372 	/* done with metadata */
1373 unwind:
1374 	for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) {
1375 		vp2 = SLIST_NEXT(vp1, sbv_link);
1376 		for (mle1 = SLIST_FIRST(&vp1->sml);
1377 		    mle1 != SLIST_END(&vp1->sml); mle1 = mle2) {
1378 			mle2 = SLIST_NEXT(mle1, sml_link);
1379 			free(mle1, M_DEVBUF);
1380 		}
1381 		free(vp1, M_DEVBUF);
1382 	}
1383 	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
1384 		mle2 = SLIST_NEXT(mle, sml_link);
1385 		free(mle, M_DEVBUF);
1386 	}
1387 	SLIST_INIT(&mlh);
1388 
1389 	while (!SLIST_EMPTY(&sdklist)) {
1390 		sdk = SLIST_FIRST(&sdklist);
1391 		SLIST_REMOVE_HEAD(&sdklist, sdk_link);
1392 		free(sdk, M_DEVBUF);
1393 	}
1394 
1395 	if (devs)
1396 		free(devs, M_DEVBUF);
1397 	if (ondisk)
1398 		free(ondisk, M_DEVBUF);
1399 
1400 	return (rv);
1401 }
1402 
1403 int
1404 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
1405 {
1406 	struct disklabel	label;
1407 	char			*devname;
1408 	int			error, part;
1409 	daddr64_t		size;
1410 
1411 	DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
1412 	   DEVNAME(sc), ch_entry->src_devname);
1413 
1414 	devname = ch_entry->src_devname;
1415 	part = DISKPART(ch_entry->src_dev_mm);
1416 
1417 	/* get disklabel */
1418 	error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD,
1419 	    NOCRED, curproc);
1420 	if (error) {
1421 		DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
1422 		    DEVNAME(sc), devname);
1423 		goto unwind;
1424 	}
1425 
1426 	/* make sure the partition is of the right type */
1427 	if (label.d_partitions[part].p_fstype != FS_RAID) {
1428 		DNPRINTF(SR_D_META,
1429 		    "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc),
1430 		    devname,
1431 		    label.d_partitions[part].p_fstype);
1432 		goto unwind;
1433 	}
1434 
1435 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
1436 	if (size <= 0) {
1437 		DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
1438 		    devname);
1439 		goto unwind;
1440 	}
1441 	ch_entry->src_size = size;
1442 
1443 	DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc),
1444 	    devname, size);
1445 
1446 	return (SR_META_F_NATIVE);
1447 unwind:
1448 	DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
1449 	    devname ? devname : "nodev");
1450 	return (SR_META_F_INVALID);
1451 }
1452 
1453 int
1454 sr_meta_native_attach(struct sr_discipline *sd, int force)
1455 {
1456 	struct sr_softc		*sc = sd->sd_sc;
1457 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
1458 	struct sr_metadata	*md = NULL;
1459 	struct sr_chunk		*ch_entry, *ch_next;
1460 	struct sr_uuid		uuid;
1461 	u_int64_t		version = 0;
1462 	int			sr, not_sr, rv = 1, d, expected = -1, old_meta = 0;
1463 
1464 	DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
1465 
1466 	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
1467 	if (md == NULL) {
1468 		printf("%s: not enough memory for metadata buffer\n",
1469 		    DEVNAME(sc));
1470 		goto bad;
1471 	}
1472 
1473 	bzero(&uuid, sizeof uuid);
1474 
1475 	sr = not_sr = d = 0;
1476 	SLIST_FOREACH(ch_entry, cl, src_link) {
1477 		if (ch_entry->src_dev_mm == NODEV)
1478 			continue;
1479 
1480 		if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
1481 			printf("%s: could not read native metadata\n",
1482 			    DEVNAME(sc));
1483 			goto bad;
1484 		}
1485 
1486 		if (md->ssdi.ssd_magic == SR_MAGIC) {
1487 			sr++;
1488 			ch_entry->src_meta.scmi.scm_chunk_id =
1489 			    md->ssdi.ssd_chunk_id;
1490 			if (d == 0) {
1491 				bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
1492 				expected = md->ssdi.ssd_chunk_no;
1493 				version = md->ssd_ondisk;
1494 				d++;
1495 				continue;
1496 			} else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
1497 			    sizeof uuid)) {
1498 				printf("%s: not part of the same volume\n",
1499 				    DEVNAME(sc));
1500 				goto bad;
1501 			}
1502 			if (md->ssd_ondisk != version) {
1503 				old_meta++;
1504 				version = MAX(md->ssd_ondisk, version);
1505 			}
1506 		} else
1507 			not_sr++;
1508 	}
1509 
1510 	if (sr && not_sr) {
1511 		printf("%s: not all chunks are of the native metadata format\n",
1512 		     DEVNAME(sc));
1513 		goto bad;
1514 	}
1515 
1516 	/* mixed metadata versions; mark bad disks offline */
1517 	if (old_meta) {
1518 		d = 0;
1519 		for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl);
1520 		    ch_entry = ch_next, d++) {
1521 			ch_next = SLIST_NEXT(ch_entry, src_link);
1522 
1523 			/* XXX do we want to read this again? */
1524 			if (ch_entry->src_dev_mm == NODEV)
1525 				panic("src_dev_mm == NODEV");
1526 			if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md,
1527 			    NULL))
1528 				printf("%s: could not read native metadata\n",
1529 				    DEVNAME(sc));
1530 			if (md->ssd_ondisk != version)
1531 				sd->sd_vol.sv_chunks[d]->src_meta.scm_status =
1532 				    BIOC_SDOFFLINE;
1533 		}
1534 	}
1535 
1536 	if (expected != sr && !force && expected != -1) {
1537 		DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying "
1538 		    "anyway\n", DEVNAME(sc));
1539 	}
1540 
1541 	rv = 0;
1542 bad:
1543 	if (md)
1544 		free(md, M_DEVBUF);
1545 	return (rv);
1546 }
1547 
1548 int
1549 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
1550     struct sr_metadata *md, void *fm)
1551 {
1552 #ifdef SR_DEBUG
1553 	struct sr_softc		*sc = sd->sd_sc;
1554 #endif
1555 	DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
1556 	    DEVNAME(sc), dev, md);
1557 
1558 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1559 	    B_READ));
1560 }
1561 
1562 int
1563 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
1564     struct sr_metadata *md, void *fm)
1565 {
1566 #ifdef SR_DEBUG
1567 	struct sr_softc		*sc = sd->sd_sc;
1568 #endif
1569 	DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
1570 	    DEVNAME(sc), dev, md);
1571 
1572 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1573 	    B_WRITE));
1574 }
1575 
1576 void
1577 sr_hotplug_register(struct sr_discipline *sd, void *func)
1578 {
1579 	struct sr_hotplug_list	*mhe;
1580 
1581 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n",
1582 	    DEVNAME(sd->sd_sc), func);
1583 
1584 	/* make sure we aren't on the list yet */
1585 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1586 		if (mhe->sh_hotplug == func)
1587 			return;
1588 
1589 	mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF,
1590 	    M_WAITOK | M_ZERO);
1591 	mhe->sh_hotplug = func;
1592 	mhe->sh_sd = sd;
1593 	SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link);
1594 }
1595 
1596 void
1597 sr_hotplug_unregister(struct sr_discipline *sd, void *func)
1598 {
1599 	struct sr_hotplug_list	*mhe;
1600 
1601 	DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n",
1602 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func);
1603 
1604 	/* make sure we are on the list yet */
1605 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1606 		if (mhe->sh_hotplug == func) {
1607 			SLIST_REMOVE(&sr_hotplug_callbacks, mhe,
1608 			    sr_hotplug_list, shl_link);
1609 			free(mhe, M_DEVBUF);
1610 			if (SLIST_EMPTY(&sr_hotplug_callbacks))
1611 				SLIST_INIT(&sr_hotplug_callbacks);
1612 			return;
1613 		}
1614 }
1615 
1616 void
1617 sr_disk_attach(struct disk *diskp, int action)
1618 {
1619 	struct sr_hotplug_list	*mhe;
1620 
1621 	SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link)
1622 		if (mhe->sh_sd->sd_ready)
1623 			mhe->sh_hotplug(mhe->sh_sd, diskp, action);
1624 }
1625 
/*
 * Autoconf match routine: unconditionally reports a match.
 * None of the arguments are examined.
 */
int
sr_match(struct device *parent, void *match, void *aux)
{
	int			matched = 1;

	return (matched);
}
1631 
1632 void
1633 sr_attach(struct device *parent, struct device *self, void *aux)
1634 {
1635 	struct sr_softc		*sc = (void *)self;
1636 
1637 	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
1638 
1639 	rw_init(&sc->sc_lock, "sr_lock");
1640 	rw_init(&sc->sc_hs_lock, "sr_hs_lock");
1641 
1642 	SLIST_INIT(&sr_hotplug_callbacks);
1643 	SLIST_INIT(&sc->sc_hotspare_list);
1644 
1645 #if NBIO > 0
1646 	if (bio_register(&sc->sc_dev, sr_ioctl) != 0)
1647 		printf("%s: controller registration failed", DEVNAME(sc));
1648 	else
1649 		sc->sc_ioctl = sr_ioctl;
1650 #endif /* NBIO > 0 */
1651 
1652 #ifndef SMALL_KERNEL
1653 	strlcpy(sc->sc_sensordev.xname, DEVNAME(sc),
1654 	    sizeof(sc->sc_sensordev.xname));
1655 	sensordev_install(&sc->sc_sensordev);
1656 #endif /* SMALL_KERNEL */
1657 
1658 	printf("\n");
1659 
1660 	softraid_disk_attach = sr_disk_attach;
1661 
1662 	sr_boot_assembly(sc);
1663 }
1664 
1665 int
1666 sr_detach(struct device *self, int flags)
1667 {
1668 #ifndef SMALL_KERNEL
1669 	struct sr_softc		*sc = (void *)self;
1670 
1671 	sensordev_deinstall(&sc->sc_sensordev);
1672 #endif /* SMALL_KERNEL */
1673 
1674 	return (0);
1675 }
1676 
1677 void
1678 sr_minphys(struct buf *bp, struct scsi_link *sl)
1679 {
1680 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
1681 
1682 	/* XXX currently using SR_MAXFER = MAXPHYS */
1683 	if (bp->b_bcount > SR_MAXFER)
1684 		bp->b_bcount = SR_MAXFER;
1685 	minphys(bp);
1686 }
1687 
1688 void
1689 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
1690 {
1691 	size_t			copy_cnt;
1692 
1693 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
1694 	    xs, size);
1695 
1696 	if (xs->datalen) {
1697 		copy_cnt = MIN(size, xs->datalen);
1698 		bcopy(v, xs->data, copy_cnt);
1699 	}
1700 }
1701 
1702 int
1703 sr_ccb_alloc(struct sr_discipline *sd)
1704 {
1705 	struct sr_ccb		*ccb;
1706 	int			i;
1707 
1708 	if (!sd)
1709 		return (1);
1710 
1711 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
1712 
1713 	if (sd->sd_ccb)
1714 		return (1);
1715 
1716 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
1717 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
1718 	TAILQ_INIT(&sd->sd_ccb_freeq);
1719 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
1720 		ccb = &sd->sd_ccb[i];
1721 		ccb->ccb_dis = sd;
1722 		sr_ccb_put(ccb);
1723 	}
1724 
1725 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
1726 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
1727 
1728 	return (0);
1729 }
1730 
1731 void
1732 sr_ccb_free(struct sr_discipline *sd)
1733 {
1734 	struct sr_ccb		*ccb;
1735 
1736 	if (!sd)
1737 		return;
1738 
1739 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
1740 
1741 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
1742 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1743 
1744 	if (sd->sd_ccb)
1745 		free(sd->sd_ccb, M_DEVBUF);
1746 }
1747 
1748 struct sr_ccb *
1749 sr_ccb_get(struct sr_discipline *sd)
1750 {
1751 	struct sr_ccb		*ccb;
1752 	int			s;
1753 
1754 	s = splbio();
1755 
1756 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
1757 	if (ccb) {
1758 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1759 		ccb->ccb_state = SR_CCB_INPROGRESS;
1760 	}
1761 
1762 	splx(s);
1763 
1764 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
1765 	    ccb);
1766 
1767 	return (ccb);
1768 }
1769 
1770 void
1771 sr_ccb_put(struct sr_ccb *ccb)
1772 {
1773 	struct sr_discipline	*sd = ccb->ccb_dis;
1774 	int			s;
1775 
1776 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
1777 	    ccb);
1778 
1779 	s = splbio();
1780 
1781 	ccb->ccb_wu = NULL;
1782 	ccb->ccb_state = SR_CCB_FREE;
1783 	ccb->ccb_target = -1;
1784 	ccb->ccb_opaque = NULL;
1785 
1786 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
1787 
1788 	splx(s);
1789 }
1790 
1791 int
1792 sr_wu_alloc(struct sr_discipline *sd)
1793 {
1794 	struct sr_workunit	*wu;
1795 	int			i, no_wu;
1796 
1797 	if (!sd)
1798 		return (1);
1799 
1800 	DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
1801 	    sd, sd->sd_max_wu);
1802 
1803 	if (sd->sd_wu)
1804 		return (1);
1805 
1806 	no_wu = sd->sd_max_wu;
1807 	sd->sd_wu_pending = no_wu;
1808 
1809 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
1810 	    M_DEVBUF, M_WAITOK | M_ZERO);
1811 	TAILQ_INIT(&sd->sd_wu_freeq);
1812 	TAILQ_INIT(&sd->sd_wu_pendq);
1813 	TAILQ_INIT(&sd->sd_wu_defq);
1814 	for (i = 0; i < no_wu; i++) {
1815 		wu = &sd->sd_wu[i];
1816 		wu->swu_dis = sd;
1817 		sr_wu_put(sd, wu);
1818 	}
1819 
1820 	return (0);
1821 }
1822 
1823 void
1824 sr_wu_free(struct sr_discipline *sd)
1825 {
1826 	struct sr_workunit	*wu;
1827 
1828 	if (!sd)
1829 		return;
1830 
1831 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
1832 
1833 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
1834 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1835 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
1836 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
1837 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
1838 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
1839 
1840 	if (sd->sd_wu)
1841 		free(sd->sd_wu, M_DEVBUF);
1842 }
1843 
1844 void
1845 sr_wu_put(void *xsd, void *xwu)
1846 {
1847 	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
1848 	struct sr_workunit	*wu = (struct sr_workunit *)xwu;
1849 	struct sr_ccb		*ccb;
1850 
1851 	int			s;
1852 
1853 	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
1854 
1855 	s = splbio();
1856 	if (wu->swu_cb_active == 1)
1857 		panic("%s: sr_wu_put got active wu", DEVNAME(sd->sd_sc));
1858 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1859 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1860 		sr_ccb_put(ccb);
1861 	}
1862 	splx(s);
1863 
1864 	bzero(wu, sizeof(*wu));
1865 	TAILQ_INIT(&wu->swu_ccb);
1866 	wu->swu_dis = sd;
1867 
1868 	mtx_enter(&sd->sd_wu_mtx);
1869 	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
1870 	sd->sd_wu_pending--;
1871 	mtx_leave(&sd->sd_wu_mtx);
1872 }
1873 
1874 void *
1875 sr_wu_get(void *xsd)
1876 {
1877 	struct sr_discipline	*sd = (struct sr_discipline *)xsd;
1878 	struct sr_workunit	*wu;
1879 
1880 	mtx_enter(&sd->sd_wu_mtx);
1881 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
1882 	if (wu) {
1883 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1884 		sd->sd_wu_pending++;
1885 	}
1886 	mtx_leave(&sd->sd_wu_mtx);
1887 
1888 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
1889 
1890 	return (wu);
1891 }
1892 
1893 void
1894 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs)
1895 {
1896 	DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs);
1897 
1898 	scsi_done(xs);
1899 }
1900 
1901 void
1902 sr_scsi_cmd(struct scsi_xfer *xs)
1903 {
1904 	int			s;
1905 	struct scsi_link	*link = xs->sc_link;
1906 	struct sr_softc		*sc = link->adapter_softc;
1907 	struct sr_workunit	*wu = NULL;
1908 	struct sr_discipline	*sd;
1909 	struct sr_ccb		*ccb;
1910 
1911 	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p "
1912 	    "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags);
1913 
1914 	sd = sc->sc_dis[link->scsibus];
1915 	if (sd == NULL) {
1916 		s = splhigh();
1917 		sd = sc->sc_attach_dis;
1918 		splx(s);
1919 
1920 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n",
1921 		    DEVNAME(sc), sd);
1922 		if (sd == NULL) {
1923 			printf("%s: sr_scsi_cmd NULL discipline\n",
1924 			    DEVNAME(sc));
1925 			goto stuffup;
1926 		}
1927 	}
1928 
1929 	if (sd->sd_deleted) {
1930 		printf("%s: %s device is being deleted, failing io\n",
1931 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
1932 		goto stuffup;
1933 	}
1934 
1935 	wu = xs->io;
1936 	/* scsi layer *can* re-send wu without calling sr_wu_put(). */
1937 	s = splbio();
1938 	if (wu->swu_cb_active == 1)
1939 		panic("%s: sr_scsi_cmd got active wu", DEVNAME(sd->sd_sc));
1940 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1941 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1942 		sr_ccb_put(ccb);
1943 	}
1944 	splx(s);
1945 
1946 	bzero(wu, sizeof(*wu));
1947 	TAILQ_INIT(&wu->swu_ccb);
1948 	wu->swu_state = SR_WU_INPROGRESS;
1949 	wu->swu_dis = sd;
1950 	wu->swu_xs = xs;
1951 
1952 	/* the midlayer will query LUNs so report sense to stop scanning */
1953 	if (link->target != 0 || link->lun != 0) {
1954 		DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n",
1955 		    DEVNAME(sc), link->target, link->lun);
1956 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
1957 		    SSD_ERRCODE_VALID;
1958 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
1959 		sd->sd_scsi_sense.add_sense_code = 0x25;
1960 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
1961 		sd->sd_scsi_sense.extra_len = 4;
1962 		goto stuffup;
1963 	}
1964 
1965 	switch (xs->cmd->opcode) {
1966 	case READ_COMMAND:
1967 	case READ_BIG:
1968 	case READ_16:
1969 	case WRITE_COMMAND:
1970 	case WRITE_BIG:
1971 	case WRITE_16:
1972 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
1973 		    DEVNAME(sc), xs->cmd->opcode);
1974 		if (sd->sd_scsi_rw(wu))
1975 			goto stuffup;
1976 		break;
1977 
1978 	case SYNCHRONIZE_CACHE:
1979 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
1980 		    DEVNAME(sc));
1981 		if (sd->sd_scsi_sync(wu))
1982 			goto stuffup;
1983 		goto complete;
1984 
1985 	case TEST_UNIT_READY:
1986 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
1987 		    DEVNAME(sc));
1988 		if (sd->sd_scsi_tur(wu))
1989 			goto stuffup;
1990 		goto complete;
1991 
1992 	case START_STOP:
1993 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
1994 		    DEVNAME(sc));
1995 		if (sd->sd_scsi_start_stop(wu))
1996 			goto stuffup;
1997 		goto complete;
1998 
1999 	case INQUIRY:
2000 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
2001 		    DEVNAME(sc));
2002 		if (sd->sd_scsi_inquiry(wu))
2003 			goto stuffup;
2004 		goto complete;
2005 
2006 	case READ_CAPACITY:
2007 	case READ_CAPACITY_16:
2008 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
2009 		    DEVNAME(sc), xs->cmd->opcode);
2010 		if (sd->sd_scsi_read_cap(wu))
2011 			goto stuffup;
2012 		goto complete;
2013 
2014 	case REQUEST_SENSE:
2015 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
2016 		    DEVNAME(sc));
2017 		if (sd->sd_scsi_req_sense(wu))
2018 			goto stuffup;
2019 		goto complete;
2020 
2021 	default:
2022 		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
2023 		    DEVNAME(sc), xs->cmd->opcode);
2024 		/* XXX might need to add generic function to handle others */
2025 		goto stuffup;
2026 	}
2027 
2028 	return;
2029 stuffup:
2030 	if (sd && sd->sd_scsi_sense.error_code) {
2031 		xs->error = XS_SENSE;
2032 		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
2033 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2034 	} else {
2035 		xs->error = XS_DRIVER_STUFFUP;
2036 	}
2037 complete:
2038 	sr_scsi_done(sd, xs);
2039 }
2040 int
2041 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
2042 {
2043 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
2044 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
2045 
2046 	switch (cmd) {
2047 	case DIOCGCACHE:
2048 	case DIOCSCACHE:
2049 		return (EOPNOTSUPP);
2050 	default:
2051 		return (sr_ioctl(link->adapter_softc, cmd, addr));
2052 	}
2053 }
2054 
2055 int
2056 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr)
2057 {
2058 	struct sr_softc		*sc = (struct sr_softc *)dev;
2059 	int			rv = 0;
2060 
2061 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc));
2062 
2063 	rw_enter_write(&sc->sc_lock);
2064 
2065 	switch (cmd) {
2066 	case BIOCINQ:
2067 		DNPRINTF(SR_D_IOCTL, "inq\n");
2068 		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
2069 		break;
2070 
2071 	case BIOCVOL:
2072 		DNPRINTF(SR_D_IOCTL, "vol\n");
2073 		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
2074 		break;
2075 
2076 	case BIOCDISK:
2077 		DNPRINTF(SR_D_IOCTL, "disk\n");
2078 		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
2079 		break;
2080 
2081 	case BIOCALARM:
2082 		DNPRINTF(SR_D_IOCTL, "alarm\n");
2083 		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
2084 		break;
2085 
2086 	case BIOCBLINK:
2087 		DNPRINTF(SR_D_IOCTL, "blink\n");
2088 		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
2089 		break;
2090 
2091 	case BIOCSETSTATE:
2092 		DNPRINTF(SR_D_IOCTL, "setstate\n");
2093 		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
2094 		break;
2095 
2096 	case BIOCCREATERAID:
2097 		DNPRINTF(SR_D_IOCTL, "createraid\n");
2098 		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
2099 		break;
2100 
2101 	case BIOCDELETERAID:
2102 		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
2103 		break;
2104 
2105 	case BIOCDISCIPLINE:
2106 		rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr);
2107 		break;
2108 
2109 	case BIOCINSTALLBOOT:
2110 		rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr);
2111 		break;
2112 
2113 	default:
2114 		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
2115 		rv = ENOTTY;
2116 	}
2117 
2118 	rw_exit_write(&sc->sc_lock);
2119 
2120 	return (rv);
2121 }
2122 
2123 int
2124 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
2125 {
2126 	int			i, vol, disk;
2127 
2128 	for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++)
2129 		/* XXX this will not work when we stagger disciplines */
2130 		if (sc->sc_dis[i]) {
2131 			vol++;
2132 			disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
2133 		}
2134 
2135 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
2136 	bi->bi_novol = vol + sc->sc_hotspare_no;
2137 	bi->bi_nodisk = disk + sc->sc_hotspare_no;
2138 
2139 	return (0);
2140 }
2141 
2142 int
2143 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
2144 {
2145 	int			i, vol, rv = EINVAL;
2146 	struct sr_discipline	*sd;
2147 	struct sr_chunk		*hotspare;
2148 	daddr64_t		rb, sz;
2149 
2150 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2151 		/* XXX this will not work when we stagger disciplines */
2152 		if (sc->sc_dis[i])
2153 			vol++;
2154 		if (vol != bv->bv_volid)
2155 			continue;
2156 
2157 		if (sc->sc_dis[i] == NULL)
2158 			goto done;
2159 
2160 		sd = sc->sc_dis[i];
2161 		bv->bv_status = sd->sd_vol_status;
2162 		bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
2163 		bv->bv_level = sd->sd_meta->ssdi.ssd_level;
2164 		bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
2165 
2166 #ifdef CRYPTO
2167 		if (sd->sd_meta->ssdi.ssd_level == 'C' &&
2168 		    sd->mds.mdd_crypto.key_disk != NULL)
2169 			bv->bv_nodisk++;
2170 #endif
2171 
2172 		if (bv->bv_status == BIOC_SVREBUILD) {
2173 			sz = sd->sd_meta->ssdi.ssd_size;
2174 			rb = sd->sd_meta->ssd_rebuild;
2175 			if (rb > 0)
2176 				bv->bv_percent = 100 -
2177 				    ((sz * 100 - rb * 100) / sz) - 1;
2178 			else
2179 				bv->bv_percent = 0;
2180 		}
2181 		strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
2182 		    sizeof(bv->bv_dev));
2183 		strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
2184 		    sizeof(bv->bv_vendor));
2185 		rv = 0;
2186 		goto done;
2187 	}
2188 
2189 	/* Check hotspares list. */
2190 	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
2191 		vol++;
2192 		if (vol != bv->bv_volid)
2193 			continue;
2194 
2195 		bv->bv_status = BIOC_SVONLINE;
2196 		bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
2197 		bv->bv_level = -1;	/* Hotspare. */
2198 		bv->bv_nodisk = 1;
2199 		strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname,
2200 		    sizeof(bv->bv_dev));
2201 		strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname,
2202 		    sizeof(bv->bv_vendor));
2203 		rv = 0;
2204 		goto done;
2205 	}
2206 
2207 done:
2208 	return (rv);
2209 }
2210 
2211 int
2212 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
2213 {
2214 	int			i, vol, rv = EINVAL, id;
2215 	struct sr_chunk		*src, *hotspare;
2216 
2217 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2218 		/* XXX this will not work when we stagger disciplines */
2219 		if (sc->sc_dis[i])
2220 			vol++;
2221 		if (vol != bd->bd_volid)
2222 			continue;
2223 
2224 		if (sc->sc_dis[i] == NULL)
2225 			goto done;
2226 
2227 		id = bd->bd_diskid;
2228 
2229 		if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
2230 			src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
2231 #ifdef CRYPTO
2232 		else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no &&
2233 		    sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' &&
2234 		    sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL)
2235 			src = sc->sc_dis[i]->mds.mdd_crypto.key_disk;
2236 #endif
2237 		else
2238 			break;
2239 
2240 		bd->bd_status = src->src_meta.scm_status;
2241 		bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
2242 		bd->bd_channel = vol;
2243 		bd->bd_target = id;
2244 		strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
2245 		    sizeof(bd->bd_vendor));
2246 		rv = 0;
2247 		goto done;
2248 	}
2249 
2250 	/* Check hotspares list. */
2251 	SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) {
2252 		vol++;
2253 		if (vol != bd->bd_volid)
2254 			continue;
2255 
2256 		if (bd->bd_diskid != 0)
2257 			break;
2258 
2259 		bd->bd_status = hotspare->src_meta.scm_status;
2260 		bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT;
2261 		bd->bd_channel = vol;
2262 		bd->bd_target = bd->bd_diskid;
2263 		strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname,
2264 		    sizeof(bd->bd_vendor));
2265 		rv = 0;
2266 		goto done;
2267 	}
2268 
2269 done:
2270 	return (rv);
2271 }
2272 
2273 int
2274 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
2275 {
2276 	int			rv = EINVAL;
2277 	int			i, vol, found, c;
2278 	struct sr_discipline	*sd = NULL;
2279 	struct sr_chunk		*ch_entry;
2280 	struct sr_chunk_head	*cl;
2281 
2282 	if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED)
2283 		goto done;
2284 
2285 	if (bs->bs_status == BIOC_SSHOTSPARE) {
2286 		rv = sr_hotspare(sc, (dev_t)bs->bs_other_id);
2287 		goto done;
2288 	}
2289 
2290 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2291 		/* XXX this will not work when we stagger disciplines */
2292 		if (sc->sc_dis[i])
2293 			vol++;
2294 		if (vol != bs->bs_volid)
2295 			continue;
2296 		sd = sc->sc_dis[i];
2297 		break;
2298 	}
2299 	if (sd == NULL)
2300 		goto done;
2301 
2302 	switch (bs->bs_status) {
2303 	case BIOC_SSOFFLINE:
2304 		/* Take chunk offline */
2305 		found = c = 0;
2306 		cl = &sd->sd_vol.sv_chunk_list;
2307 		SLIST_FOREACH(ch_entry, cl, src_link) {
2308 			if (ch_entry->src_dev_mm == bs->bs_other_id) {
2309 				found = 1;
2310 				break;
2311 			}
2312 			c++;
2313 		}
2314 		if (found == 0) {
2315 			printf("%s: chunk not part of array\n", DEVNAME(sc));
2316 			goto done;
2317 		}
2318 
2319 		/* XXX: check current state first */
2320 		sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE);
2321 
2322 		if (sr_meta_save(sd, SR_META_DIRTY)) {
2323 			printf("%s: could not save metadata to %s\n",
2324 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
2325 			goto done;
2326 		}
2327 		rv = 0;
2328 		break;
2329 
2330 	case BIOC_SDSCRUB:
2331 		break;
2332 
2333 	case BIOC_SSREBUILD:
2334 		rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0);
2335 		break;
2336 
2337 	default:
2338 		printf("%s: unsupported state request %d\n",
2339 		    DEVNAME(sc), bs->bs_status);
2340 	}
2341 
2342 done:
2343 	return (rv);
2344 }
2345 
2346 int
2347 sr_chunk_in_use(struct sr_softc *sc, dev_t dev)
2348 {
2349 	struct sr_discipline	*sd;
2350 	struct sr_chunk		*chunk;
2351 	int			i, c;
2352 
2353 	/* See if chunk is already in use. */
2354 	for (i = 0; i < SR_MAXSCSIBUS; i++) {
2355 		if (sc->sc_dis[i] == NULL)
2356 			continue;
2357 		sd = sc->sc_dis[i];
2358 		for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) {
2359 			chunk = sd->sd_vol.sv_chunks[c];
2360 			if (chunk->src_dev_mm == dev)
2361 				return chunk->src_meta.scm_status;
2362 		}
2363 	}
2364 
2365 	/* Check hotspares list. */
2366 	SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link)
2367 		if (chunk->src_dev_mm == dev)
2368 			return chunk->src_meta.scm_status;
2369 
2370 	return BIOC_SDINVALID;
2371 }
2372 
2373 int
2374 sr_hotspare(struct sr_softc *sc, dev_t dev)
2375 {
2376 	struct sr_discipline	*sd = NULL;
2377 	struct sr_metadata	*sm = NULL;
2378 	struct sr_meta_chunk    *hm;
2379 	struct sr_chunk_head	*cl;
2380 	struct sr_chunk		*chunk, *last, *hotspare = NULL;
2381 	struct sr_uuid		uuid;
2382 	struct disklabel	label;
2383 	struct vnode		*vn;
2384 	daddr64_t		size;
2385 	char			devname[32];
2386 	int			rv = EINVAL;
2387 	int			c, part, open = 0;
2388 
2389 	/*
2390 	 * Add device to global hotspares list.
2391 	 */
2392 
2393 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2394 
2395 	/* Make sure chunk is not already in use. */
2396 	c = sr_chunk_in_use(sc, dev);
2397 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2398 		if (c == BIOC_SDHOTSPARE)
2399 			printf("%s: %s is already a hotspare\n",
2400 			    DEVNAME(sc), devname);
2401 		else
2402 			printf("%s: %s is already in use\n",
2403 			    DEVNAME(sc), devname);
2404 		goto done;
2405 	}
2406 
2407 	/* XXX - See if there is an existing degraded volume... */
2408 
2409 	/* Open device. */
2410 	if (bdevvp(dev, &vn)) {
2411 		printf("%s:, sr_hotspare: can't allocate vnode\n", DEVNAME(sc));
2412 		goto done;
2413 	}
2414 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2415 		DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
2416 		    DEVNAME(sc), devname);
2417 		vput(vn);
2418 		goto fail;
2419 	}
2420 	open = 1; /* close dev on error */
2421 
2422 	/* Get partition details. */
2423 	part = DISKPART(dev);
2424 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2425 	    NOCRED, curproc)) {
2426 		DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n",
2427 		    DEVNAME(sc));
2428 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2429 		vput(vn);
2430 		goto fail;
2431 	}
2432 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2433 		printf("%s: %s partition not of type RAID (%d)\n",
2434 		    DEVNAME(sc), devname,
2435 		    label.d_partitions[part].p_fstype);
2436 		goto fail;
2437 	}
2438 
2439 	/* Calculate partition size. */
2440 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2441 
2442 	/*
2443 	 * Create and populate chunk metadata.
2444 	 */
2445 
2446 	sr_uuid_get(&uuid);
2447 	hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
2448 
2449 	hotspare->src_dev_mm = dev;
2450 	hotspare->src_vn = vn;
2451 	strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname));
2452 	hotspare->src_size = size;
2453 
2454 	hm = &hotspare->src_meta;
2455 	hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
2456 	hm->scmi.scm_chunk_id = 0;
2457 	hm->scmi.scm_size = size;
2458 	hm->scmi.scm_coerced_size = size;
2459 	strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname));
2460 	bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid));
2461 
2462 	sr_checksum(sc, hm, &hm->scm_checksum,
2463 	    sizeof(struct sr_meta_chunk_invariant));
2464 
2465 	hm->scm_status = BIOC_SDHOTSPARE;
2466 
2467 	/*
2468 	 * Create and populate our own discipline and metadata.
2469 	 */
2470 
2471 	sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO);
2472 	sm->ssdi.ssd_magic = SR_MAGIC;
2473 	sm->ssdi.ssd_version = SR_META_VERSION;
2474 	sm->ssd_ondisk = 0;
2475 	sm->ssdi.ssd_vol_flags = 0;
2476 	bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid));
2477 	sm->ssdi.ssd_chunk_no = 1;
2478 	sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID;
2479 	sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL;
2480 	sm->ssdi.ssd_size = size;
2481 	strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor));
2482 	snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product),
2483 	    "SR %s", "HOTSPARE");
2484 	snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision),
2485 	    "%03d", SR_META_VERSION);
2486 
2487 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
2488 	sd->sd_sc = sc;
2489 	sd->sd_meta = sm;
2490 	sd->sd_meta_type = SR_META_F_NATIVE;
2491 	sd->sd_vol_status = BIOC_SVONLINE;
2492 	strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name));
2493 	SLIST_INIT(&sd->sd_meta_opt);
2494 
2495 	/* Add chunk to volume. */
2496 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF,
2497 	    M_WAITOK | M_ZERO);
2498 	sd->sd_vol.sv_chunks[0] = hotspare;
2499 	SLIST_INIT(&sd->sd_vol.sv_chunk_list);
2500 	SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link);
2501 
2502 	/* Save metadata. */
2503 	if (sr_meta_save(sd, SR_META_DIRTY)) {
2504 		printf("%s: could not save metadata to %s\n",
2505 		    DEVNAME(sc), devname);
2506 		goto fail;
2507 	}
2508 
2509 	/*
2510 	 * Add chunk to hotspare list.
2511 	 */
2512 	rw_enter_write(&sc->sc_hs_lock);
2513 	cl = &sc->sc_hotspare_list;
2514 	if (SLIST_EMPTY(cl))
2515 		SLIST_INSERT_HEAD(cl, hotspare, src_link);
2516 	else {
2517 		SLIST_FOREACH(chunk, cl, src_link)
2518 			last = chunk;
2519 		SLIST_INSERT_AFTER(last, hotspare, src_link);
2520 	}
2521 	sc->sc_hotspare_no++;
2522 	rw_exit_write(&sc->sc_hs_lock);
2523 
2524 	rv = 0;
2525 	goto done;
2526 
2527 fail:
2528 	if (hotspare)
2529 		free(hotspare, M_DEVBUF);
2530 
2531 done:
2532 	if (sd && sd->sd_vol.sv_chunks)
2533 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
2534 	if (sd)
2535 		free(sd, M_DEVBUF);
2536 	if (sm)
2537 		free(sm, M_DEVBUF);
2538 	if (open) {
2539 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2540 		vput(vn);
2541 	}
2542 
2543 	return (rv);
2544 }
2545 
/*
 * Two-argument callback wrapper around sr_hotspare_rebuild().
 * arg1 is the discipline to rebuild; arg2 is unused.
 */
void
sr_hotspare_rebuild_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = (struct sr_discipline *)arg1;

	sr_hotspare_rebuild(sd);
}
2551 
2552 void
2553 sr_hotspare_rebuild(struct sr_discipline *sd)
2554 {
2555 	struct sr_chunk_head	*cl;
2556 	struct sr_chunk		*hotspare, *chunk = NULL;
2557 	struct sr_workunit	*wu;
2558 	struct sr_ccb           *ccb;
2559 	int			i, s, chunk_no, busy;
2560 
2561 	/*
2562 	 * Attempt to locate a hotspare and initiate rebuild.
2563 	 */
2564 
2565 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
2566 		if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
2567 		    BIOC_SDOFFLINE) {
2568 			chunk_no = i;
2569 			chunk = sd->sd_vol.sv_chunks[i];
2570 			break;
2571 		}
2572 	}
2573 
2574 	if (chunk == NULL) {
2575 		printf("%s: no offline chunk found on %s!\n",
2576 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
2577 		return;
2578 	}
2579 
2580 	/* See if we have a suitable hotspare... */
2581 	rw_enter_write(&sd->sd_sc->sc_hs_lock);
2582 	cl = &sd->sd_sc->sc_hotspare_list;
2583 	SLIST_FOREACH(hotspare, cl, src_link)
2584 		if (hotspare->src_size >= chunk->src_size)
2585 			break;
2586 
2587 	if (hotspare != NULL) {
2588 
2589 		printf("%s: %s volume degraded, will attempt to "
2590 		    "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc),
2591 		    sd->sd_meta->ssd_devname, hotspare->src_devname);
2592 
2593 		/*
2594 		 * Ensure that all pending I/O completes on the failed chunk
2595 		 * before trying to initiate a rebuild.
2596 		 */
2597 		i = 0;
2598 		do {
2599 			busy = 0;
2600 
2601 			s = splbio();
2602 			if (wu->swu_cb_active == 1)
2603 				panic("%s: sr_hotspare_rebuild",
2604 				    DEVNAME(sd->sd_sc));
2605 			TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) {
2606 				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2607 					if (ccb->ccb_target == chunk_no)
2608 						busy = 1;
2609 				}
2610 			}
2611 			TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) {
2612 				TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2613 					if (ccb->ccb_target == chunk_no)
2614 						busy = 1;
2615 				}
2616 			}
2617 			splx(s);
2618 
2619 			if (busy) {
2620 				tsleep(sd, PRIBIO, "sr_hotspare", hz);
2621 				i++;
2622 			}
2623 
2624 		} while (busy && i < 120);
2625 
2626 		DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to "
2627 		    "complete on failed chunk %s\n", DEVNAME(sd->sd_sc),
2628 		    i, chunk->src_devname);
2629 
2630 		if (busy) {
2631 			printf("%s: pending I/O failed to complete on "
2632 			    "failed chunk %s, hotspare rebuild aborted...\n",
2633 			    DEVNAME(sd->sd_sc), chunk->src_devname);
2634 			goto done;
2635 		}
2636 
2637 		s = splbio();
2638 		rw_enter_write(&sd->sd_sc->sc_lock);
2639 		if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) {
2640 
2641 			/* Remove hotspare from available list. */
2642 			sd->sd_sc->sc_hotspare_no--;
2643 			SLIST_REMOVE(cl, hotspare, sr_chunk, src_link);
2644 			free(hotspare, M_DEVBUF);
2645 
2646 		}
2647 		rw_exit_write(&sd->sd_sc->sc_lock);
2648 		splx(s);
2649 	}
2650 done:
2651 	rw_exit_write(&sd->sd_sc->sc_hs_lock);
2652 }
2653 
2654 int
2655 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare)
2656 {
2657 	struct sr_softc		*sc = sd->sd_sc;
2658 	int			rv = EINVAL, part;
2659 	int			c, found, open = 0;
2660 	char			devname[32];
2661 	struct vnode		*vn;
2662 	daddr64_t		size, csize;
2663 	struct disklabel	label;
2664 	struct sr_meta_chunk	*old, *new;
2665 
2666 	/*
2667 	 * Attempt to initiate a rebuild onto the specified device.
2668 	 */
2669 
2670 	if (!(sd->sd_capabilities & SR_CAP_REBUILD)) {
2671 		printf("%s: discipline does not support rebuild\n",
2672 		    DEVNAME(sc));
2673 		goto done;
2674 	}
2675 
2676 	/* make sure volume is in the right state */
2677 	if (sd->sd_vol_status == BIOC_SVREBUILD) {
2678 		printf("%s: rebuild already in progress\n", DEVNAME(sc));
2679 		goto done;
2680 	}
2681 	if (sd->sd_vol_status != BIOC_SVDEGRADED) {
2682 		printf("%s: %s not degraded\n", DEVNAME(sc),
2683 		    sd->sd_meta->ssd_devname);
2684 		goto done;
2685 	}
2686 
2687 	/* find offline chunk */
2688 	for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
2689 		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
2690 		    BIOC_SDOFFLINE) {
2691 			found = c;
2692 			new = &sd->sd_vol.sv_chunks[c]->src_meta;
2693 			if (c > 0)
2694 				break; /* roll at least once over the for */
2695 		} else {
2696 			csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size;
2697 			old = &sd->sd_vol.sv_chunks[c]->src_meta;
2698 			if (found != -1)
2699 				break;
2700 		}
2701 	if (found == -1) {
2702 		printf("%s: no offline chunks available for rebuild\n",
2703 		    DEVNAME(sc));
2704 		goto done;
2705 	}
2706 
2707 	/* populate meta entry */
2708 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2709 	if (bdevvp(dev, &vn)) {
2710 		printf("%s:, sr_rebuild_init: can't allocate vnode\n",
2711 		    DEVNAME(sc));
2712 		goto done;
2713 	}
2714 
2715 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
2716 		DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
2717 		    "open %s\n", DEVNAME(sc), devname);
2718 		vput(vn);
2719 		goto done;
2720 	}
2721 	open = 1; /* close dev on error */
2722 
2723 	/* get partition */
2724 	part = DISKPART(dev);
2725 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD,
2726 	    NOCRED, curproc)) {
2727 		DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n",
2728 		    DEVNAME(sc));
2729 		goto done;
2730 	}
2731 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2732 		printf("%s: %s partition not of type RAID (%d)\n",
2733 		    DEVNAME(sc), devname,
2734 		    label.d_partitions[part].p_fstype);
2735 		goto done;
2736 	}
2737 
2738 	/* is partition large enough? */
2739 	size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET;
2740 	if (size < csize) {
2741 		printf("%s: partition too small, at least %llu B required\n",
2742 		    DEVNAME(sc), csize << DEV_BSHIFT);
2743 		goto done;
2744 	} else if (size > csize)
2745 		printf("%s: partition too large, wasting %llu B\n",
2746 		    DEVNAME(sc), (size - csize) << DEV_BSHIFT);
2747 
2748 	/* make sure we are not stomping on some other partition */
2749 	c = sr_chunk_in_use(sc, dev);
2750 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE &&
2751 	    !(hotspare && c == BIOC_SDHOTSPARE)) {
2752 		printf("%s: %s is already in use\n", DEVNAME(sc), devname);
2753 		goto done;
2754 	}
2755 
2756 	/* Reset rebuild counter since we rebuilding onto a new chunk. */
2757 	sd->sd_meta->ssd_rebuild = 0;
2758 
2759 	/* recreate metadata */
2760 	open = 0; /* leave dev open from here on out */
2761 	sd->sd_vol.sv_chunks[found]->src_dev_mm = dev;
2762 	sd->sd_vol.sv_chunks[found]->src_vn = vn;
2763 	new->scmi.scm_volid = old->scmi.scm_volid;
2764 	new->scmi.scm_chunk_id = found;
2765 	strlcpy(new->scmi.scm_devname, devname,
2766 	    sizeof new->scmi.scm_devname);
2767 	new->scmi.scm_size = size;
2768 	new->scmi.scm_coerced_size = old->scmi.scm_coerced_size;
2769 	bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid,
2770 	    sizeof new->scmi.scm_uuid);
2771 	sr_checksum(sc, new, &new->scm_checksum,
2772 	    sizeof(struct sr_meta_chunk_invariant));
2773 	sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD);
2774 	if (sr_meta_save(sd, SR_META_DIRTY)) {
2775 		printf("%s: could not save metadata to %s\n",
2776 		    DEVNAME(sc), devname);
2777 		open = 1;
2778 		goto done;
2779 	}
2780 
2781 	printf("%s: rebuild of %s started on %s\n", DEVNAME(sc),
2782 	    sd->sd_meta->ssd_devname, devname);
2783 
2784 	sd->sd_reb_abort = 0;
2785 	kthread_create_deferred(sr_rebuild, sd);
2786 
2787 	rv = 0;
2788 done:
2789 	if (open) {
2790 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc);
2791 		vput(vn);
2792 	}
2793 
2794 	return (rv);
2795 }
2796 
2797 void
2798 sr_roam_chunks(struct sr_discipline *sd)
2799 {
2800 	struct sr_softc		*sc = sd->sd_sc;
2801 	struct sr_chunk		*chunk;
2802 	struct sr_meta_chunk	*meta;
2803 	int			roamed = 0;
2804 
2805 	/* Have any chunks roamed? */
2806 	SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) {
2807 		meta = &chunk->src_meta;
2808 		if (strncmp(meta->scmi.scm_devname, chunk->src_devname,
2809 		    sizeof(meta->scmi.scm_devname))) {
2810 
2811 			printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
2812 			    meta->scmi.scm_devname, chunk->src_devname);
2813 
2814 			strlcpy(meta->scmi.scm_devname, chunk->src_devname,
2815 			    sizeof(meta->scmi.scm_devname));
2816 
2817 			roamed++;
2818 		}
2819 	}
2820 
2821 	if (roamed)
2822 		sr_meta_save(sd, SR_META_DIRTY);
2823 }
2824 
2825 int
2826 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
2827 {
2828 	dev_t			*dt;
2829 	int			i, s, no_chunk, rv = EINVAL, vol;
2830 	int			no_meta, updatemeta = 0;
2831 	struct sr_chunk_head	*cl;
2832 	struct sr_discipline	*sd = NULL;
2833 	struct sr_chunk		*ch_entry;
2834 	struct device		*dev, *dev2;
2835 	struct scsibus_attach_args saa;
2836 	char			devname[32];
2837 
2838 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
2839 	    DEVNAME(sc), user);
2840 
2841 	/* user input */
2842 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
2843 		goto unwind;
2844 
2845 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
2846 	if (user) {
2847 		if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0)
2848 			goto unwind;
2849 	} else
2850 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
2851 
2852 	/* Initialise discipline. */
2853 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
2854 	sd->sd_sc = sc;
2855 	SLIST_INIT(&sd->sd_meta_opt);
2856 	sd->sd_workq = workq_create("srdis", 1, IPL_BIO);
2857 	if (sd->sd_workq == NULL) {
2858 		printf("%s: could not create workq\n", DEVNAME(sc));
2859 		goto unwind;
2860 	}
2861 	if (sr_discipline_init(sd, bc->bc_level)) {
2862 		printf("%s: could not initialize discipline\n", DEVNAME(sc));
2863 		goto unwind;
2864 	}
2865 
2866 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
2867 	cl = &sd->sd_vol.sv_chunk_list;
2868 	SLIST_INIT(cl);
2869 
2870 	/* Ensure that chunks are not already in use. */
2871 	for (i = 0; i < no_chunk; i++) {
2872 		if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) {
2873 			sr_meta_getdevname(sc, dt[i], devname, sizeof(devname));
2874 			printf("%s: chunk %s already in use\n",
2875 			    DEVNAME(sc), devname);
2876 			goto unwind;
2877 		}
2878 	}
2879 
2880 	sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
2881 	if (sd->sd_meta_type == SR_META_F_INVALID) {
2882 		printf("%s: invalid metadata format\n", DEVNAME(sc));
2883 		goto unwind;
2884 	}
2885 
2886 	if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) {
2887 		printf("%s: can't attach metadata type %d\n", DEVNAME(sc),
2888 		    sd->sd_meta_type);
2889 		goto unwind;
2890 	}
2891 
2892 	/* force the raid volume by clearing metadata region */
2893 	if (bc->bc_flags & BIOC_SCFORCE) {
2894 		/* make sure disk isn't up and running */
2895 		if (sr_meta_read(sd))
2896 			if (sr_already_assembled(sd)) {
2897 				printf("%s: disk ", DEVNAME(sc));
2898 				sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
2899 				printf(" is currently in use; can't force "
2900 				    "create\n");
2901 				goto unwind;
2902 			}
2903 
2904 		if (sr_meta_clear(sd)) {
2905 			printf("%s: failed to clear metadata\n", DEVNAME(sc));
2906 			goto unwind;
2907 		}
2908 	}
2909 
2910 	if ((no_meta = sr_meta_read(sd)) == 0) {
2911 		/* fill out all chunk metadata */
2912 		sr_meta_chunks_create(sc, cl);
2913 		ch_entry = SLIST_FIRST(cl);
2914 
2915 		sd->sd_vol_status = BIOC_SVONLINE;
2916 		sd->sd_meta->ssdi.ssd_level = bc->bc_level;
2917 		sd->sd_meta->ssdi.ssd_chunk_no = no_chunk;
2918 
2919 		/* Make the volume UUID available. */
2920 		bcopy(&ch_entry->src_meta.scmi.scm_uuid,
2921 		    &sd->sd_meta->ssdi.ssd_uuid,
2922 		    sizeof(sd->sd_meta->ssdi.ssd_uuid));
2923 
2924 		if (sd->sd_create) {
2925 			if ((i = sd->sd_create(sd, bc, no_chunk,
2926 			    ch_entry->src_meta.scmi.scm_coerced_size))) {
2927 				rv = i;
2928 				goto unwind;
2929 			}
2930 		}
2931 
2932 		/* fill out all volume metadata */
2933 		DNPRINTF(SR_D_IOCTL,
2934 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
2935 		    DEVNAME(sc), sd->sd_meta->ssdi.ssd_size);
2936 		strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD",
2937 		    sizeof(sd->sd_meta->ssdi.ssd_vendor));
2938 		snprintf(sd->sd_meta->ssdi.ssd_product,
2939 		    sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s",
2940 		    sd->sd_name);
2941 		snprintf(sd->sd_meta->ssdi.ssd_revision,
2942 		    sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d",
2943 		    SR_META_VERSION);
2944 
2945 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
2946 		updatemeta = 1;
2947 	} else if (no_meta == no_chunk) {
2948 		if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
2949 			printf("%s: %s was not shutdown properly\n",
2950 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
2951 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
2952 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
2953 			    "metadata\n", DEVNAME(sc));
2954 			goto unwind;
2955 		}
2956 		if (sr_already_assembled(sd)) {
2957 			printf("%s: disk ", DEVNAME(sc));
2958 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
2959 			printf(" already assembled\n");
2960 			goto unwind;
2961 		}
2962 
2963 		if (sd->sd_assemble) {
2964 			if ((i = sd->sd_assemble(sd, bc, no_chunk))) {
2965 				rv = i;
2966 				goto unwind;
2967 			}
2968 		}
2969 
2970 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
2971 		    DEVNAME(sc));
2972 		updatemeta = 0;
2973 	} else if (no_meta == -1) {
2974 		printf("%s: one of the chunks has corrupt metadata; aborting "
2975 		    "assembly\n", DEVNAME(sc));
2976 		goto unwind;
2977 	} else {
2978 		if (sr_already_assembled(sd)) {
2979 			printf("%s: disk ", DEVNAME(sc));
2980 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
2981 			printf(" already assembled; will not partial "
2982 			    "assemble it\n");
2983 			goto unwind;
2984 		}
2985 
2986 		if (sd->sd_assemble) {
2987 			if ((i = sd->sd_assemble(sd, bc, no_chunk))) {
2988 				rv = i;
2989 				goto unwind;
2990 			}
2991 		}
2992 
2993 		printf("%s: trying to bring up %s degraded\n", DEVNAME(sc),
2994 		    sd->sd_meta->ssd_devname);
2995 	}
2996 
2997 	/* metadata SHALL be fully filled in at this point */
2998 
2999 	/* Make sure that metadata level matches assembly level. */
3000 	if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) {
3001 		printf("%s: volume level does not match metadata level!\n",
3002 		    DEVNAME(sc));
3003 		goto unwind;
3004 	}
3005 
3006 	/* allocate all resources */
3007 	if ((rv = sd->sd_alloc_resources(sd)))
3008 		goto unwind;
3009 
3010 	/* Adjust flags if necessary. */
3011 	if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) &&
3012 	    (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) !=
3013 	    (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) {
3014 		sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE;
3015 		sd->sd_meta->ssdi.ssd_vol_flags |=
3016 		    bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
3017 	}
3018 
3019 	if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) {
3020 		/* set volume status */
3021 		sd->sd_set_vol_state(sd);
3022 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3023 			printf("%s: %s offline, will not be brought online\n",
3024 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
3025 			goto unwind;
3026 		}
3027 
3028 		/* setup scsi midlayer */
3029 		mtx_init(&sd->sd_wu_mtx, IPL_BIO);
3030 		scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put);
3031 		if (sd->sd_openings)
3032 			sd->sd_link.openings = sd->sd_openings(sd);
3033 		else
3034 			sd->sd_link.openings = sd->sd_max_wu;
3035 		sd->sd_link.device_softc = sc;
3036 		sd->sd_link.adapter_softc = sc;
3037 		sd->sd_link.adapter = &sr_switch;
3038 		sd->sd_link.adapter_target = SR_MAX_LD;
3039 		sd->sd_link.adapter_buswidth = 1;
3040 		sd->sd_link.pool = &sd->sd_iopool;
3041 		bzero(&saa, sizeof(saa));
3042 		saa.saa_sc_link = &sd->sd_link;
3043 
3044 		/*
3045 		 * we passed all checks return ENXIO if volume can't be created
3046 		 */
3047 		rv = ENXIO;
3048 
3049 		/* clear sense data */
3050 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
3051 
3052 		/* use temporary discipline pointer */
3053 		s = splhigh();
3054 		sc->sc_attach_dis = sd;
3055 		splx(s);
3056 		dev2 = config_found(&sc->sc_dev, &saa, scsiprint);
3057 		s = splhigh();
3058 		sc->sc_attach_dis = NULL;
3059 		splx(s);
3060 		TAILQ_FOREACH(dev, &alldevs, dv_list)
3061 			if (dev->dv_parent == dev2)
3062 				break;
3063 		if (dev == NULL)
3064 			goto unwind;
3065 
3066 		DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n",
3067 		    DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus);
3068 
3069 		sc->sc_dis[sd->sd_link.scsibus] = sd;
3070 		for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++)
3071 			if (sc->sc_dis[i])
3072 				vol++;
3073 		sd->sd_scsibus_dev = dev2;
3074 
3075 		rv = 0;
3076 		if (updatemeta) {
3077 			/* fill out remaining volume metadata */
3078 			sd->sd_meta->ssdi.ssd_volid = vol;
3079 			strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
3080 			    sizeof(sd->sd_meta->ssd_devname));
3081 			sr_meta_init(sd, cl);
3082 		} else {
3083 			if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
3084 			    sizeof(dev->dv_xname))) {
3085 				printf("%s: volume %s is roaming, it used to "
3086 				    "be %s, updating metadata\n",
3087 				    DEVNAME(sc), dev->dv_xname,
3088 				    sd->sd_meta->ssd_devname);
3089 
3090 				sd->sd_meta->ssdi.ssd_volid = vol;
3091 				strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
3092 				    sizeof(sd->sd_meta->ssd_devname));
3093 			}
3094 		}
3095 
3096 		/* Update device name on any chunks which roamed. */
3097 		sr_roam_chunks(sd);
3098 
3099 #ifndef SMALL_KERNEL
3100 		if (sr_sensors_create(sd))
3101 			printf("%s: unable to create sensor for %s\n",
3102 			    DEVNAME(sc), dev->dv_xname);
3103 		else
3104 			sd->sd_vol.sv_sensor_valid = 1;
3105 #endif /* SMALL_KERNEL */
3106 	} else {
3107 		/* we are not an os disk */
3108 		if (updatemeta) {
3109 			/* fill out remaining volume metadata */
3110 			sd->sd_meta->ssdi.ssd_volid = 0;
3111 			strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname,
3112 			    sizeof(sd->sd_meta->ssd_devname));
3113 			sr_meta_init(sd, cl);
3114 		}
3115 		if (sd->sd_start_discipline(sd))
3116 			goto unwind;
3117 	}
3118 
3119 	/* save metadata to disk */
3120 	rv = sr_meta_save(sd, SR_META_DIRTY);
3121 	sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd);
3122 
3123 	if (sd->sd_vol_status == BIOC_SVREBUILD)
3124 		kthread_create_deferred(sr_rebuild, sd);
3125 
3126 	sd->sd_ready = 1;
3127 
3128 	return (rv);
3129 unwind:
3130 	sr_discipline_shutdown(sd);
3131 
3132 	/* XXX - use internal status values! */
3133 	if (rv == EAGAIN)
3134 		rv = 0;
3135 
3136 	return (rv);
3137 }
3138 
3139 int
3140 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
3141 {
3142 	struct sr_discipline	*sd = NULL;
3143 	int			rv = 1;
3144 	int			i;
3145 
3146 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
3147 	    dr->bd_dev);
3148 
3149 	for (i = 0; i < SR_MAXSCSIBUS; i++)
3150 		if (sc->sc_dis[i]) {
3151 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3152 			    dr->bd_dev,
3153 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3154 				sd = sc->sc_dis[i];
3155 				break;
3156 			}
3157 		}
3158 
3159 	if (sd == NULL)
3160 		goto bad;
3161 
3162 	sd->sd_deleted = 1;
3163 	sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE;
3164 	sr_shutdown(sd);
3165 
3166 	rv = 0;
3167 bad:
3168 	return (rv);
3169 }
3170 
3171 int
3172 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd)
3173 {
3174 	struct sr_discipline	*sd = NULL;
3175 	int			i, rv = 1;
3176 
3177 	/* Dispatch a discipline specific ioctl. */
3178 
3179 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc),
3180 	    bd->bd_dev);
3181 
3182 	for (i = 0; i < SR_MAXSCSIBUS; i++)
3183 		if (sc->sc_dis[i]) {
3184 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3185 			    bd->bd_dev,
3186 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3187 				sd = sc->sc_dis[i];
3188 				break;
3189 			}
3190 		}
3191 
3192 	if (sd && sd->sd_ioctl_handler)
3193 		rv = sd->sd_ioctl_handler(sd, bd);
3194 
3195 	return (rv);
3196 }
3197 
3198 int
3199 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb)
3200 {
3201 	void			*bootblk = NULL, *bootldr = NULL;
3202 	struct sr_discipline	*sd = NULL;
3203 	struct sr_chunk		*chunk;
3204 	u_int32_t		bbs, bls;
3205 	int			rv = EINVAL;
3206 	int			i;
3207 
3208 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc),
3209 	    bb->bb_dev);
3210 
3211 	for (i = 0; i < SR_MAXSCSIBUS; i++)
3212 		if (sc->sc_dis[i]) {
3213 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
3214 			    bb->bb_dev,
3215 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
3216 				sd = sc->sc_dis[i];
3217 				break;
3218 			}
3219 		}
3220 
3221 	if (sd == NULL)
3222 		goto done;
3223 
3224 	/* Ensure that boot storage area is large enough. */
3225 	if (sd->sd_meta->ssd_data_offset < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) {
3226 		printf("%s: insufficient boot storage!\n", DEVNAME(sd->sd_sc));
3227 		goto done;
3228 	}
3229 
3230 	if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512)
3231 		goto done;
3232 
3233 	if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512)
3234 		goto done;
3235 
3236 	/* Copy in boot block. */
3237 	bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE;
3238 	bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO);
3239 	if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0)
3240 		goto done;
3241 
3242 	/* Copy in boot loader. */
3243 	bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE;
3244 	bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO);
3245 	if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0)
3246 		goto done;
3247 
3248 	/* Save boot block and boot loader to each chunk. */
3249 	for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
3250 
3251 		chunk = sd->sd_vol.sv_chunks[i];
3252 
3253 		/* Save boot blocks. */
3254 		DNPRINTF(SR_D_IOCTL,
3255 		    "sr_ioctl_installboot: saving boot block to %s "
3256 		    "(%u bytes)\n", chunk->src_devname, bbs);
3257 
3258 		if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs,
3259 		    SR_BOOT_BLOCKS_OFFSET, B_WRITE)) {
3260 			printf("%s: failed to write boot block\n", DEVNAME(sc));
3261 			goto done;
3262 		}
3263 
3264 		/* Save boot loader.*/
3265 		DNPRINTF(SR_D_IOCTL,
3266 		    "sr_ioctl_installboot: saving boot loader to %s "
3267 		    "(%u bytes)\n", chunk->src_devname, bls);
3268 
3269 		if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls,
3270 		    SR_BOOT_LOADER_OFFSET, B_WRITE)) {
3271 			printf("%s: failed to write boot loader\n",
3272 			   DEVNAME(sc));
3273 			goto done;
3274 		}
3275 
3276 	}
3277 
3278 	/* XXX - Install boot block on disk - MD code. */
3279 
3280 	/* Save boot details in metadata. */
3281 	sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE;
3282 
3283 	/* XXX - Store size of boot block/loader in optional metadata. */
3284 
3285 	/* Save metadata. */
3286 	if (sr_meta_save(sd, SR_META_DIRTY)) {
3287 		printf("%s: could not save metadata to %s\n",
3288 		    DEVNAME(sc), chunk->src_devname);
3289 		goto done;
3290 	}
3291 
3292 	rv = 0;
3293 
3294 done:
3295 	if (bootblk)
3296 		free(bootblk, M_DEVBUF);
3297 	if (bootldr)
3298 		free(bootldr, M_DEVBUF);
3299 
3300 	return (rv);
3301 }
3302 
3303 void
3304 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
3305 {
3306 	struct sr_chunk		*ch_entry, *ch_next;
3307 
3308 	DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
3309 
3310 	if (!cl)
3311 		return;
3312 
3313 	for (ch_entry = SLIST_FIRST(cl);
3314 	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
3315 		ch_next = SLIST_NEXT(ch_entry, src_link);
3316 
3317 		DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
3318 		    DEVNAME(sc), ch_entry->src_devname);
3319 		if (ch_entry->src_vn) {
3320 			/*
3321 			 * XXX - explicitly lock the vnode until we can resolve
3322 			 * the problem introduced by vnode aliasing... specfs
3323 			 * has no locking, whereas ufs/ffs does!
3324 			 */
3325 			vn_lock(ch_entry->src_vn, LK_EXCLUSIVE |
3326 			    LK_RETRY, curproc);
3327 			VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED,
3328 			    curproc);
3329 			vput(ch_entry->src_vn);
3330 		}
3331 		free(ch_entry, M_DEVBUF);
3332 	}
3333 	SLIST_INIT(cl);
3334 }
3335 
3336 void
3337 sr_discipline_free(struct sr_discipline *sd)
3338 {
3339 	struct sr_softc		*sc;
3340 	struct sr_meta_opt_head *omh;
3341 	struct sr_meta_opt_item	*omi, *omi_next;
3342 	int			i;
3343 
3344 	if (!sd)
3345 		return;
3346 
3347 	sc = sd->sd_sc;
3348 
3349 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
3350 	    DEVNAME(sc),
3351 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
3352 	if (sd->sd_free_resources)
3353 		sd->sd_free_resources(sd);
3354 	if (sd->sd_vol.sv_chunks)
3355 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
3356 	if (sd->sd_meta)
3357 		free(sd->sd_meta, M_DEVBUF);
3358 	if (sd->sd_meta_foreign)
3359 		free(sd->sd_meta_foreign, M_DEVBUF);
3360 
3361 	omh = &sd->sd_meta_opt;
3362 	for (omi = SLIST_FIRST(omh); omi != SLIST_END(omh); omi = omi_next) {
3363 		omi_next = SLIST_NEXT(omi, omi_link);
3364 		free(omi, M_DEVBUF);
3365 	}
3366 
3367 	for (i = 0; i < SR_MAXSCSIBUS; i++)
3368 		if (sc->sc_dis[i] == sd) {
3369 			sc->sc_dis[i] = NULL;
3370 			break;
3371 		}
3372 
3373 	explicit_bzero(sd, sizeof *sd);
3374 	free(sd, M_DEVBUF);
3375 }
3376 
3377 void
3378 sr_discipline_shutdown(struct sr_discipline *sd)
3379 {
3380 	struct sr_softc		*sc = sd->sd_sc;
3381 	int			s;
3382 
3383 	if (!sd || !sc)
3384 		return;
3385 
3386 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
3387 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
3388 
3389 	s = splbio();
3390 
3391 	sd->sd_ready = 0;
3392 
3393 	if (sd->sd_shutdownhook)
3394 		shutdownhook_disestablish(sd->sd_shutdownhook);
3395 
3396 	/* make sure there isn't a sync pending and yield */
3397 	wakeup(sd);
3398 	while (sd->sd_sync || sd->sd_must_flush)
3399 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
3400 		    EWOULDBLOCK)
3401 			break;
3402 
3403 #ifndef SMALL_KERNEL
3404 	sr_sensors_delete(sd);
3405 #endif /* SMALL_KERNEL */
3406 
3407 	if (sd->sd_scsibus_dev)
3408 		config_detach(sd->sd_scsibus_dev, DETACH_FORCE);
3409 
3410 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
3411 
3412 	if (sd->sd_workq)
3413 		workq_destroy(sd->sd_workq);
3414 
3415 	if (sd)
3416 		sr_discipline_free(sd);
3417 
3418 	splx(s);
3419 }
3420 
3421 int
3422 sr_discipline_init(struct sr_discipline *sd, int level)
3423 {
3424 	int			rv = 1;
3425 
3426 	switch (level) {
3427 	case 0:
3428 		sr_raid0_discipline_init(sd);
3429 		break;
3430 	case 1:
3431 		sr_raid1_discipline_init(sd);
3432 		break;
3433 	case 4:
3434 		sr_raidp_discipline_init(sd, SR_MD_RAID4);
3435 		break;
3436 	case 5:
3437 		sr_raidp_discipline_init(sd, SR_MD_RAID5);
3438 		break;
3439 	case 6:
3440 		sr_raid6_discipline_init(sd);
3441 		break;
3442 #ifdef AOE
3443 	/* AOE target. */
3444 	case 'A':
3445 		sr_aoe_server_discipline_init(sd);
3446 		break;
3447 	/* AOE initiator. */
3448 	case 'a':
3449 		sr_aoe_discipline_init(sd);
3450 		break;
3451 #endif
3452 #ifdef CRYPTO
3453 	case 'C':
3454 		sr_crypto_discipline_init(sd);
3455 		break;
3456 #endif
3457 	default:
3458 		goto bad;
3459 	}
3460 
3461 	rv = 0;
3462 bad:
3463 	return (rv);
3464 }
3465 
3466 int
3467 sr_raid_inquiry(struct sr_workunit *wu)
3468 {
3469 	struct sr_discipline	*sd = wu->swu_dis;
3470 	struct scsi_xfer	*xs = wu->swu_xs;
3471 	struct scsi_inquiry_data inq;
3472 
3473 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
3474 
3475 	bzero(&inq, sizeof(inq));
3476 	inq.device = T_DIRECT;
3477 	inq.dev_qual2 = 0;
3478 	inq.version = 2;
3479 	inq.response_format = 2;
3480 	inq.additional_length = 32;
3481 	inq.flags |= SID_CmdQue;
3482 	strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
3483 	    sizeof(inq.vendor));
3484 	strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
3485 	    sizeof(inq.product));
3486 	strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
3487 	    sizeof(inq.revision));
3488 	sr_copy_internal_data(xs, &inq, sizeof(inq));
3489 
3490 	return (0);
3491 }
3492 
3493 int
3494 sr_raid_read_cap(struct sr_workunit *wu)
3495 {
3496 	struct sr_discipline	*sd = wu->swu_dis;
3497 	struct scsi_xfer	*xs = wu->swu_xs;
3498 	struct scsi_read_cap_data rcd;
3499 	struct scsi_read_cap_data_16 rcd16;
3500 	daddr64_t		addr;
3501 	int			rv = 1;
3502 
3503 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
3504 
3505 	addr = sd->sd_meta->ssdi.ssd_size - 1;
3506 	if (xs->cmd->opcode == READ_CAPACITY) {
3507 		bzero(&rcd, sizeof(rcd));
3508 		if (addr > 0xffffffffllu)
3509 			_lto4b(0xffffffff, rcd.addr);
3510 		else
3511 			_lto4b(addr, rcd.addr);
3512 		_lto4b(512, rcd.length);
3513 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
3514 		rv = 0;
3515 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
3516 		bzero(&rcd16, sizeof(rcd16));
3517 		_lto8b(addr, rcd16.addr);
3518 		_lto4b(512, rcd16.length);
3519 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
3520 		rv = 0;
3521 	}
3522 
3523 	return (rv);
3524 }
3525 
3526 int
3527 sr_raid_tur(struct sr_workunit *wu)
3528 {
3529 	struct sr_discipline	*sd = wu->swu_dis;
3530 
3531 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
3532 
3533 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3534 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3535 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
3536 		sd->sd_scsi_sense.add_sense_code = 0x04;
3537 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
3538 		sd->sd_scsi_sense.extra_len = 4;
3539 		return (1);
3540 	} else if (sd->sd_vol_status == BIOC_SVINVALID) {
3541 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
3542 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
3543 		sd->sd_scsi_sense.add_sense_code = 0x05;
3544 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
3545 		sd->sd_scsi_sense.extra_len = 4;
3546 		return (1);
3547 	}
3548 
3549 	return (0);
3550 }
3551 
3552 int
3553 sr_raid_request_sense(struct sr_workunit *wu)
3554 {
3555 	struct sr_discipline	*sd = wu->swu_dis;
3556 	struct scsi_xfer	*xs = wu->swu_xs;
3557 
3558 	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
3559 	    DEVNAME(sd->sd_sc));
3560 
3561 	/* use latest sense data */
3562 	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
3563 
3564 	/* clear sense data */
3565 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
3566 
3567 	return (0);
3568 }
3569 
3570 int
3571 sr_raid_start_stop(struct sr_workunit *wu)
3572 {
3573 	struct scsi_xfer	*xs = wu->swu_xs;
3574 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
3575 
3576 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
3577 	    DEVNAME(sd->sd_sc));
3578 
3579 	if (!ss)
3580 		return (1);
3581 
3582 	/*
3583 	 * do nothing!
3584 	 * a softraid discipline should always reflect correct status
3585 	 */
3586 	return (0);
3587 }
3588 
3589 int
3590 sr_raid_sync(struct sr_workunit *wu)
3591 {
3592 	struct sr_discipline	*sd = wu->swu_dis;
3593 	int			s, rv = 0, ios;
3594 
3595 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
3596 
3597 	/* when doing a fake sync don't count the wu */
3598 	ios = wu->swu_fake ? 0 : 1;
3599 
3600 	s = splbio();
3601 	sd->sd_sync = 1;
3602 
3603 	while (sd->sd_wu_pending > ios)
3604 		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
3605 			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
3606 			    DEVNAME(sd->sd_sc));
3607 			rv = 1;
3608 			break;
3609 		}
3610 
3611 	sd->sd_sync = 0;
3612 	splx(s);
3613 
3614 	wakeup(&sd->sd_sync);
3615 
3616 	return (rv);
3617 }
3618 
3619 void
3620 sr_startwu_callback(void *arg1, void *arg2)
3621 {
3622 	struct sr_discipline	*sd = arg1;
3623 	struct sr_workunit	*wu = arg2;
3624 	struct sr_ccb		*ccb;
3625 	int			s;
3626 
3627 	s = splbio();
3628 	if (wu->swu_cb_active == 1)
3629 		panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc));
3630 	wu->swu_cb_active = 1;
3631 
3632 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
3633 		VOP_STRATEGY(&ccb->ccb_buf);
3634 
3635 	wu->swu_cb_active = 0;
3636 	splx(s);
3637 }
3638 
3639 void
3640 sr_raid_startwu(struct sr_workunit *wu)
3641 {
3642 	struct sr_discipline	*sd = wu->swu_dis;
3643 
3644 	splassert(IPL_BIO);
3645 
3646 	if (wu->swu_state == SR_WU_RESTART)
3647 		/*
3648 		 * no need to put the wu on the pending queue since we
3649 		 * are restarting the io
3650 		 */
3651 		 ;
3652 	else
3653 		/* move wu to pending queue */
3654 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
3655 
3656 	/* start all individual ios */
3657 	workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback,
3658 	    sd, wu);
3659 }
3660 
3661 void
3662 sr_checksum_print(u_int8_t *md5)
3663 {
3664 	int			i;
3665 
3666 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
3667 		printf("%02x", md5[i]);
3668 }
3669 
3670 void
3671 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
3672 {
3673 	MD5_CTX			ctx;
3674 
3675 	DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
3676 	    md5, len);
3677 
3678 	MD5Init(&ctx);
3679 	MD5Update(&ctx, src, len);
3680 	MD5Final(md5, &ctx);
3681 }
3682 
3683 void
3684 sr_uuid_get(struct sr_uuid *uuid)
3685 {
3686 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
3687 	/* UUID version 4: random */
3688 	uuid->sui_id[6] &= 0x0f;
3689 	uuid->sui_id[6] |= 0x40;
3690 	/* RFC4122 variant */
3691 	uuid->sui_id[8] &= 0x3f;
3692 	uuid->sui_id[8] |= 0x80;
3693 }
3694 
3695 void
3696 sr_uuid_print(struct sr_uuid *uuid, int cr)
3697 {
3698 	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
3699 	    "%02x%02x%02x%02x%02x%02x",
3700 	    uuid->sui_id[0], uuid->sui_id[1],
3701 	    uuid->sui_id[2], uuid->sui_id[3],
3702 	    uuid->sui_id[4], uuid->sui_id[5],
3703 	    uuid->sui_id[6], uuid->sui_id[7],
3704 	    uuid->sui_id[8], uuid->sui_id[9],
3705 	    uuid->sui_id[10], uuid->sui_id[11],
3706 	    uuid->sui_id[12], uuid->sui_id[13],
3707 	    uuid->sui_id[14], uuid->sui_id[15]);
3708 
3709 	if (cr)
3710 		printf("\n");
3711 }
3712 
3713 int
3714 sr_already_assembled(struct sr_discipline *sd)
3715 {
3716 	struct sr_softc		*sc = sd->sd_sc;
3717 	int			i;
3718 
3719 	for (i = 0; i < SR_MAXSCSIBUS; i++)
3720 		if (sc->sc_dis[i])
3721 			if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
3722 			    &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
3723 			    sizeof(sd->sd_meta->ssdi.ssd_uuid)))
3724 				return (1);
3725 
3726 	return (0);
3727 }
3728 
/*
 * Validate a strip size given in bytes.
 *
 * A valid strip size is a non-zero power of two that is a multiple of 512.
 *
 * Returns log2(b) for a valid size and -1 otherwise.
 */
int32_t
sr_validate_stripsize(u_int32_t b)
{
	int			s = 0;

	/*
	 * zero is a multiple of 512 but has no set bit, so the shift loop
	 * below would never terminate on it
	 */
	if (b == 0 || b % 512)
		return (-1);

	/* strip trailing zero bits, counting them */
	while ((b & 1) == 0) {
		b >>= 1;
		s++;
	}

	/* only powers of two: nothing may remain above the lowest set bit */
	if (b != 1)
		return (-1);

	return (s);
}
3749 
3750 void
3751 sr_shutdown(void *arg)
3752 {
3753 	struct sr_discipline	*sd = arg;
3754 #ifdef SR_DEBUG
3755 	struct sr_softc		*sc = sd->sd_sc;
3756 #endif
3757 	DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n",
3758 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
3759 
3760 	/* abort rebuild and drain io */
3761 	sd->sd_reb_abort = 1;
3762 	while (sd->sd_reb_active)
3763 		tsleep(sd, PWAIT, "sr_shutdown", 1);
3764 
3765 	sr_meta_save(sd, 0);
3766 
3767 	sr_discipline_shutdown(sd);
3768 }
3769 
3770 int
3771 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
3772 {
3773 	struct sr_discipline	*sd = wu->swu_dis;
3774 	struct scsi_xfer	*xs = wu->swu_xs;
3775 	int			rv = 1;
3776 
3777 	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
3778 	    xs->cmd->opcode);
3779 
3780 	if (sd->sd_meta->ssd_data_offset == 0)
3781 		panic("invalid data offset");
3782 
3783 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
3784 		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
3785 		    DEVNAME(sd->sd_sc), func);
3786 		goto bad;
3787 	}
3788 
3789 	if (xs->datalen == 0) {
3790 		printf("%s: %s: illegal block count for %s\n",
3791 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
3792 		goto bad;
3793 	}
3794 
3795 	if (xs->cmdlen == 10)
3796 		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
3797 	else if (xs->cmdlen == 16)
3798 		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
3799 	else if (xs->cmdlen == 6)
3800 		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
3801 	else {
3802 		printf("%s: %s: illegal cmdlen for %s\n",
3803 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
3804 		goto bad;
3805 	}
3806 
3807 	wu->swu_blk_start = *blk;
3808 	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
3809 
3810 	if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
3811 		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
3812 		    "end: %lld length: %d\n",
3813 		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
3814 		    wu->swu_blk_end, xs->datalen);
3815 
3816 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
3817 		    SSD_ERRCODE_VALID;
3818 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
3819 		sd->sd_scsi_sense.add_sense_code = 0x21;
3820 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
3821 		sd->sd_scsi_sense.extra_len = 4;
3822 		goto bad;
3823 	}
3824 
3825 	rv = 0;
3826 bad:
3827 	return (rv);
3828 }
3829 
3830 int
3831 sr_check_io_collision(struct sr_workunit *wu)
3832 {
3833 	struct sr_discipline	*sd = wu->swu_dis;
3834 	struct sr_workunit	*wup;
3835 
3836 	splassert(IPL_BIO);
3837 
3838 	/* walk queue backwards and fill in collider if we have one */
3839 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
3840 		if (wu->swu_blk_end < wup->swu_blk_start ||
3841 		    wup->swu_blk_end < wu->swu_blk_start)
3842 			continue;
3843 
3844 		/* we have an LBA collision, defer wu */
3845 		wu->swu_state = SR_WU_DEFERRED;
3846 		if (wup->swu_collider)
3847 			/* wu is on deferred queue, append to last wu */
3848 			while (wup->swu_collider)
3849 				wup = wup->swu_collider;
3850 
3851 		wup->swu_collider = wu;
3852 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
3853 		sd->sd_wu_collisions++;
3854 		goto queued;
3855 	}
3856 
3857 	return (0);
3858 queued:
3859 	return (1);
3860 }
3861 
3862 void
3863 sr_rebuild(void *arg)
3864 {
3865 	struct sr_discipline	*sd = arg;
3866 	struct sr_softc		*sc = sd->sd_sc;
3867 
3868 	if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc,
3869 	    DEVNAME(sc)) != 0)
3870 		printf("%s: unable to start backgound operation\n",
3871 		    DEVNAME(sc));
3872 }
3873 
3874 void
3875 sr_rebuild_thread(void *arg)
3876 {
3877 	struct sr_discipline	*sd = arg;
3878 	struct sr_softc		*sc = sd->sd_sc;
3879 	daddr64_t		whole_blk, partial_blk, blk, sz, lba;
3880 	daddr64_t		psz, rb, restart;
3881 	uint64_t		mysize = 0;
3882 	struct sr_workunit	*wu_r, *wu_w;
3883 	struct scsi_xfer	xs_r, xs_w;
3884 	struct scsi_rw_16	cr, cw;
3885 	int			c, s, slept, percent = 0, old_percent = -1;
3886 	u_int8_t		*buf;
3887 
3888 	whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
3889 	partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE;
3890 
3891 	restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE;
3892 	if (restart > whole_blk) {
3893 		printf("%s: bogus rebuild restart offset, starting from 0\n",
3894 		    DEVNAME(sc));
3895 		restart = 0;
3896 	}
3897 	if (restart) {
3898 		/*
3899 		 * XXX there is a hole here; there is a posibility that we
3900 		 * had a restart however the chunk that was supposed to
3901 		 * be rebuilt is no longer valid; we can reach this situation
3902 		 * when a rebuild is in progress and the box crashes and
3903 		 * on reboot the rebuild chunk is different (like zero'd or
3904 		 * replaced).  We need to check the uuid of the chunk that is
3905 		 * being rebuilt to assert this.
3906 		 */
3907 		psz = sd->sd_meta->ssdi.ssd_size;
3908 		rb = sd->sd_meta->ssd_rebuild;
3909 		if (rb > 0)
3910 			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
3911 		else
3912 			percent = 0;
3913 		printf("%s: resuming rebuild on %s at %llu%%\n",
3914 		    DEVNAME(sc), sd->sd_meta->ssd_devname, percent);
3915 	}
3916 
3917 	sd->sd_reb_active = 1;
3918 
3919 	/* currently this is 64k therefore we can use dma_alloc */
3920 	buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK);
3921 	for (blk = restart; blk <= whole_blk; blk++) {
3922 		if (blk == whole_blk)
3923 			sz = partial_blk;
3924 		else
3925 			sz = SR_REBUILD_IO_SIZE;
3926 		mysize += sz;
3927 		lba = blk * sz;
3928 
3929 		/* get some wu */
3930 		if ((wu_r = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
3931 			panic("%s: rebuild exhausted wu_r", DEVNAME(sc));
3932 		if ((wu_w = scsi_io_get(&sd->sd_iopool, 0)) == NULL)
3933 			panic("%s: rebuild exhausted wu_w", DEVNAME(sc));
3934 
3935 		/* setup read io */
3936 		bzero(&xs_r, sizeof xs_r);
3937 		bzero(&cr, sizeof cr);
3938 		xs_r.error = XS_NOERROR;
3939 		xs_r.flags = SCSI_DATA_IN;
3940 		xs_r.datalen = sz << DEV_BSHIFT;
3941 		xs_r.data = buf;
3942 		xs_r.cmdlen = 16;
3943 		cr.opcode = READ_16;
3944 		_lto4b(sz, cr.length);
3945 		_lto8b(lba, cr.addr);
3946 		xs_r.cmd = (struct scsi_generic *)&cr;
3947 		wu_r->swu_flags |= SR_WUF_REBUILD;
3948 		wu_r->swu_xs = &xs_r;
3949 		if (sd->sd_scsi_rw(wu_r)) {
3950 			printf("%s: could not create read io\n",
3951 			    DEVNAME(sc));
3952 			goto fail;
3953 		}
3954 
3955 		/* setup write io */
3956 		bzero(&xs_w, sizeof xs_w);
3957 		bzero(&cw, sizeof cw);
3958 		xs_w.error = XS_NOERROR;
3959 		xs_w.flags = SCSI_DATA_OUT;
3960 		xs_w.datalen = sz << DEV_BSHIFT;
3961 		xs_w.data = buf;
3962 		xs_w.cmdlen = 16;
3963 		cw.opcode = WRITE_16;
3964 		_lto4b(sz, cw.length);
3965 		_lto8b(lba, cw.addr);
3966 		xs_w.cmd = (struct scsi_generic *)&cw;
3967 		wu_w->swu_flags |= SR_WUF_REBUILD;
3968 		wu_w->swu_xs = &xs_w;
3969 		if (sd->sd_scsi_rw(wu_w)) {
3970 			printf("%s: could not create write io\n",
3971 			    DEVNAME(sc));
3972 			goto fail;
3973 		}
3974 
3975 		/*
3976 		 * collide with the read io so that we get automatically
3977 		 * started when the read is done
3978 		 */
3979 		wu_w->swu_state = SR_WU_DEFERRED;
3980 		wu_r->swu_collider = wu_w;
3981 		s = splbio();
3982 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
3983 
3984 		/* schedule io */
3985 		if (sr_check_io_collision(wu_r))
3986 			goto queued;
3987 
3988 		sr_raid_startwu(wu_r);
3989 queued:
3990 		splx(s);
3991 
3992 		/* wait for read completion */
3993 		slept = 0;
3994 		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
3995 			tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
3996 			slept = 1;
3997 		}
3998 		/* yield if we didn't sleep */
3999 		if (slept == 0)
4000 			tsleep(sc, PWAIT, "sr_yield", 1);
4001 
4002 		scsi_io_put(&sd->sd_iopool, wu_r);
4003 		scsi_io_put(&sd->sd_iopool, wu_w);
4004 
4005 		sd->sd_meta->ssd_rebuild = lba;
4006 
4007 		/* save metadata every percent */
4008 		psz = sd->sd_meta->ssdi.ssd_size;
4009 		rb = sd->sd_meta->ssd_rebuild;
4010 		if (rb > 0)
4011 			percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
4012 		else
4013 			percent = 0;
4014 		if (percent != old_percent && blk != whole_blk) {
4015 			if (sr_meta_save(sd, SR_META_DIRTY))
4016 				printf("%s: could not save metadata to %s\n",
4017 				    DEVNAME(sc), sd->sd_meta->ssd_devname);
4018 			old_percent = percent;
4019 		}
4020 
4021 		if (sd->sd_reb_abort)
4022 			goto abort;
4023 	}
4024 
4025 	/* all done */
4026 	sd->sd_meta->ssd_rebuild = 0;
4027 	for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
4028 		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
4029 		    BIOC_SDREBUILD) {
4030 			sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE);
4031 			break;
4032 		}
4033 
4034 abort:
4035 	if (sr_meta_save(sd, SR_META_DIRTY))
4036 		printf("%s: could not save metadata to %s\n",
4037 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
4038 fail:
4039 	dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT);
4040 	sd->sd_reb_active = 0;
4041 	kthread_exit(0);
4042 }
4043 
4044 #ifndef SMALL_KERNEL
4045 int
4046 sr_sensors_create(struct sr_discipline *sd)
4047 {
4048 	struct sr_softc		*sc = sd->sd_sc;
4049 	int			rv = 1;
4050 
4051 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
4052 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
4053 
4054 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
4055 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
4056 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
4057 	    sizeof(sd->sd_vol.sv_sensor.desc));
4058 
4059 	sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4060 	sd->sd_vol.sv_sensor_attached = 1;
4061 
4062 	if (sc->sc_sensors_running == 0) {
4063 		if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL)
4064 			goto bad;
4065 		sc->sc_sensors_running = 1;
4066 	}
4067 
4068 	rv = 0;
4069 bad:
4070 	return (rv);
4071 }
4072 
4073 void
4074 sr_sensors_delete(struct sr_discipline *sd)
4075 {
4076 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc));
4077 
4078 	if (sd->sd_vol.sv_sensor_attached)
4079 		sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor);
4080 }
4081 
4082 void
4083 sr_sensors_refresh(void *arg)
4084 {
4085 	struct sr_softc		*sc = arg;
4086 	struct sr_volume	*sv;
4087 	struct sr_discipline	*sd;
4088 	int			i, vol;
4089 
4090 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
4091 
4092 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
4093 		/* XXX this will not work when we stagger disciplines */
4094 		if (!sc->sc_dis[i])
4095 			continue;
4096 
4097 		sd = sc->sc_dis[i];
4098 		sv = &sd->sd_vol;
4099 
4100 		switch(sd->sd_vol_status) {
4101 		case BIOC_SVOFFLINE:
4102 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
4103 			sv->sv_sensor.status = SENSOR_S_CRIT;
4104 			break;
4105 
4106 		case BIOC_SVDEGRADED:
4107 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
4108 			sv->sv_sensor.status = SENSOR_S_WARN;
4109 			break;
4110 
4111 		case BIOC_SVSCRUB:
4112 		case BIOC_SVONLINE:
4113 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
4114 			sv->sv_sensor.status = SENSOR_S_OK;
4115 			break;
4116 
4117 		default:
4118 			sv->sv_sensor.value = 0; /* unknown */
4119 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
4120 		}
4121 	}
4122 }
4123 #endif /* SMALL_KERNEL */
4124 
4125 #ifdef SR_FANCY_STATS
4126 void				sr_print_stats(void);
4127 
4128 void
4129 sr_print_stats(void)
4130 {
4131 	struct sr_softc		*sc;
4132 	struct sr_discipline	*sd;
4133 	int			i, vol;
4134 
4135 	for (i = 0; i < softraid_cd.cd_ndevs; i++)
4136 		if (softraid_cd.cd_devs[i]) {
4137 			sc = softraid_cd.cd_devs[i];
4138 			/* we'll only have one softc */
4139 			break;
4140 		}
4141 
4142 	if (!sc) {
4143 		printf("no softraid softc found\n");
4144 		return;
4145 	}
4146 
4147 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
4148 		/* XXX this will not work when we stagger disciplines */
4149 		if (!sc->sc_dis[i])
4150 			continue;
4151 
4152 		sd = sc->sc_dis[i];
4153 		printf("%s: ios pending: %d  collisions %llu\n",
4154 		    sd->sd_meta->ssd_devname,
4155 		    sd->sd_wu_pending,
4156 		    sd->sd_wu_collisions);
4157 	}
4158 }
4159 #endif /* SR_FANCY_STATS */
4160 
4161 #ifdef SR_DEBUG
4162 void
4163 sr_meta_print(struct sr_metadata *m)
4164 {
4165 	int			i;
4166 	struct sr_meta_chunk	*mc;
4167 	struct sr_meta_opt	*mo;
4168 
4169 	if (!(sr_debug & SR_D_META))
4170 		return;
4171 
4172 	printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
4173 	printf("\tssd_version %d\n", m->ssdi.ssd_version);
4174 	printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags);
4175 	printf("\tssd_uuid ");
4176 	sr_uuid_print(&m->ssdi.ssd_uuid, 1);
4177 	printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
4178 	printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
4179 	printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
4180 	printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
4181 	printf("\tssd_level %d\n", m->ssdi.ssd_level);
4182 	printf("\tssd_size %lld\n", m->ssdi.ssd_size);
4183 	printf("\tssd_devname %s\n", m->ssd_devname);
4184 	printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
4185 	printf("\tssd_product %s\n", m->ssdi.ssd_product);
4186 	printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
4187 	printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
4188 	printf("\tssd_checksum ");
4189 	sr_checksum_print(m->ssd_checksum);
4190 	printf("\n");
4191 	printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
4192 	printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
4193 
4194 	mc = (struct sr_meta_chunk *)(m + 1);
4195 	for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
4196 		printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
4197 		printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
4198 		printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
4199 		printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
4200 		printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
4201 		printf("\t\tscm_uuid ");
4202 		sr_uuid_print(&mc->scmi.scm_uuid, 1);
4203 		printf("\t\tscm_checksum ");
4204 		sr_checksum_print(mc->scm_checksum);
4205 		printf("\n");
4206 		printf("\t\tscm_status %d\n", mc->scm_status);
4207 	}
4208 
4209 	mo = (struct sr_meta_opt *)(mc);
4210 	for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) {
4211 		printf("\t\t\tsom_type %d\n", mo->somi.som_type);
4212 		printf("\t\t\tsom_checksum ");
4213 		sr_checksum_print(mo->som_checksum);
4214 		printf("\n");
4215 	}
4216 }
4217 
/*
 * Debug helper: print `len' bytes starting at `p' as two-digit hex
 * values separated by spaces, followed by a newline.
 */
void
sr_dump_mem(u_int8_t *p, int len)
{
	int			off;

	for (off = 0; off < len; off++)
		printf("%02x ", p[off]);

	printf("\n");
}
4227 
4228 #endif /* SR_DEBUG */
4229