xref: /openbsd-src/sys/dev/softraid.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /* $OpenBSD: softraid.c,v 1.120 2008/09/22 19:44:00 miod Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bio.h"
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/buf.h>
24 #include <sys/device.h>
25 #include <sys/ioctl.h>
26 #include <sys/proc.h>
27 #include <sys/malloc.h>
28 #include <sys/pool.h>
29 #include <sys/kernel.h>
30 #include <sys/disk.h>
31 #include <sys/rwlock.h>
32 #include <sys/queue.h>
33 #include <sys/fcntl.h>
34 #include <sys/disklabel.h>
35 #include <sys/mount.h>
36 #include <sys/sensors.h>
37 #include <sys/stat.h>
38 #include <sys/conf.h>
39 #include <sys/uio.h>
40 
41 #include <crypto/cryptodev.h>
42 
43 #include <scsi/scsi_all.h>
44 #include <scsi/scsiconf.h>
45 #include <scsi/scsi_disk.h>
46 
47 #include <dev/softraidvar.h>
48 #include <dev/rndvar.h>
49 
50 /* #define SR_FANCY_STATS */
51 
#ifdef SR_DEBUG
#define SR_FANCY_STATS
/*
 * Debug category mask (SR_D_* bits).  Nothing is enabled by default; OR any
 * of the following into the initializer to turn a category on:
 *	SR_D_CMD, SR_D_MISC, SR_D_INTR, SR_D_IOCTL, SR_D_CCB,
 *	SR_D_WU, SR_D_META, SR_D_DIS, SR_D_STATE
 */
uint32_t	sr_debug = 0;
#endif
66 
67 int		sr_match(struct device *, void *, void *);
68 void		sr_attach(struct device *, struct device *, void *);
69 int		sr_detach(struct device *, int);
70 int		sr_activate(struct device *, enum devact);
71 
72 struct cfattach softraid_ca = {
73 	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
74 	sr_activate
75 };
76 
77 struct cfdriver softraid_cd = {
78 	NULL, "softraid", DV_DULL
79 };
80 
81 /* scsi & discipline */
82 int			sr_scsi_cmd(struct scsi_xfer *);
83 void			sr_minphys(struct buf *bp);
84 void			sr_copy_internal_data(struct scsi_xfer *,
85 			    void *, size_t);
86 int			sr_scsi_ioctl(struct scsi_link *, u_long,
87 			    caddr_t, int, struct proc *);
88 int			sr_ioctl(struct device *, u_long, caddr_t);
89 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
90 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
91 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
92 int			sr_ioctl_setstate(struct sr_softc *,
93 			    struct bioc_setstate *);
94 int			sr_ioctl_createraid(struct sr_softc *,
95 			    struct bioc_createraid *, int);
96 int			sr_ioctl_deleteraid(struct sr_softc *,
97 			    struct bioc_deleteraid *);
98 void			sr_chunks_unwind(struct sr_softc *,
99 			    struct sr_chunk_head *);
100 void			sr_discipline_free(struct sr_discipline *);
101 void			sr_discipline_shutdown(struct sr_discipline *);
102 
103 /* utility functions */
104 void			sr_shutdown(void *);
105 void			sr_uuid_get(struct sr_uuid *);
106 void			sr_uuid_print(struct sr_uuid *, int);
107 void			sr_checksum_print(u_int8_t *);
108 void			sr_checksum(struct sr_softc *, void *, void *,
109 			    u_int32_t);
110 int			sr_boot_assembly(struct sr_softc *);
111 int			sr_already_assembled(struct sr_discipline *);
112 
113 /* don't include these on RAMDISK */
114 #ifndef SMALL_KERNEL
115 void			sr_sensors_refresh(void *);
116 int			sr_sensors_create(struct sr_discipline *);
117 void			sr_sensors_delete(struct sr_discipline *);
118 #endif
119 
120 /* metadata */
121 int			sr_meta_probe(struct sr_discipline *, dev_t *, int);
122 int			sr_meta_attach(struct sr_discipline *, int);
123 void			sr_meta_getdevname(struct sr_softc *, dev_t, char *,
124 			    int);
125 int			sr_meta_rw(struct sr_discipline *, dev_t, void *,
126 			    size_t, daddr64_t, long);
127 int			sr_meta_clear(struct sr_discipline *);
128 int			sr_meta_read(struct sr_discipline *);
129 int			sr_meta_save(struct sr_discipline *, u_int32_t);
130 int			sr_meta_validate(struct sr_discipline *, dev_t,
131 			    struct sr_metadata *, void *);
132 void			sr_meta_chunks_create(struct sr_softc *,
133 			    struct sr_chunk_head *);
134 void			sr_meta_init(struct sr_discipline *,
135 			    struct sr_chunk_head *);
136 
137 /* native metadata format */
138 int			sr_meta_native_bootprobe(struct sr_softc *,
139 			    struct device *, struct sr_metadata_list_head *);
140 #define SR_META_NOTCLAIMED	(0)
141 #define SR_META_CLAIMED		(1)
142 int			sr_meta_native_probe(struct sr_softc *,
143 			   struct sr_chunk *);
144 int			sr_meta_native_attach(struct sr_discipline *, int);
145 int			sr_meta_native_read(struct sr_discipline *, dev_t,
146 			    struct sr_metadata *, void *);
147 int			sr_meta_native_write(struct sr_discipline *, dev_t,
148 			    struct sr_metadata *,void *);
149 
150 #ifdef SR_DEBUG
151 void			sr_meta_print(struct sr_metadata *);
152 #else
153 #define			sr_meta_print(m)
154 #endif
155 
156 /* the metadata driver should remain stateless */
157 struct sr_meta_driver {
158 	daddr64_t		smd_offset;	/* metadata location */
159 	u_int32_t		smd_size;	/* size of metadata */
160 
161 	int			(*smd_probe)(struct sr_softc *,
162 				   struct sr_chunk *);
163 	int			(*smd_attach)(struct sr_discipline *, int);
164 	int			(*smd_detach)(struct sr_discipline *);
165 	int			(*smd_read)(struct sr_discipline *, dev_t,
166 				    struct sr_metadata *, void *);
167 	int			(*smd_write)(struct sr_discipline *, dev_t,
168 				    struct sr_metadata *, void *);
169 	int			(*smd_validate)(struct sr_discipline *,
170 				    struct sr_metadata *, void *);
171 } smd[] = {
172 	{ SR_META_OFFSET, SR_META_SIZE * 512,
173 	  sr_meta_native_probe, sr_meta_native_attach, NULL,
174 	  sr_meta_native_read , sr_meta_native_write, NULL },
175 #define SR_META_F_NATIVE	0
176 	{ 0, 0, NULL, NULL, NULL, NULL }
177 #define SR_META_F_INVALID	-1
178 };
179 
180 int
181 sr_meta_attach(struct sr_discipline *sd, int force)
182 {
183 	struct sr_softc		*sc = sd->sd_sc;
184 	struct sr_chunk_head	*cl;
185 	struct sr_chunk		*ch_entry;
186 	int			rv = 1, i;
187 
188 	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc));
189 
190 	/* in memory copy of metadata */
191 	sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
192 	if (!sd->sd_meta) {
193 		printf("%s: could not allocate memory for metadata\n",
194 		    DEVNAME(sc));
195 		goto bad;
196 	}
197 
198 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
199 		/* in memory copy of foreign metadata */
200 		sd->sd_meta_foreign =  malloc(smd[sd->sd_meta_type].smd_size ,
201 		    M_DEVBUF, M_ZERO);
202 		if (!sd->sd_meta_foreign) {
203 			/* unwind frees sd_meta */
204 			printf("%s: could not allocate memory for foreign "
205 			    "metadata\n", DEVNAME(sc));
206 			goto bad;
207 		}
208 	}
209 
210 	if (smd[sd->sd_meta_type].smd_attach(sd, force))
211 		goto bad;
212 
213 	/* fill out chunk array */
214 	cl = &sd->sd_vol.sv_chunk_list;
215 	i = 0;
216 	SLIST_FOREACH(ch_entry, cl, src_link)
217 		sd->sd_vol.sv_chunks[i++] = ch_entry;
218 
219 	rv = 0;
220 bad:
221 	return (rv);
222 }
223 
224 int
225 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
226 {
227 	struct sr_softc		*sc = sd->sd_sc;
228 	struct bdevsw		*bdsw;
229 	struct sr_chunk		*ch_entry, *ch_prev = NULL;
230 	struct sr_chunk_head	*cl;
231 	char			devname[32];
232 	int			i, d, type, found, prevf, error;
233 	dev_t			dev;
234 
235 	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
236 
237 	if (no_chunk == 0)
238 		goto unwind;
239 
240 
241 	cl = &sd->sd_vol.sv_chunk_list;
242 
243 	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
244 		dev = dt[d];
245 		sr_meta_getdevname(sc, dev, devname, sizeof(devname));
246 		bdsw = bdevsw_lookup(dev);
247 
248 		/*
249 		 * XXX leaving dev open for now; move this to attach and figure
250 		 * out the open/close dance for unwind.
251 		 */
252 		error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc);
253 		if (error) {
254 			DNPRINTF(SR_D_META,"%s: sr_meta_probe can't open %s\n",
255 			    DEVNAME(sc), devname);
256 			/* XXX device isn't open but will be closed anyway */
257 			goto unwind;
258 		}
259 
260 		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
261 		    M_WAITOK | M_ZERO);
262 		/* keep disks in user supplied order */
263 		if (ch_prev)
264 			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
265 		else
266 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
267 		ch_prev = ch_entry;
268 		strlcpy(ch_entry->src_devname, devname,
269 		   sizeof(ch_entry->src_devname));
270 		ch_entry->src_dev_mm = dev;
271 
272 		/* determine if this is a device we understand */
273 		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
274 			type = smd[i].smd_probe(sc, ch_entry);
275 			if (type == SR_META_F_INVALID)
276 				continue;
277 			else {
278 				found = type;
279 				break;
280 			}
281 		}
282 		if (found == SR_META_F_INVALID)
283 			goto unwind;
284 		if (prevf == SR_META_F_INVALID)
285 			prevf = found;
286 		if (prevf != found) {
287 			DNPRINTF(SR_D_META, "%s: prevf != found\n",
288 			    DEVNAME(sc));
289 			goto unwind;
290 		}
291 	}
292 
293 	return (prevf);
294 unwind:
295 	return (SR_META_F_INVALID);
296 }
297 
298 void
299 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
300 {
301 	int			maj, unit, part;
302 	char			*name;
303 
304 	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
305 	    DEVNAME(sc), buf, size);
306 
307 	if (!buf)
308 		return;
309 
310 	maj = major(dev);
311 	part = DISKPART(dev);
312 	unit = DISKUNIT(dev);
313 
314 	name = findblkname(maj);
315 	if (name == NULL)
316 		return;
317 
318 	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
319 }
320 
321 int
322 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
323     daddr64_t ofs, long flags)
324 {
325 	struct sr_softc		*sc = sd->sd_sc;
326 	struct buf		b;
327 	int			rv = 1;
328 
329 	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
330 	    DEVNAME(sc), dev, md, sz, ofs, flags);
331 
332 	if (md == NULL) {
333 		printf("%s: read invalid metadata pointer\n", sc);
334 		goto done;
335 	}
336 
337 	bzero(&b, sizeof(b));
338 	b.b_flags = flags;
339 	b.b_blkno = ofs;
340 	b.b_bcount = sz;
341 	b.b_bufsize = sz;
342 	b.b_resid = sz;
343 	b.b_data = md;
344 	b.b_error = 0;
345 	b.b_proc = curproc;
346 	b.b_dev = dev;
347 	b.b_vp = NULL;
348 	b.b_iodone = NULL;
349 	LIST_INIT(&b.b_dep);
350 	bdevsw_lookup(b.b_dev)->d_strategy(&b);
351 	biowait(&b);
352 
353 	if (b.b_flags & B_ERROR) {
354 		printf("%s: 0x%x i/o error on block %lld while reading "
355 		    "metadata %d\n", sc, dev, b.b_blkno, b.b_error);
356 		goto done;
357 	}
358 	rv = 0;
359 done:
360 	return (rv);
361 }
362 
363 int
364 sr_meta_clear(struct sr_discipline *sd)
365 {
366 	struct sr_softc		*sc = sd->sd_sc;
367 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
368 	struct sr_chunk		*ch_entry;
369 	void			*m;
370 	int			rv = 1;
371 
372 	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
373 
374 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
375 		printf("%s: sr_meta_clear can not clear foreign metadata\n",
376 		    DEVNAME(sc));
377 		goto done;
378 	}
379 
380 	m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO);
381 	SLIST_FOREACH(ch_entry, cl, src_link) {
382 		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
383 			/* XXX mark disk offline */
384 			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
385 			    "clear %s\n", ch_entry->src_devname);
386 			rv++;
387 			continue;
388 		}
389 		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
390 		bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
391 	}
392 
393 	bzero(sd->sd_meta, SR_META_SIZE * 512);
394 
395 	free(m, M_DEVBUF);
396 	rv = 0;
397 done:
398 	return (rv);
399 }
400 
401 void
402 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
403 {
404 	struct sr_chunk		*ch_entry;
405 	struct sr_uuid		uuid;
406 	int			cid = 0;
407 	char			*name;
408 	u_int64_t		max_chunk_sz = 0, min_chunk_sz;
409 
410 	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));
411 
412 	sr_uuid_get(&uuid);
413 
414 	/* fill out stuff and get largest chunk size while looping */
415 	SLIST_FOREACH(ch_entry, cl, src_link) {
416 		name = ch_entry->src_devname;
417 		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
418 		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
419 		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
420 		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
421 		    sizeof(ch_entry->src_meta.scmi.scm_devname));
422 		bcopy(&uuid,  &ch_entry->src_meta.scmi.scm_uuid,
423 		    sizeof(ch_entry->src_meta.scmi.scm_uuid));
424 
425 		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
426 			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
427 	}
428 
429 	/* get smallest chunk size */
430 	min_chunk_sz = max_chunk_sz;
431 	SLIST_FOREACH(ch_entry, cl, src_link)
432 		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
433 			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;
434 
435 	/* equalize all sizes */
436 	SLIST_FOREACH(ch_entry, cl, src_link)
437 		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;
438 
439 	/* whine if chunks are not the same size */
440 	if (min_chunk_sz != max_chunk_sz)
441 		printf("%s: chunk sizes are not equal; up to %llu blocks "
442 		    "wasted per chunk\n",
443 		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
444 }
445 
446 void
447 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
448 {
449 	struct sr_softc		*sc = sd->sd_sc;
450 	struct sr_metadata	*sm = sd->sd_meta;
451 	struct sr_meta_chunk	*im_sc;
452 	struct sr_meta_opt	*im_so;
453 	int			i, chunk_no;
454 
455 	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
456 
457 	if (!sm)
458 		return;
459 
460 	/* initial metadata */
461 	sm->ssdi.ssd_magic = SR_MAGIC;
462 	sm->ssdi.ssd_version = SR_META_VERSION;
463 	sm->ssd_ondisk = 0;
464 	sm->ssdi.ssd_flags = sd->sd_meta_flags;
465 	/* get uuid from chunk 0 */
466 	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
467 	    &sm->ssdi.ssd_uuid,
468 	    sizeof(struct sr_uuid));
469 
470 	/* volume is filled in createraid */
471 
472 	/* add missing chunk bits */
473 	chunk_no = sm->ssdi.ssd_chunk_no;
474 	for (i = 0; i < chunk_no; i++) {
475 		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
476 		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
477 		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
478 		    sizeof(struct sr_meta_chunk_invariant));
479 
480 		/* carry optional meta also in chunk area */
481 		im_so = &sd->sd_vol.sv_chunks[i]->src_opt;
482 		bzero(im_so, sizeof(*im_so));
483 		if (sd->sd_type == SR_MD_CRYPTO) {
484 			sm->ssdi.ssd_opt_no = 1;
485 			im_so->somi.som_type = SR_OPT_CRYPTO;
486 
487 			/*
488 			 * copy encrypted key / passphrase into optional
489 			 * metadata area
490 			 */
491 			bcopy(&sd->mds.mdd_crypto.scr_meta,
492 			    &im_so->somi.som_meta.smm_crypto,
493 			    sizeof(im_so->somi.som_meta.smm_crypto));
494 
495 			sr_checksum(sc, im_so, im_so->som_checksum,
496 			    sizeof(struct sr_meta_opt_invariant));
497 		}
498 	}
499 }
500 
501 void
502 sr_meta_save_callback(void *arg1, void *arg2)
503 {
504 	struct sr_discipline	*sd = arg1;
505 	int			s;
506 
507 	s = splbio();
508 
509 	if (sr_meta_save(arg1, SR_META_DIRTY))
510 		printf("%s: save metadata failed\n",
511 		    DEVNAME(sd->sd_sc));
512 
513 	sd->sd_must_flush = 0;
514 	splx(s);
515 }
516 
517 int
518 sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
519 {
520 	struct sr_softc		*sc = sd->sd_sc;
521 	struct sr_metadata	*sm = sd->sd_meta, *m;
522 	struct sr_meta_driver	*s;
523 	struct sr_chunk		*src;
524 	struct sr_meta_chunk	*cm;
525 	struct sr_workunit	wu;
526 	struct sr_meta_opt	*om;
527 	int			i;
528 
529 	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
530 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
531 
532 	if (!sm) {
533 		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
534 		goto bad;
535 	}
536 
537 	/* meta scratchpad */
538 	s = &smd[sd->sd_meta_type];
539 	m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
540 	if (!m) {
541 		printf("%s: could not allocate metadata scratch area\n",
542 		    DEVNAME(sc));
543 		goto bad;
544 	}
545 
546 	if (sm->ssdi.ssd_opt_no > 1)
547 		panic("not yet save > 1 optional metadata members");
548 
549 	/* from here on out metadata is updated */
550 restart:
551 	sm->ssd_ondisk++;
552 	sm->ssd_meta_flags = flags;
553 	bcopy(sm, m, sizeof(*m));
554 
555 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
556 		src = sd->sd_vol.sv_chunks[i];
557 		cm = (struct sr_meta_chunk *)(m + 1);
558 		bcopy(&src->src_meta, cm + i, sizeof(*cm));
559 	}
560 
561 	/* optional metadata */
562 	om = (struct sr_meta_opt *)(cm + i);
563 	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
564 		bcopy(&src->src_opt, om + i, sizeof(*om));
565 		sr_checksum(sc, om, &om->som_checksum,
566 		    sizeof(struct sr_meta_opt_invariant));
567 	}
568 
569 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
570 		src = sd->sd_vol.sv_chunks[i];
571 
572 		/* skip disks that are offline */
573 		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
574 			continue;
575 
576 		/* calculate metdata checksum for correct chunk */
577 		m->ssdi.ssd_chunk_id = i;
578 		sr_checksum(sc, m, &m->ssd_checksum,
579 		    sizeof(struct sr_meta_invariant));
580 
581 #ifdef SR_DEBUG
582 		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
583 		    "chunkid: %d checksum: ",
584 		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
585 		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
586 
587 		if (sr_debug &= SR_D_META)
588 			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
589 		DNPRINTF(SR_D_META, "\n");
590 		sr_meta_print(m);
591 #endif
592 
593 		/* translate and write to disk */
594 		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
595 			printf("%s: could not write metadata to %s\n",
596 			    DEVNAME(sc), src->src_devname);
597 			/* restart the meta write */
598 			src->src_meta.scm_status = BIOC_SDOFFLINE;
599 			/* XXX recalculate volume status */
600 			goto restart;
601 		}
602 	}
603 
604 	bzero(&wu, sizeof(wu));
605 	wu.swu_fake = 1;
606 	wu.swu_dis = sd;
607 	sd->sd_scsi_sync(&wu);
608 
609 	free(m, M_DEVBUF);
610 	return (0);
611 bad:
612 	return (1);
613 }
614 
615 int
616 sr_meta_read(struct sr_discipline *sd)
617 {
618 #ifdef SR_DEBUG
619 	struct sr_softc		*sc = sd->sd_sc;
620 #endif
621 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
622 	struct sr_metadata	*sm;
623 	struct sr_chunk		*ch_entry;
624 	struct sr_meta_chunk	*cp;
625 	struct sr_meta_driver	*s;
626 	struct sr_meta_opt	*om;
627 	void			*fm = NULL;
628 	int			no_disk = 0;
629 
630 	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
631 
632 	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
633 	s = &smd[sd->sd_meta_type];
634 	if (sd->sd_meta_type != SR_META_F_NATIVE)
635 		fm = malloc(s->smd_size , M_DEVBUF, M_WAITOK | M_ZERO);
636 
637 	cp = (struct sr_meta_chunk *)(sm + 1);
638 	SLIST_FOREACH(ch_entry, cl, src_link) {
639 		/* read and translate */
640 		if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
641 			/* XXX mark disk offline */
642 			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
643 			    DEVNAME(sc));
644 			continue;
645 		}
646 
647 		if (sm->ssdi.ssd_magic != SR_MAGIC) {
648 			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
649 			    DEVNAME(sc));
650 			continue;
651 		}
652 
653 		/* validate metadata */
654 		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
655 			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
656 			    DEVNAME(sc));
657 			no_disk = -1;
658 			goto done;
659 		}
660 
661 		/* assume chunk 0 contains metadata */
662 		if (no_disk == 0)
663 			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
664 
665 		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
666 
667 		if (sm->ssdi.ssd_opt_no > 1)
668 			panic("not yet read > 1 optional metadata members");
669 
670 		if (sm->ssdi.ssd_opt_no) {
671 			om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
672 			    sizeof(struct sr_meta_chunk) *
673 			    sm->ssdi.ssd_chunk_no);
674 			bcopy(om, &ch_entry->src_opt, sizeof(ch_entry->src_opt));
675 
676 			if (om->somi.som_type == SR_OPT_CRYPTO) {
677 				bcopy(&ch_entry->src_opt.somi.som_meta.smm_crypto,
678 				    &sd->mds.mdd_crypto.scr_meta,
679 				    sizeof(sd->mds.mdd_crypto.scr_meta));
680 			}
681 
682 		}
683 
684 		cp++;
685 		no_disk++;
686 	}
687 
688 	free(sm, M_DEVBUF);
689 	if (fm)
690 		free(fm, M_DEVBUF);
691 
692 done:
693 	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
694 	    no_disk);
695 	return (no_disk);
696 }
697 
698 int
699 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
700     void *fm)
701 {
702 	struct sr_softc		*sc = sd->sd_sc;
703 	struct sr_meta_driver	*s;
704 	struct sr_meta_chunk	*mc;
705 	char			devname[32];
706 	int			rv = 1;
707 	u_int8_t		checksum[MD5_DIGEST_LENGTH];
708 
709 	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
710 
711 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
712 
713 	s = &smd[sd->sd_meta_type];
714 	if (sd->sd_meta_type != SR_META_F_NATIVE)
715 		if (s->smd_validate(sd, sm, fm)) {
716 			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
717 			goto done;
718 		}
719 
720 	/*
721 	 * at this point all foreign metadata has been translated to the native
722 	 * format and will be treated just like the native format
723 	 */
724 
725 	if (sm->ssdi.ssd_version != SR_META_VERSION) {
726 		printf("%s: %s can not read metadata version %d, expected %d\n",
727 		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
728 		    SR_META_VERSION);
729 		goto done;
730 	}
731 
732 	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
733 	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
734 		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
735 		goto done;
736 	}
737 
738 	/* XXX do other checksums */
739 
740 	/* warn if disk changed order */
741 	mc = (struct sr_meta_chunk *)(sm + 1);
742 	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
743 	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
744 		printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
745 		    mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname);
746 
747 	/* we have meta data on disk */
748 	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
749 	    DEVNAME(sc), devname);
750 
751 	rv = 0;
752 done:
753 	return (rv);
754 }
755 
756 int
757 sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv,
758     struct sr_metadata_list_head *mlh)
759 {
760 	struct bdevsw		*bdsw;
761 	struct disklabel	label;
762 	struct sr_metadata	*md;
763 	struct sr_discipline	*fake_sd;
764 	struct sr_metadata_list *mle;
765 	char			devname[32];
766 	dev_t			dev, devr;
767 	int			error, i, majdev;
768 	int			rv = SR_META_NOTCLAIMED;
769 
770 	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
771 
772 	majdev = findblkmajor(dv);
773 	if (majdev == -1)
774 		goto done;
775 	dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
776 	bdsw = &bdevsw[majdev];
777 
778 	/*
779 	 * The devices are being opened with S_IFCHR instead of
780 	 * S_IFBLK so that the SCSI mid-layer does not whine when
781 	 * media is not inserted in certain devices like zip drives
782 	 * and such.
783 	 */
784 
785 	/* open device */
786 	error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc);
787 	if (error) {
788 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
789 		    "failed\n" , DEVNAME(sc));
790 		goto done;
791 	}
792 
793 	/* get disklabel */
794 	error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label, FREAD,
795 	    curproc);
796 	if (error) {
797 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
798 		    "failed\n", DEVNAME(sc));
799 		error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
800 		goto done;
801 	}
802 
803 	/* we are done, close device */
804 	error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
805 	if (error) {
806 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
807 		    "failed\n", DEVNAME(sc));
808 		goto done;
809 	}
810 
811 	md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
812 	if (md == NULL) {
813 		printf("%s: not enough memory for metadata buffer\n",
814 		    DEVNAME(sc));
815 		goto done;
816 	}
817 
818 	/* create fake sd to use utility functions */
819 	fake_sd = malloc(sizeof(struct sr_discipline) , M_DEVBUF, M_ZERO);
820 	if (fake_sd == NULL) {
821 		printf("%s: not enough memory for fake discipline\n",
822 		    DEVNAME(sc));
823 		goto nosd;
824 	}
825 	fake_sd->sd_sc = sc;
826 	fake_sd->sd_meta_type = SR_META_F_NATIVE;
827 
828 	for (i = 0; i < MAXPARTITIONS; i++) {
829 		if (label.d_partitions[i].p_fstype != FS_RAID)
830 			continue;
831 
832 		/* open partition */
833 		devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
834 		error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc);
835 		if (error) {
836 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
837 			    "open failed, partition %d\n",
838 			    DEVNAME(sc), i);
839 			continue;
840 		}
841 
842 		if (sr_meta_native_read(fake_sd, devr, md, NULL)) {
843 			printf("%s: native bootprobe could not read native "
844 			    "metadata\n", DEVNAME(sc));
845 			continue;
846 		}
847 
848 		/* are we a softraid partition? */
849 		sr_meta_getdevname(sc, devr, devname, sizeof(devname));
850 		if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) {
851 			if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) {
852 				DNPRINTF(SR_D_META, "%s: don't save %s\n",
853 				    DEVNAME(sc), devname);
854 			} else {
855 				/* XXX fix M_WAITOK, this is boot time */
856 				mle = malloc(sizeof(*mle), M_DEVBUF,
857 				    M_WAITOK | M_ZERO);
858 				bcopy(md, &mle->sml_metadata,
859 				    SR_META_SIZE * 512);
860 				mle->sml_mm = devr;
861 				SLIST_INSERT_HEAD(mlh, mle, sml_link);
862 				rv = SR_META_CLAIMED;
863 			}
864 		}
865 
866 		/* we are done, close partition */
867 		error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, curproc);
868 		if (error) {
869 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
870 			    "close failed\n", DEVNAME(sc));
871 			continue;
872 		}
873 	}
874 
875 	free(fake_sd, M_DEVBUF);
876 nosd:
877 	free(md, M_DEVBUF);
878 done:
879 	return (rv);
880 }
881 
882 int
883 sr_boot_assembly(struct sr_softc *sc)
884 {
885 	struct device		*dv;
886 	struct sr_metadata_list_head mlh;
887 	struct sr_metadata_list *mle, *mle2;
888 	struct sr_metadata	*m1, *m2;
889 	struct bioc_createraid	bc;
890 	int			rv = 0, no_dev;
891 	dev_t			*dt = NULL;
892 
893 	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
894 
895 	SLIST_INIT(&mlh);
896 
897 	TAILQ_FOREACH(dv, &alldevs, dv_list) {
898 		if (dv->dv_class != DV_DISK)
899 			continue;
900 
901 		/* XXX is there  a better way of excluding some devices? */
902 		if (!strncmp(dv->dv_xname, "fd", 2) ||
903 		    !strncmp(dv->dv_xname, "cd", 2) ||
904 		    !strncmp(dv->dv_xname, "rx", 2))
905 			continue;
906 
907 		/* native softraid uses partitions */
908 		if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED)
909 			continue;
910 
911 		/* probe non-native disks */
912 	}
913 
914 	/*
915 	 * XXX poor mans hack that doesn't keep disks in order and does not
916 	 * roam disks correctly.  replace this with something smarter that
917 	 * orders disks by volid, chunkid and uuid.
918 	 */
919 	dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK);
920 	SLIST_FOREACH(mle, &mlh, sml_link) {
921 		/* chunk used already? */
922 		if (mle->sml_used)
923 			continue;
924 
925 		no_dev = 0;
926 		m1 = (struct sr_metadata *)&mle->sml_metadata;
927 		bzero(dt, BIOC_CRMAXLEN);
928 		SLIST_FOREACH(mle2, &mlh, sml_link) {
929 			/* chunk used already? */
930 			if (mle2->sml_used)
931 				continue;
932 
933 			m2 = (struct sr_metadata *)&mle2->sml_metadata;
934 
935 			/* are we the same volume? */
936 			if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid)
937 				continue;
938 
939 			/* same uuid? */
940 			if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid,
941 			    sizeof(m1->ssdi.ssd_uuid)))
942 				continue;
943 
944 			/* sanity */
945 			if (dt[m2->ssdi.ssd_chunk_id]) {
946 				printf("%s: chunk id already in use; can not "
947 				    "assemble volume\n", DEVNAME(sc));
948 				goto unwind;
949 			}
950 			dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm;
951 			no_dev++;
952 			mle2->sml_used = 1;
953 		}
954 		if (m1->ssdi.ssd_chunk_no != no_dev) {
955 			printf("%s: not assembling partial disk that used to "
956 			    "be volume %d\n", DEVNAME(sc),
957 			    m1->ssdi.ssd_volid);
958 			continue;
959 		}
960 
961 		bzero(&bc, sizeof(bc));
962 		bc.bc_level = m1->ssdi.ssd_level;
963 		bc.bc_dev_list_len = no_dev * sizeof(dev_t);
964 		bc.bc_dev_list = dt;
965 		bc.bc_flags = BIOC_SCDEVT;
966 		sr_ioctl_createraid(sc, &bc, 0);
967 		rv++;
968 	}
969 
970 	/* done with metadata */
971 unwind:
972 	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
973 		mle2 = SLIST_NEXT(mle, sml_link);
974 		free(mle, M_DEVBUF);
975 	}
976 	SLIST_INIT(&mlh);
977 
978 	if (dt)
979 		free(dt, M_DEVBUF);
980 
981 	return (rv);
982 }
983 
984 int
985 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
986 {
987 	struct disklabel	label;
988 	char			*devname;
989 	int			error, part;
990 	daddr64_t		size;
991 	struct bdevsw		*bdsw;
992 	dev_t			dev;
993 
994 	DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
995 	   DEVNAME(sc), ch_entry->src_devname);
996 
997 	dev = ch_entry->src_dev_mm;
998 	devname = ch_entry->src_devname;
999 	bdsw = bdevsw_lookup(dev);
1000 	part = DISKPART(dev);
1001 
1002 	/* get disklabel */
1003 	error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label, FREAD, curproc);
1004 	if (error) {
1005 		DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
1006 		    DEVNAME(sc), devname);
1007 		goto unwind;
1008 	}
1009 
1010 	/* make sure the partition is of the right type */
1011 	if (label.d_partitions[part].p_fstype != FS_RAID) {
1012 		DNPRINTF(SR_D_META,
1013 		    "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc) ,
1014 		        devname,
1015 		    label.d_partitions[part].p_fstype);
1016 		goto unwind;
1017 	}
1018 
1019 	size = DL_GETPSIZE(&label.d_partitions[part]) -
1020 	    SR_META_SIZE - SR_META_OFFSET;
1021 	if (size <= 0) {
1022 		DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
1023 		    devname);
1024 		goto unwind;
1025 	}
1026 	ch_entry->src_size = size;
1027 
1028 	DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc),
1029 	    devname, size);
1030 
1031 	return (SR_META_F_NATIVE);
1032 unwind:
1033 	DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
1034 	    devname ? devname : "nodev");
1035 	return (SR_META_F_INVALID);
1036 }
1037 
1038 int
1039 sr_meta_native_attach(struct sr_discipline *sd, int force)
1040 {
1041 	struct sr_softc		*sc = sd->sd_sc;
1042 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
1043 	struct sr_metadata	*md = NULL;
1044 	struct sr_chunk		*ch_entry;
1045 	struct sr_uuid		uuid;
1046 	int			sr, not_sr, rv = 1, d, expected = -1;
1047 
1048 	DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
1049 
1050 	md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
1051 	if (md == NULL) {
1052 		printf("%s: not enough memory for metadata buffer\n",
1053 		    DEVNAME(sc));
1054 		goto bad;
1055 	}
1056 
1057 	bzero(&uuid, sizeof uuid);
1058 
1059 	sr = not_sr = d = 0;
1060 	SLIST_FOREACH(ch_entry, cl, src_link) {
1061 		if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
1062 			printf("%s: could not read native metadata\n",
1063 			    DEVNAME(sc));
1064 			goto bad;
1065 		}
1066 
1067 		if (md->ssdi.ssd_magic == SR_MAGIC) {
1068 			sr++;
1069 			if (d == 0) {
1070 				bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
1071 				expected = md->ssdi.ssd_chunk_no;
1072 				continue;
1073 			} else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
1074 			    sizeof uuid)) {
1075 				printf("%s: not part of the same volume\n",
1076 				    DEVNAME(sc));
1077 				goto bad;
1078 			}
1079 		} else
1080 			not_sr++;
1081 
1082 		d++;
1083 	}
1084 
1085 	if (sr && not_sr) {
1086 		printf("%s: not all chunks are of the native metadata format",
1087 		     DEVNAME(sc));
1088 		goto bad;
1089 	}
1090 	if (expected != sr && !force && expected != -1) {
1091 		/* XXX make this smart so that we can bring up degraded disks */
1092 		printf("%s: not all chunks were provided\n", DEVNAME(sc));
1093 		goto bad;
1094 	}
1095 
1096 	rv = 0;
1097 bad:
1098 	if (md)
1099 		free(md, M_DEVBUF);
1100 	return (rv);
1101 }
1102 
1103 int
1104 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
1105     struct sr_metadata *md, void *fm)
1106 {
1107 #ifdef SR_DEBUG
1108 	struct sr_softc		*sc = sd->sd_sc;
1109 #endif
1110 	DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
1111 	    DEVNAME(sc), dev, md);
1112 
1113 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1114 	    B_READ));
1115 }
1116 
1117 int
1118 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
1119     struct sr_metadata *md, void *fm)
1120 {
1121 #ifdef SR_DEBUG
1122 	struct sr_softc		*sc = sd->sd_sc;
1123 #endif
1124 	DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
1125 	    DEVNAME(sc), dev, md);
1126 
1127 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1128 	    B_WRITE));
1129 }
1130 
1131 struct scsi_adapter sr_switch = {
1132 	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
1133 };
1134 
1135 struct scsi_device sr_dev = {
1136 	NULL, NULL, NULL, NULL
1137 };
1138 
/*
 * Autoconf match routine: unconditionally reports a match (1),
 * none of the arguments are examined.
 */
int
sr_match(struct device *parent, void *match, void *aux)
{
	int			matched = 1;

	return (matched);
}
1144 
1145 void
1146 sr_attach(struct device *parent, struct device *self, void *aux)
1147 {
1148 	struct sr_softc		*sc = (void *)self;
1149 
1150 	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
1151 
1152 	rw_init(&sc->sc_lock, "sr_lock");
1153 
1154 	if (bio_register(&sc->sc_dev, sr_ioctl) != 0)
1155 		printf("%s: controller registration failed", DEVNAME(sc));
1156 	else
1157 		sc->sc_ioctl = sr_ioctl;
1158 
1159 	printf("\n");
1160 
1161 	sr_boot_assembly(sc);
1162 }
1163 
/*
 * Autoconf detach routine: does nothing and returns 0,
 * neither argument is examined.
 */
int
sr_detach(struct device *self, int flags)
{
	int			rv = 0;

	return (rv);
}
1169 
1170 int
1171 sr_activate(struct device *self, enum devact act)
1172 {
1173 	return (1);
1174 }
1175 
1176 void
1177 sr_minphys(struct buf *bp)
1178 {
1179 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
1180 
1181 	/* XXX currently using SR_MAXFER = MAXPHYS */
1182 	if (bp->b_bcount > SR_MAXFER)
1183 		bp->b_bcount = SR_MAXFER;
1184 	minphys(bp);
1185 }
1186 
1187 void
1188 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
1189 {
1190 	size_t			copy_cnt;
1191 
1192 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
1193 	    xs, size);
1194 
1195 	if (xs->datalen) {
1196 		copy_cnt = MIN(size, xs->datalen);
1197 		bcopy(v, xs->data, copy_cnt);
1198 	}
1199 }
1200 
1201 int
1202 sr_ccb_alloc(struct sr_discipline *sd)
1203 {
1204 	struct sr_ccb		*ccb;
1205 	int			i;
1206 
1207 	if (!sd)
1208 		return (1);
1209 
1210 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
1211 
1212 	if (sd->sd_ccb)
1213 		return (1);
1214 
1215 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
1216 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
1217 	TAILQ_INIT(&sd->sd_ccb_freeq);
1218 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
1219 		ccb = &sd->sd_ccb[i];
1220 		ccb->ccb_dis = sd;
1221 		sr_ccb_put(ccb);
1222 	}
1223 
1224 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
1225 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
1226 
1227 	return (0);
1228 }
1229 
1230 void
1231 sr_ccb_free(struct sr_discipline *sd)
1232 {
1233 	struct sr_ccb		*ccb;
1234 
1235 	if (!sd)
1236 		return;
1237 
1238 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
1239 
1240 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
1241 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1242 
1243 	if (sd->sd_ccb)
1244 		free(sd->sd_ccb, M_DEVBUF);
1245 }
1246 
1247 struct sr_ccb *
1248 sr_ccb_get(struct sr_discipline *sd)
1249 {
1250 	struct sr_ccb		*ccb;
1251 	int			s;
1252 
1253 	s = splbio();
1254 
1255 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
1256 	if (ccb) {
1257 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1258 		ccb->ccb_state = SR_CCB_INPROGRESS;
1259 	}
1260 
1261 	splx(s);
1262 
1263 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
1264 	    ccb);
1265 
1266 	return (ccb);
1267 }
1268 
1269 void
1270 sr_ccb_put(struct sr_ccb *ccb)
1271 {
1272 	struct sr_discipline	*sd = ccb->ccb_dis;
1273 	int			s;
1274 
1275 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
1276 	    ccb);
1277 
1278 	s = splbio();
1279 
1280 	ccb->ccb_wu = NULL;
1281 	ccb->ccb_state = SR_CCB_FREE;
1282 	ccb->ccb_target = -1;
1283 	ccb->ccb_opaque = NULL;
1284 
1285 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
1286 
1287 	splx(s);
1288 }
1289 
1290 int
1291 sr_wu_alloc(struct sr_discipline *sd)
1292 {
1293 	struct sr_workunit	*wu;
1294 	int			i, no_wu;
1295 
1296 	if (!sd)
1297 		return (1);
1298 
1299 	DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
1300 	    sd, sd->sd_max_wu);
1301 
1302 	if (sd->sd_wu)
1303 		return (1);
1304 
1305 	no_wu = sd->sd_max_wu;
1306 	sd->sd_wu_pending = no_wu;
1307 
1308 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
1309 	    M_DEVBUF, M_WAITOK | M_ZERO);
1310 	TAILQ_INIT(&sd->sd_wu_freeq);
1311 	TAILQ_INIT(&sd->sd_wu_pendq);
1312 	TAILQ_INIT(&sd->sd_wu_defq);
1313 	for (i = 0; i < no_wu; i++) {
1314 		wu = &sd->sd_wu[i];
1315 		wu->swu_dis = sd;
1316 		sr_wu_put(wu);
1317 	}
1318 
1319 	return (0);
1320 }
1321 
1322 void
1323 sr_wu_free(struct sr_discipline *sd)
1324 {
1325 	struct sr_workunit	*wu;
1326 
1327 	if (!sd)
1328 		return;
1329 
1330 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
1331 
1332 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
1333 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1334 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
1335 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
1336 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
1337 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
1338 
1339 	if (sd->sd_wu)
1340 		free(sd->sd_wu, M_DEVBUF);
1341 }
1342 
1343 void
1344 sr_wu_put(struct sr_workunit *wu)
1345 {
1346 	struct sr_discipline	*sd = wu->swu_dis;
1347 	struct sr_ccb		*ccb;
1348 
1349 	int			s;
1350 
1351 	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
1352 
1353 	s = splbio();
1354 
1355 	wu->swu_xs = NULL;
1356 	wu->swu_state = SR_WU_FREE;
1357 	wu->swu_ios_complete = 0;
1358 	wu->swu_ios_failed = 0;
1359 	wu->swu_ios_succeeded = 0;
1360 	wu->swu_io_count = 0;
1361 	wu->swu_blk_start = 0;
1362 	wu->swu_blk_end = 0;
1363 	wu->swu_collider = NULL;
1364 	wu->swu_fake = 0;
1365 
1366 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1367 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1368 		sr_ccb_put(ccb);
1369 	}
1370 	TAILQ_INIT(&wu->swu_ccb);
1371 
1372 	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
1373 	sd->sd_wu_pending--;
1374 
1375 	splx(s);
1376 }
1377 
1378 struct sr_workunit *
1379 sr_wu_get(struct sr_discipline *sd)
1380 {
1381 	struct sr_workunit	*wu;
1382 	int			s;
1383 
1384 	s = splbio();
1385 
1386 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
1387 	if (wu) {
1388 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1389 		wu->swu_state = SR_WU_INPROGRESS;
1390 	}
1391 	sd->sd_wu_pending++;
1392 
1393 	splx(s);
1394 
1395 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
1396 
1397 	return (wu);
1398 }
1399 
1400 int
1401 sr_scsi_cmd(struct scsi_xfer *xs)
1402 {
1403 	int			s;
1404 	struct scsi_link	*link = xs->sc_link;
1405 	struct sr_softc		*sc = link->adapter_softc;
1406 	struct sr_workunit	*wu;
1407 	struct sr_discipline	*sd;
1408 
1409 	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p "
1410 	    "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags);
1411 
1412 	sd = sc->sc_dis[link->scsibus];
1413 	if (sd == NULL) {
1414 		s = splhigh();
1415 		sd = sc->sc_attach_dis;
1416 		splx(s);
1417 
1418 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n",
1419 		    DEVNAME(sc), sd);
1420 		if (sd == NULL) {
1421 			wu = NULL;
1422 			printf("%s: sr_scsi_cmd NULL discipline\n",
1423 			    DEVNAME(sc));
1424 			goto stuffup;
1425 		}
1426 	}
1427 
1428 	if (sd->sd_deleted) {
1429 		printf("%s: %s device is being deleted, failing io\n",
1430 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
1431 		goto stuffup;
1432 	}
1433 
1434 	if ((wu = sr_wu_get(sd)) == NULL) {
1435 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc));
1436 		return (TRY_AGAIN_LATER);
1437 	}
1438 
1439 	xs->error = XS_NOERROR;
1440 	wu->swu_xs = xs;
1441 
1442 	switch (xs->cmd->opcode) {
1443 	case READ_COMMAND:
1444 	case READ_BIG:
1445 	case READ_16:
1446 	case WRITE_COMMAND:
1447 	case WRITE_BIG:
1448 	case WRITE_16:
1449 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
1450 		    DEVNAME(sc), xs->cmd->opcode);
1451 		if (sd->sd_scsi_rw(wu))
1452 			goto stuffup;
1453 		break;
1454 
1455 	case SYNCHRONIZE_CACHE:
1456 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
1457 		    DEVNAME(sc));
1458 		if (sd->sd_scsi_sync(wu))
1459 			goto stuffup;
1460 		goto complete;
1461 
1462 	case TEST_UNIT_READY:
1463 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
1464 		    DEVNAME(sc));
1465 		if (sd->sd_scsi_tur(wu))
1466 			goto stuffup;
1467 		goto complete;
1468 
1469 	case START_STOP:
1470 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
1471 		    DEVNAME(sc));
1472 		if (sd->sd_scsi_start_stop(wu))
1473 			goto stuffup;
1474 		goto complete;
1475 
1476 	case INQUIRY:
1477 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
1478 		    DEVNAME(sc));
1479 		if (sd->sd_scsi_inquiry(wu))
1480 			goto stuffup;
1481 		goto complete;
1482 
1483 	case READ_CAPACITY:
1484 	case READ_CAPACITY_16:
1485 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
1486 		    DEVNAME(sc), xs->cmd->opcode);
1487 		if (sd->sd_scsi_read_cap(wu))
1488 			goto stuffup;
1489 		goto complete;
1490 
1491 	case REQUEST_SENSE:
1492 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
1493 		    DEVNAME(sc));
1494 		if (sd->sd_scsi_req_sense(wu))
1495 			goto stuffup;
1496 		goto complete;
1497 
1498 	default:
1499 		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
1500 		    DEVNAME(sc), xs->cmd->opcode);
1501 		/* XXX might need to add generic function to handle others */
1502 		goto stuffup;
1503 	}
1504 
1505 	return (SUCCESSFULLY_QUEUED);
1506 stuffup:
1507 	if (sd->sd_scsi_sense.error_code) {
1508 		xs->error = XS_SENSE;
1509 		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
1510 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
1511 	} else {
1512 		xs->error = XS_DRIVER_STUFFUP;
1513 		xs->flags |= ITSDONE;
1514 	}
1515 complete:
1516 	s = splbio();
1517 	scsi_done(xs);
1518 	splx(s);
1519 	if (wu)
1520 		sr_wu_put(wu);
1521 	return (COMPLETE);
1522 }
1523 int
1524 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag,
1525     struct proc *p)
1526 {
1527 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
1528 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
1529 
1530 	return (sr_ioctl(link->adapter_softc, cmd, addr));
1531 }
1532 
1533 int
1534 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr)
1535 {
1536 	struct sr_softc		*sc = (struct sr_softc *)dev;
1537 	int			rv = 0;
1538 
1539 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc));
1540 
1541 	rw_enter_write(&sc->sc_lock);
1542 
1543 	switch (cmd) {
1544 	case BIOCINQ:
1545 		DNPRINTF(SR_D_IOCTL, "inq\n");
1546 		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
1547 		break;
1548 
1549 	case BIOCVOL:
1550 		DNPRINTF(SR_D_IOCTL, "vol\n");
1551 		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
1552 		break;
1553 
1554 	case BIOCDISK:
1555 		DNPRINTF(SR_D_IOCTL, "disk\n");
1556 		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
1557 		break;
1558 
1559 	case BIOCALARM:
1560 		DNPRINTF(SR_D_IOCTL, "alarm\n");
1561 		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
1562 		break;
1563 
1564 	case BIOCBLINK:
1565 		DNPRINTF(SR_D_IOCTL, "blink\n");
1566 		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
1567 		break;
1568 
1569 	case BIOCSETSTATE:
1570 		DNPRINTF(SR_D_IOCTL, "setstate\n");
1571 		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
1572 		break;
1573 
1574 	case BIOCCREATERAID:
1575 		DNPRINTF(SR_D_IOCTL, "createraid\n");
1576 		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
1577 		break;
1578 
1579 	case BIOCDELETERAID:
1580 		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
1581 		break;
1582 	default:
1583 		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
1584 		rv = ENOTTY;
1585 	}
1586 
1587 	rw_exit_write(&sc->sc_lock);
1588 
1589 	return (rv);
1590 }
1591 
1592 int
1593 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
1594 {
1595 	int			i, vol, disk;
1596 
1597 	for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++)
1598 		/* XXX this will not work when we stagger disciplines */
1599 		if (sc->sc_dis[i]) {
1600 			vol++;
1601 			disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
1602 		}
1603 
1604 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
1605 	bi->bi_novol = vol;
1606 	bi->bi_nodisk = disk;
1607 
1608 	return (0);
1609 }
1610 
1611 int
1612 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
1613 {
1614 	int			i, vol, rv = EINVAL;
1615 	struct sr_discipline	*sd;
1616 
1617 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1618 		/* XXX this will not work when we stagger disciplines */
1619 		if (sc->sc_dis[i])
1620 			vol++;
1621 		if (vol != bv->bv_volid)
1622 			continue;
1623 
1624 		sd = sc->sc_dis[i];
1625 		bv->bv_status = sd->sd_vol_status;
1626 		bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
1627 		bv->bv_level = sd->sd_meta->ssdi.ssd_level;
1628 		bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
1629 		strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
1630 		    sizeof(bv->bv_dev));
1631 		strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
1632 		    sizeof(bv->bv_vendor));
1633 		rv = 0;
1634 		break;
1635 	}
1636 
1637 	return (rv);
1638 }
1639 
1640 int
1641 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
1642 {
1643 	int			i, vol, rv = EINVAL, id;
1644 	struct sr_chunk		*src;
1645 
1646 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1647 		/* XXX this will not work when we stagger disciplines */
1648 		if (sc->sc_dis[i])
1649 			vol++;
1650 		if (vol != bd->bd_volid)
1651 			continue;
1652 
1653 		id = bd->bd_diskid;
1654 		if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
1655 			break;
1656 
1657 		src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
1658 		bd->bd_status = src->src_meta.scm_status;
1659 		bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
1660 		bd->bd_channel = vol;
1661 		bd->bd_target = id;
1662 		strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
1663 		    sizeof(bd->bd_vendor));
1664 		rv = 0;
1665 		break;
1666 	}
1667 
1668 	return (rv);
1669 }
1670 
1671 int
1672 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
1673 {
1674 	int			rv = EINVAL;
1675 
1676 #ifdef SR_UNIT_TEST
1677 	int			i, vol, state;
1678 	struct sr_discipline	*sd;
1679 
1680 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1681 		/* XXX this will not work when we stagger disciplines */
1682 		if (sc->sc_dis[i])
1683 			vol++;
1684 		if (vol != bs->bs_channel)
1685 			continue;
1686 
1687 		sd = sc->sc_dis[vol];
1688 		if (bs->bs_target >= sd->sd_meta->ssdi.ssd_chunk_no)
1689 			goto done;
1690 
1691 		switch (bs->bs_status) {
1692 		case BIOC_SSONLINE:
1693 			state = BIOC_SDONLINE;
1694 			break;
1695 		case BIOC_SSOFFLINE:
1696 			state = BIOC_SDOFFLINE;
1697 			break;
1698 		case BIOC_SSHOTSPARE:
1699 			state = BIOC_SDHOTSPARE;
1700 			break;
1701 		case BIOC_SSREBUILD:
1702 			state = BIOC_SDREBUILD;
1703 			break;
1704 		default:
1705 			printf("invalid state %d\n", bs->bs_status);
1706 			goto done;
1707 		}
1708 
1709 		printf("status change for %u:%u -> %u %u\n",
1710 		    bs->bs_channel, bs->bs_target, bs->bs_status, state);
1711 
1712 		sd->sd_set_chunk_state(sd, bs->bs_target, bs->bs_status);
1713 
1714 		rv = 0;
1715 
1716 		break;
1717 	}
1718 
1719 done:
1720 #endif
1721 	return (rv);
1722 }
1723 
1724 int
1725 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
1726 {
1727 	dev_t			*dt;
1728 	int			i, s, no_chunk, rv = EINVAL, vol;
1729 	int			no_meta, updatemeta = 0;
1730 	u_int64_t		vol_size;
1731 	int32_t			strip_size = 0;
1732 	struct sr_chunk_head	*cl;
1733 	struct sr_discipline	*sd = NULL;
1734 	struct sr_chunk		*ch_entry;
1735 	struct device		*dev, *dev2;
1736 	struct scsibus_attach_args saa;
1737 
1738 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
1739 	    DEVNAME(sc), user);
1740 
1741 	/* user input */
1742 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
1743 		goto unwind;
1744 
1745 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
1746 	if (user)
1747 		copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len);
1748 	else
1749 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
1750 
1751 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
1752 	sd->sd_sc = sc;
1753 
1754 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
1755 	cl = &sd->sd_vol.sv_chunk_list;
1756 	SLIST_INIT(cl);
1757 
1758 	/* we have a valid list now create an array index */
1759 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * no_chunk,
1760 	    M_DEVBUF, M_WAITOK | M_ZERO);
1761 
1762 	sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
1763 	if (sd->sd_meta_type == SR_META_F_INVALID) {
1764 		printf("%s: invalid metadata format\n", DEVNAME(sc));
1765 		goto unwind;
1766 	}
1767 
1768 	if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) {
1769 		printf("%s: can't attach metadata type %d\n", DEVNAME(sc),
1770 		    sd->sd_meta_type);
1771 		goto unwind;
1772 	}
1773 
1774 	/* force the raid volume by clearing metadata region */
1775 	if (bc->bc_flags & BIOC_SCFORCE) {
1776 		/* make sure disk isn't up and running */
1777 		if (sr_meta_read(sd))
1778 			if (sr_already_assembled(sd)) {
1779 				printf("%s: disk ", DEVNAME(sc));
1780 				sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1781 				printf(" is currently in use; can't force "
1782 				    "create\n");
1783 				goto unwind;
1784 			}
1785 
1786 		if (sr_meta_clear(sd)) {
1787 			printf("%s: failed to clear metadata\n", DEVNAME(sc));
1788 			goto unwind;
1789 		}
1790 	}
1791 
1792 	if ((no_meta = sr_meta_read(sd)) == 0) {
1793 		/* fill out all chunk metadata */
1794 		sr_meta_chunks_create(sc, cl);
1795 		ch_entry = SLIST_FIRST(cl);
1796 
1797 		/* no metadata available */
1798 		switch (bc->bc_level) {
1799 		case 0:
1800 			if (no_chunk < 2)
1801 				goto unwind;
1802 			strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name));
1803 			/*
1804 			 * XXX add variable strip size later even though
1805 			 * MAXPHYS is really the clever value, users like
1806 			 * to tinker with that type of stuff
1807 			 */
1808 			strip_size = MAXPHYS;
1809 			vol_size =
1810 			    ch_entry->src_meta.scmi.scm_coerced_size * no_chunk;
1811 			break;
1812 		case 1:
1813 			if (no_chunk < 2)
1814 				goto unwind;
1815 			strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
1816 			vol_size = ch_entry->src_meta.scmi.scm_coerced_size;
1817 			break;
1818 #ifdef CRYPTO
1819 		case 'C':
1820 			DNPRINTF(SR_D_IOCTL,
1821 			    "%s: sr_ioctl_createraid: no_chunk %d\n",
1822 			    DEVNAME(sc), no_chunk);
1823 
1824 			if (no_chunk != 1)
1825 				goto unwind;
1826 
1827 			/* no hint available yet */
1828 			if (bc->bc_opaque_flags & BIOC_SOOUT) {
1829 				bc->bc_opaque_status = BIOC_SOINOUT_FAILED;
1830 				rv = 0;
1831 				goto unwind;
1832 			}
1833 
1834 			if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE))
1835 				goto unwind;
1836 
1837 			if (sr_crypto_get_kdf(bc, sd))
1838 				goto unwind;
1839 
1840 			strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name));
1841 			vol_size = ch_entry->src_meta.scmi.scm_size;
1842 
1843 			sr_crypto_create_keys(sd);
1844 
1845 			break;
1846 #endif /* CRYPTO */
1847 		default:
1848 			goto unwind;
1849 		}
1850 
1851 		/* fill out all volume metadata */
1852 		DNPRINTF(SR_D_IOCTL,
1853 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
1854 		    DEVNAME(sc), vol_size);
1855 		sd->sd_meta->ssdi.ssd_chunk_no = no_chunk;
1856 		sd->sd_meta->ssdi.ssd_size = vol_size;
1857 		sd->sd_vol_status = BIOC_SVONLINE;
1858 		sd->sd_meta->ssdi.ssd_level = bc->bc_level;
1859 		sd->sd_meta->ssdi.ssd_strip_size = strip_size;
1860 		strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD",
1861 		    sizeof(sd->sd_meta->ssdi.ssd_vendor));
1862 		snprintf(sd->sd_meta->ssdi.ssd_product,
1863 		    sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s",
1864 		    sd->sd_name);
1865 		snprintf(sd->sd_meta->ssdi.ssd_revision,
1866 		    sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d",
1867 		    SR_META_VERSION);
1868 
1869 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
1870 		updatemeta = 1;
1871 	} else if (no_meta == no_chunk) {
1872 		if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
1873 			printf("%s: %s was not shutdown properly\n",
1874 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
1875 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
1876 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
1877 			    "metadata\n", DEVNAME(sc));
1878 			goto unwind;
1879 		}
1880 		if (sr_already_assembled(sd)) {
1881 			printf("%s: disk ", DEVNAME(sc));
1882 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1883 			printf(" already assembled\n");
1884 			goto unwind;
1885 		}
1886 #ifdef CRYPTO
1887 		/* provide userland with kdf hint */
1888 		if (bc->bc_opaque_flags & BIOC_SOOUT) {
1889 			if (bc->bc_opaque == NULL)
1890 				goto unwind;
1891 
1892 			if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) <
1893 			    bc->bc_opaque_size)
1894 				goto unwind;
1895 
1896 			if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint,
1897 			    bc->bc_opaque, bc->bc_opaque_size))
1898 				goto unwind;
1899 
1900 			/* we're done */
1901 			bc->bc_opaque_status = BIOC_SOINOUT_OK;
1902 			rv = 0;
1903 			goto unwind;
1904 		}
1905 		/* get kdf with maskkey from userland */
1906 		if (bc->bc_opaque_flags & BIOC_SOIN) {
1907 			if (sr_crypto_get_kdf(bc, sd))
1908 				goto unwind;
1909 		}
1910 #endif	/* CRYPTO */
1911 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
1912 		    DEVNAME(sc));
1913 		updatemeta = 0;
1914 	} else if (no_meta == -1) {
1915 		printf("%s: one of the chunks has corrupt metadata; aborting "
1916 		    "assembly\n", DEVNAME(sc));
1917 		goto unwind;
1918 	} else {
1919 		if (sr_already_assembled(sd)) {
1920 			printf("%s: disk ", DEVNAME(sc));
1921 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1922 			printf(" already assembled; will not partial "
1923 			    "assemble it\n");
1924 			goto unwind;
1925 		}
1926 		printf("%s: not yet partial bringup\n", DEVNAME(sc));
1927 		goto unwind;
1928 	}
1929 
1930 	/* XXX metadata SHALL be fully filled in at this point */
1931 
1932 	switch (bc->bc_level) {
1933 	case 0:
1934 		/* fill out discipline members */
1935 		sd->sd_type = SR_MD_RAID0;
1936 		sd->sd_max_ccb_per_wu =
1937 		    (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) *
1938 		    SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no;
1939 		sd->sd_max_wu = SR_RAID0_NOWU;
1940 
1941 		/* setup discipline pointers */
1942 		sd->sd_alloc_resources = sr_raid0_alloc_resources;
1943 		sd->sd_free_resources = sr_raid0_free_resources;
1944 		sd->sd_scsi_inquiry = sr_raid_inquiry;
1945 		sd->sd_scsi_read_cap = sr_raid_read_cap;
1946 		sd->sd_scsi_tur = sr_raid_tur;
1947 		sd->sd_scsi_req_sense = sr_raid_request_sense;
1948 		sd->sd_scsi_start_stop = sr_raid_start_stop;
1949 		sd->sd_scsi_sync = sr_raid_sync;
1950 		sd->sd_scsi_rw = sr_raid0_rw;
1951 		sd->sd_set_chunk_state = sr_raid0_set_chunk_state;
1952 		sd->sd_set_vol_state = sr_raid0_set_vol_state;
1953 		break;
1954 	case 1:
1955 		/* fill out discipline members */
1956 		sd->sd_type = SR_MD_RAID1;
1957 		sd->sd_max_ccb_per_wu = no_chunk;
1958 		sd->sd_max_wu = SR_RAID1_NOWU;
1959 
1960 		/* setup discipline pointers */
1961 		sd->sd_alloc_resources = sr_raid1_alloc_resources;
1962 		sd->sd_free_resources = sr_raid1_free_resources;
1963 		sd->sd_scsi_inquiry = sr_raid_inquiry;
1964 		sd->sd_scsi_read_cap = sr_raid_read_cap;
1965 		sd->sd_scsi_tur = sr_raid_tur;
1966 		sd->sd_scsi_req_sense = sr_raid_request_sense;
1967 		sd->sd_scsi_start_stop = sr_raid_start_stop;
1968 		sd->sd_scsi_sync = sr_raid_sync;
1969 		sd->sd_scsi_rw = sr_raid1_rw;
1970 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
1971 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
1972 		break;
1973 #ifdef CRYPTO
1974 	case 'C':
1975 		/* fill out discipline members */
1976 		sd->sd_type = SR_MD_CRYPTO;
1977 		sd->sd_max_ccb_per_wu = no_chunk;
1978 		sd->sd_max_wu = SR_CRYPTO_NOWU;
1979 
1980 		/* setup discipline pointers */
1981 		sd->sd_alloc_resources = sr_crypto_alloc_resources;
1982 		sd->sd_free_resources = sr_crypto_free_resources;
1983 		sd->sd_scsi_inquiry = sr_raid_inquiry;
1984 		sd->sd_scsi_read_cap = sr_raid_read_cap;
1985 		sd->sd_scsi_tur = sr_raid_tur;
1986 		sd->sd_scsi_req_sense = sr_raid_request_sense;
1987 		sd->sd_scsi_start_stop = sr_raid_start_stop;
1988 		sd->sd_scsi_sync = sr_raid_sync;
1989 		sd->sd_scsi_rw = sr_crypto_rw;
1990 		/* XXX reuse raid 1 functions for now FIXME */
1991 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
1992 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
1993 		break;
1994 #endif
1995 	default:
1996 		goto unwind;
1997 	}
1998 
1999 	/* allocate all resources */
2000 	if ((rv = sd->sd_alloc_resources(sd)))
2001 		goto unwind;
2002 
2003 	/* setup scsi midlayer */
2004 	sd->sd_link.openings = sd->sd_max_wu;
2005 	sd->sd_link.device = &sr_dev;
2006 	sd->sd_link.device_softc = sc;
2007 	sd->sd_link.adapter_softc = sc;
2008 	sd->sd_link.adapter = &sr_switch;
2009 	sd->sd_link.adapter_target = SR_MAX_LD;
2010 	sd->sd_link.adapter_buswidth = 1;
2011 	bzero(&saa, sizeof(saa));
2012 	saa.saa_sc_link = &sd->sd_link;
2013 
2014 	/* we passed all checks return ENXIO if volume can't be created */
2015 	rv = ENXIO;
2016 
2017 	/* clear sense data */
2018 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2019 
2020 	/* use temporary discipline pointer */
2021 	s = splhigh();
2022 	sc->sc_attach_dis = sd;
2023 	splx(s);
2024 	dev2 = config_found(&sc->sc_dev, &saa, scsiprint);
2025 	s = splhigh();
2026 	sc->sc_attach_dis = NULL;
2027 	splx(s);
2028 	TAILQ_FOREACH(dev, &alldevs, dv_list)
2029 		if (dev->dv_parent == dev2)
2030 			break;
2031 	if (dev == NULL)
2032 		goto unwind;
2033 
2034 	DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n",
2035 	    DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus);
2036 
2037 	sc->sc_dis[sd->sd_link.scsibus] = sd;
2038 	for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++)
2039 		if (sc->sc_dis[i])
2040 			vol++;
2041 
2042 	rv = 0;
2043 	if (updatemeta) {
2044 		/* fill out remaining volume metadata */
2045 		sd->sd_meta->ssdi.ssd_volid = vol;
2046 		strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
2047 		    sizeof(sd->sd_meta->ssd_devname));
2048 		sr_meta_init(sd, cl);
2049 	} else {
2050 		if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
2051 		    sizeof(dev->dv_xname))) {
2052 			printf("%s: volume %s is roaming, it used to be %s, "
2053 			    "updating metadata\n",
2054 			    DEVNAME(sc), dev->dv_xname,
2055 			    sd->sd_meta->ssd_devname);
2056 
2057 			sd->sd_meta->ssdi.ssd_volid = vol;
2058 			strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
2059 			    sizeof(sd->sd_meta->ssd_devname));
2060 		}
2061 	}
2062 
2063 	/* save metadata to disk */
2064 	rv = sr_meta_save(sd, SR_META_DIRTY);
2065 
2066 #ifndef SMALL_KERNEL
2067 	if (sr_sensors_create(sd))
2068 		printf("%s: unable to create sensor for %s\n", DEVNAME(sc),
2069 		    dev->dv_xname);
2070 	else
2071 		sd->sd_vol.sv_sensor_valid = 1;
2072 #endif /* SMALL_KERNEL */
2073 
2074 	sd->sd_scsibus_dev = dev2;
2075 	sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd);
2076 
2077 	return (rv);
2078 
2079 unwind:
2080 	sr_discipline_shutdown(sd);
2081 
2082 	return (rv);
2083 }
2084 
2085 int
2086 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
2087 {
2088 	struct sr_discipline	*sd = NULL;
2089 	int			rv = 1;
2090 	int			i;
2091 
2092 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
2093 	    dr->bd_dev);
2094 
2095 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2096 		if (sc->sc_dis[i]) {
2097 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, dr->bd_dev,
2098 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
2099 				sd = sc->sc_dis[i];
2100 				break;
2101 			}
2102 		}
2103 
2104 	if (sd == NULL)
2105 		goto bad;
2106 
2107 	sd->sd_deleted = 1;
2108 	sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE;
2109 	sr_shutdown(sd);
2110 
2111 	rv = 0;
2112 bad:
2113 	return (rv);
2114 }
2115 
2116 void
2117 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
2118 {
2119 	struct sr_chunk		*ch_entry, *ch_next;
2120 	dev_t			dev;
2121 
2122 	DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
2123 
2124 	if (!cl)
2125 		return;
2126 
2127 	for (ch_entry = SLIST_FIRST(cl);
2128 	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
2129 		ch_next = SLIST_NEXT(ch_entry, src_link);
2130 
2131 		dev = ch_entry->src_dev_mm;
2132 		DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
2133 		    DEVNAME(sc), ch_entry->src_devname);
2134 		if (dev != NODEV)
2135 			bdevsw_lookup(dev)->d_close(dev, FWRITE, S_IFBLK,
2136 			    curproc);
2137 
2138 		free(ch_entry, M_DEVBUF);
2139 	}
2140 	SLIST_INIT(cl);
2141 }
2142 
2143 void
2144 sr_discipline_free(struct sr_discipline *sd)
2145 {
2146 	struct sr_softc		*sc = sd->sd_sc;
2147 	int			i;
2148 
2149 	if (!sd)
2150 		return;
2151 
2152 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
2153 	    DEVNAME(sc),
2154 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
2155 	if (sd->sd_free_resources)
2156 		sd->sd_free_resources(sd);
2157 	if (sd->sd_vol.sv_chunks)
2158 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
2159 	if (sd->sd_meta)
2160 		free(sd->sd_meta, M_DEVBUF);
2161 	if (sd->sd_meta_foreign)
2162 		free(sd->sd_meta_foreign, M_DEVBUF);
2163 
2164 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2165 		if (sc->sc_dis[i] == sd) {
2166 			sc->sc_dis[i] = NULL;
2167 			break;
2168 		}
2169 
2170 	free(sd, M_DEVBUF);
2171 }
2172 
2173 void
2174 sr_discipline_shutdown(struct sr_discipline *sd)
2175 {
2176 	struct sr_softc		*sc = sd->sd_sc;
2177 	int			s;
2178 
2179 	if (!sd || !sc)
2180 		return;
2181 
2182 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
2183 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
2184 
2185 	s = splbio();
2186 
2187 	if (sd->sd_shutdownhook)
2188 		shutdownhook_disestablish(sd->sd_shutdownhook);
2189 
2190 	/* make sure there isn't a sync pending and yield */
2191 	wakeup(sd);
2192 	while (sd->sd_sync || sd->sd_must_flush)
2193 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
2194 		    EWOULDBLOCK)
2195 			break;
2196 
2197 #ifndef SMALL_KERNEL
2198 	sr_sensors_delete(sd);
2199 #endif /* SMALL_KERNEL */
2200 
2201 	if (sd->sd_scsibus_dev)
2202 		config_detach(sd->sd_scsibus_dev, DETACH_FORCE);
2203 
2204 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
2205 
2206 	if (sd)
2207 		sr_discipline_free(sd);
2208 
2209 	splx(s);
2210 }
2211 
2212 int
2213 sr_raid_inquiry(struct sr_workunit *wu)
2214 {
2215 	struct sr_discipline	*sd = wu->swu_dis;
2216 	struct scsi_xfer	*xs = wu->swu_xs;
2217 	struct scsi_inquiry_data inq;
2218 
2219 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
2220 
2221 	bzero(&inq, sizeof(inq));
2222 	inq.device = T_DIRECT;
2223 	inq.dev_qual2 = 0;
2224 	inq.version = 2;
2225 	inq.response_format = 2;
2226 	inq.additional_length = 32;
2227 	strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
2228 	    sizeof(inq.vendor));
2229 	strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
2230 	    sizeof(inq.product));
2231 	strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
2232 	    sizeof(inq.revision));
2233 	sr_copy_internal_data(xs, &inq, sizeof(inq));
2234 
2235 	return (0);
2236 }
2237 
2238 int
2239 sr_raid_read_cap(struct sr_workunit *wu)
2240 {
2241 	struct sr_discipline	*sd = wu->swu_dis;
2242 	struct scsi_xfer	*xs = wu->swu_xs;
2243 	struct scsi_read_cap_data rcd;
2244 	struct scsi_read_cap_data_16 rcd16;
2245 	int			rv = 1;
2246 
2247 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
2248 
2249 	if (xs->cmd->opcode == READ_CAPACITY) {
2250 		bzero(&rcd, sizeof(rcd));
2251 		if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu)
2252 			_lto4b(0xffffffff, rcd.addr);
2253 		else
2254 			_lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr);
2255 		_lto4b(512, rcd.length);
2256 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
2257 		rv = 0;
2258 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
2259 		bzero(&rcd16, sizeof(rcd16));
2260 		_lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr);
2261 		_lto4b(512, rcd16.length);
2262 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
2263 		rv = 0;
2264 	}
2265 
2266 	return (rv);
2267 }
2268 
2269 int
2270 sr_raid_tur(struct sr_workunit *wu)
2271 {
2272 	struct sr_discipline	*sd = wu->swu_dis;
2273 
2274 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
2275 
2276 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2277 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
2278 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
2279 		sd->sd_scsi_sense.add_sense_code = 0x04;
2280 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
2281 		sd->sd_scsi_sense.extra_len = 4;
2282 		return (1);
2283 	} else if (sd->sd_vol_status == BIOC_SVINVALID) {
2284 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
2285 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
2286 		sd->sd_scsi_sense.add_sense_code = 0x05;
2287 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
2288 		sd->sd_scsi_sense.extra_len = 4;
2289 		return (1);
2290 	}
2291 
2292 	return (0);
2293 }
2294 
2295 int
2296 sr_raid_request_sense(struct sr_workunit *wu)
2297 {
2298 	struct sr_discipline	*sd = wu->swu_dis;
2299 	struct scsi_xfer	*xs = wu->swu_xs;
2300 
2301 	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
2302 	    DEVNAME(sd->sd_sc));
2303 
2304 	/* use latest sense data */
2305 	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
2306 
2307 	/* clear sense data */
2308 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2309 
2310 	return (0);
2311 }
2312 
2313 int
2314 sr_raid_start_stop(struct sr_workunit *wu)
2315 {
2316 	struct sr_discipline	*sd = wu->swu_dis;
2317 	struct scsi_xfer	*xs = wu->swu_xs;
2318 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
2319 	int			rv = 1;
2320 
2321 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
2322 	    DEVNAME(sd->sd_sc));
2323 
2324 	if (!ss)
2325 		return (rv);
2326 
2327 	if (ss->byte2 == 0x00) {
2328 		/* START */
2329 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2330 			/* bring volume online */
2331 			/* XXX check to see if volume can be brought online */
2332 			sd->sd_vol_status = BIOC_SVONLINE;
2333 		}
2334 		rv = 0;
2335 	} else /* XXX is this the check? if (byte == 0x01) */ {
2336 		/* STOP */
2337 		if (sd->sd_vol_status == BIOC_SVONLINE) {
2338 			/* bring volume offline */
2339 			sd->sd_vol_status = BIOC_SVOFFLINE;
2340 		}
2341 		rv = 0;
2342 	}
2343 
2344 	return (rv);
2345 }
2346 
2347 int
2348 sr_raid_sync(struct sr_workunit *wu)
2349 {
2350 	struct sr_discipline	*sd = wu->swu_dis;
2351 	int			s, rv = 0, ios;
2352 
2353 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
2354 
2355 	/* when doing a fake sync don't coun't the wu */
2356 	ios = wu->swu_fake ? 0 : 1;
2357 
2358 	s = splbio();
2359 	sd->sd_sync = 1;
2360 
2361 	while (sd->sd_wu_pending > ios)
2362 		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
2363 			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
2364 			    DEVNAME(sd->sd_sc));
2365 			rv = 1;
2366 			break;
2367 		}
2368 
2369 	sd->sd_sync = 0;
2370 	splx(s);
2371 
2372 	wakeup(&sd->sd_sync);
2373 
2374 	return (rv);
2375 }
2376 
2377 void
2378 sr_raid_startwu(struct sr_workunit *wu)
2379 {
2380 	struct sr_discipline	*sd = wu->swu_dis;
2381 	struct sr_ccb		*ccb;
2382 
2383 	splassert(IPL_BIO);
2384 
2385 	if (wu->swu_state == SR_WU_RESTART)
2386 		/*
2387 		 * no need to put the wu on the pending queue since we
2388 		 * are restarting the io
2389 		 */
2390 		 ;
2391 	else
2392 		/* move wu to pending queue */
2393 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
2394 
2395 	/* start all individual ios */
2396 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2397 		bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf);
2398 	}
2399 }
2400 
2401 void
2402 sr_checksum_print(u_int8_t *md5)
2403 {
2404 	int			i;
2405 
2406 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
2407 		printf("%02x", md5[i]);
2408 }
2409 
2410 void
2411 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
2412 {
2413 	MD5_CTX			ctx;
2414 
2415 	DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
2416 	    md5, len);
2417 
2418 	MD5Init(&ctx);
2419 	MD5Update(&ctx, src, len);
2420 	MD5Final(md5, &ctx);
2421 }
2422 
2423 void
2424 sr_uuid_get(struct sr_uuid *uuid)
2425 {
2426 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
2427 	/* UUID version 4: random */
2428 	uuid->sui_id[6] &= 0x0f;
2429 	uuid->sui_id[6] |= 0x40;
2430 	/* RFC4122 variant */
2431 	uuid->sui_id[8] &= 0x3f;
2432 	uuid->sui_id[8] |= 0x80;
2433 }
2434 
2435 void
2436 sr_uuid_print(struct sr_uuid *uuid, int cr)
2437 {
2438 	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
2439 	    "%02x%02x%02x%02x%02x%02x",
2440 	    uuid->sui_id[0], uuid->sui_id[1],
2441 	    uuid->sui_id[2], uuid->sui_id[3],
2442 	    uuid->sui_id[4], uuid->sui_id[5],
2443 	    uuid->sui_id[6], uuid->sui_id[7],
2444 	    uuid->sui_id[8], uuid->sui_id[9],
2445 	    uuid->sui_id[10], uuid->sui_id[11],
2446 	    uuid->sui_id[12], uuid->sui_id[13],
2447 	    uuid->sui_id[14], uuid->sui_id[15]);
2448 
2449 	if (cr)
2450 		printf("\n");
2451 }
2452 
2453 int
2454 sr_already_assembled(struct sr_discipline *sd)
2455 {
2456 	struct sr_softc		*sc = sd->sd_sc;
2457 	int			i;
2458 
2459 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2460 		if (sc->sc_dis[i])
2461 			if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
2462 			    &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
2463 			    sizeof(sd->sd_meta->ssdi.ssd_uuid)))
2464 				return (1);
2465 
2466 	return (0);
2467 }
2468 
/*
 * Validate a strip size given in bytes.  A valid size is a non-zero
 * multiple of 512 that is also a power of two.
 *
 * Returns log2 of the size (e.g. 9 for 512) when valid and -1 otherwise.
 */
int32_t
sr_validate_stripsize(u_int32_t b)
{
	int			s = 0;

	/*
	 * zero is a multiple of 512 but has no set bit, so the shift loop
	 * below would never terminate; reject it up front
	 */
	if (b == 0 || b % 512)
		return (-1);

	/* count trailing zero bits */
	while ((b & 1) == 0) {
		b >>= 1;
		s++;
	}

	/* only powers of two: nothing may remain above the lowest set bit */
	b >>= 1;
	if (b)
		return (-1);

	return (s);
}
2489 
2490 void
2491 sr_shutdown(void *arg)
2492 {
2493 	struct sr_discipline	*sd = arg;
2494 #ifdef SR_DEBUG
2495 	struct sr_softc		*sc = sd->sd_sc;
2496 #endif
2497 	DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n",
2498 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
2499 
2500 	sr_meta_save(sd, 0);
2501 
2502 	sr_discipline_shutdown(sd);
2503 }
2504 
2505 int
2506 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
2507 {
2508 	struct sr_discipline	*sd = wu->swu_dis;
2509 	struct scsi_xfer	*xs = wu->swu_xs;
2510 	int			rv = 1;
2511 
2512 	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
2513 	    xs->cmd->opcode);
2514 
2515 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2516 		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
2517 		    DEVNAME(sd->sd_sc));
2518 		goto bad;
2519 	}
2520 
2521 	if (xs->datalen == 0) {
2522 		printf("%s: %s: illegal block count\n",
2523 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
2524 		goto bad;
2525 	}
2526 
2527 	if (xs->cmdlen == 10)
2528 		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
2529 	else if (xs->cmdlen == 16)
2530 		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
2531 	else if (xs->cmdlen == 6)
2532 		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
2533 	else {
2534 		printf("%s: %s: illegal cmdlen\n", DEVNAME(sd->sd_sc), func,
2535 		    sd->sd_meta->ssd_devname);
2536 		goto bad;
2537 	}
2538 
2539 	wu->swu_blk_start = *blk;
2540 	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
2541 
2542 	if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
2543 		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
2544 		    "end: %lld length: %d\n",
2545 		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
2546 		    wu->swu_blk_end, xs->datalen);
2547 
2548 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
2549 		    SSD_ERRCODE_VALID;
2550 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
2551 		sd->sd_scsi_sense.add_sense_code = 0x21;
2552 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
2553 		sd->sd_scsi_sense.extra_len = 4;
2554 		goto bad;
2555 	}
2556 
2557 	rv = 0;
2558 bad:
2559 	return (rv);
2560 }
2561 
2562 int
2563 sr_check_io_collision(struct sr_workunit *wu)
2564 {
2565 	struct sr_discipline	*sd = wu->swu_dis;
2566 	struct sr_workunit	*wup;
2567 
2568 	splassert(IPL_BIO);
2569 
2570 	/* walk queue backwards and fill in collider if we have one */
2571 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
2572 		if (wu->swu_blk_end < wup->swu_blk_start ||
2573 		    wup->swu_blk_end < wu->swu_blk_start)
2574 			continue;
2575 
2576 		/* we have an LBA collision, defer wu */
2577 		wu->swu_state = SR_WU_DEFERRED;
2578 		if (wup->swu_collider)
2579 			/* wu is on deferred queue, append to last wu */
2580 			while (wup->swu_collider)
2581 				wup = wup->swu_collider;
2582 
2583 		wup->swu_collider = wu;
2584 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
2585 		sd->sd_wu_collisions++;
2586 		goto queued;
2587 	}
2588 
2589 	return (0);
2590 queued:
2591 	return (1);
2592 }
2593 
2594 #ifndef SMALL_KERNEL
2595 int
2596 sr_sensors_create(struct sr_discipline *sd)
2597 {
2598 	struct sr_softc		*sc = sd->sd_sc;
2599 	int			rv = 1;
2600 
2601 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
2602 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
2603 
2604 	strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc),
2605 	    sizeof(sd->sd_vol.sv_sensordev.xname));
2606 
2607 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
2608 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
2609 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
2610 	    sizeof(sd->sd_vol.sv_sensor.desc));
2611 
2612 	sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor);
2613 
2614 	if (sc->sc_sensors_running == 0) {
2615 		if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL)
2616 			goto bad;
2617 		sc->sc_sensors_running = 1;
2618 	}
2619 	sensordev_install(&sd->sd_vol.sv_sensordev);
2620 
2621 	rv = 0;
2622 bad:
2623 	return (rv);
2624 }
2625 
2626 void
2627 sr_sensors_delete(struct sr_discipline *sd)
2628 {
2629 #ifdef SR_DEBUG
2630 	struct sr_softc		*sc = sd->sd_sc;
2631 #endif
2632 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_delete\n",
2633 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
2634 
2635 	if (sd->sd_vol.sv_sensor_valid)
2636 		sensordev_deinstall(&sd->sd_vol.sv_sensordev);
2637 }
2638 
2639 void
2640 sr_sensors_refresh(void *arg)
2641 {
2642 	struct sr_softc		*sc = arg;
2643 	struct sr_volume	*sv;
2644 	struct sr_discipline	*sd;
2645 	int			i, vol;
2646 
2647 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
2648 
2649 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2650 		/* XXX this will not work when we stagger disciplines */
2651 		if (!sc->sc_dis[i])
2652 			continue;
2653 
2654 		sd = sc->sc_dis[i];
2655 		sv = &sd->sd_vol;
2656 
2657 		switch(sd->sd_vol_status) {
2658 		case BIOC_SVOFFLINE:
2659 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
2660 			sv->sv_sensor.status = SENSOR_S_CRIT;
2661 			break;
2662 
2663 		case BIOC_SVDEGRADED:
2664 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
2665 			sv->sv_sensor.status = SENSOR_S_WARN;
2666 			break;
2667 
2668 		case BIOC_SVSCRUB:
2669 		case BIOC_SVONLINE:
2670 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
2671 			sv->sv_sensor.status = SENSOR_S_OK;
2672 			break;
2673 
2674 		default:
2675 			sv->sv_sensor.value = 0; /* unknown */
2676 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
2677 		}
2678 	}
2679 }
2680 #endif /* SMALL_KERNEL */
2681 
2682 #ifdef SR_FANCY_STATS
2683 void				sr_print_stats(void);
2684 
2685 void
2686 sr_print_stats(void)
2687 {
2688 	struct sr_softc		*sc;
2689 	struct sr_discipline	*sd;
2690 	int			i, vol;
2691 
2692 	for (i = 0; i < softraid_cd.cd_ndevs; i++)
2693 		if (softraid_cd.cd_devs[i]) {
2694 			sc = softraid_cd.cd_devs[i];
2695 			/* we'll only have one softc */
2696 			break;
2697 		}
2698 
2699 	if (!sc) {
2700 		printf("no softraid softc found\n");
2701 		return;
2702 	}
2703 
2704 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2705 		/* XXX this will not work when we stagger disciplines */
2706 		if (!sc->sc_dis[i])
2707 			continue;
2708 
2709 		sd = sc->sc_dis[i];
2710 		printf("%s: ios pending: %d  collisions %llu\n",
2711 		    sd->sd_meta->ssd_devname,
2712 		    sd->sd_wu_pending,
2713 		    sd->sd_wu_collisions);
2714 	}
2715 }
2716 #endif /* SR_FANCY_STATS */
2717 
2718 #ifdef SR_DEBUG
2719 void
2720 sr_meta_print(struct sr_metadata *m)
2721 {
2722 	int			i;
2723 	struct sr_meta_chunk	*mc;
2724 	struct sr_meta_opt	*mo;
2725 
2726 	if (!(sr_debug & SR_D_META))
2727 		return;
2728 
2729 	printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
2730 	printf("\tssd_version %d\n", m->ssdi.ssd_version);
2731 	printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags);
2732 	printf("\tssd_uuid ");
2733 	sr_uuid_print(&m->ssdi.ssd_uuid, 1);
2734 	printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
2735 	printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
2736 	printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
2737 	printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
2738 	printf("\tssd_level %d\n", m->ssdi.ssd_level);
2739 	printf("\tssd_level %lld\n", m->ssdi.ssd_size);
2740 	printf("\tssd_devname %s\n", m->ssd_devname);
2741 	printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
2742 	printf("\tssd_product %s\n", m->ssdi.ssd_product);
2743 	printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
2744 	printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
2745 	printf("\tssd_checksum ");
2746 	sr_checksum_print(m->ssd_checksum);
2747 	printf("\n");
2748 	printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
2749 	printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
2750 
2751 	mc = (struct sr_meta_chunk *)(m + 1);
2752 	for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
2753 		printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
2754 		printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
2755 		printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
2756 		printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
2757 		printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
2758 		printf("\t\tscm_uuid ");
2759 		sr_uuid_print(&mc->scmi.scm_uuid, 1);
2760 		printf("\t\tscm_checksum ");
2761 		sr_checksum_print(mc->scm_checksum);
2762 		printf("\n");
2763 		printf("\t\tscm_status %d\n", mc->scm_status);
2764 	}
2765 
2766 	mo = (struct sr_meta_opt *)(mc);
2767 	for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) {
2768 		printf("\t\t\tsom_type %d\n", mo->somi.som_type);
2769 		printf("\t\t\tsom_checksum ");
2770 		sr_checksum_print(mo->som_checksum);
2771 		printf("\n");
2772 	}
2773 }
2774 
/*
 * Debug helper: print len bytes starting at p as space separated
 * two-digit hex values, followed by a newline.
 */
void
sr_dump_mem(u_int8_t *p, int len)
{
	int			off;

	for (off = 0; off < len; off++)
		printf("%02x ", p[off]);

	printf("\n");
}
2784 
2785 #endif /* SR_DEBUG */
2786