xref: /openbsd-src/sys/dev/softraid.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /* $OpenBSD: softraid.c,v 1.127 2009/02/16 21:19:06 miod Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bio.h"
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/buf.h>
24 #include <sys/device.h>
25 #include <sys/ioctl.h>
26 #include <sys/proc.h>
27 #include <sys/malloc.h>
28 #include <sys/pool.h>
29 #include <sys/kernel.h>
30 #include <sys/disk.h>
31 #include <sys/rwlock.h>
32 #include <sys/queue.h>
33 #include <sys/fcntl.h>
34 #include <sys/disklabel.h>
35 #include <sys/mount.h>
36 #include <sys/sensors.h>
37 #include <sys/stat.h>
38 #include <sys/conf.h>
39 #include <sys/uio.h>
40 #include <sys/workq.h>
41 
42 #ifdef AOE
43 #include <sys/mbuf.h>
44 #include <net/if_aoe.h>
45 #endif /* AOE */
46 
47 #include <crypto/cryptodev.h>
48 
49 #include <scsi/scsi_all.h>
50 #include <scsi/scsiconf.h>
51 #include <scsi/scsi_disk.h>
52 
53 #include <dev/softraidvar.h>
54 #include <dev/rndvar.h>
55 
56 /* #define SR_FANCY_STATS */
57 
#ifdef SR_DEBUG
#define SR_FANCY_STATS
/*
 * Debug mask, only present in SR_DEBUG builds.  It starts out as 0; to
 * enable a category at build time, uncomment the matching SR_D_* line so
 * that it is OR-ed into the initializer.  The mask is tested against
 * SR_D_META in sr_meta_save(); presumably DNPRINTF consults it as well
 * (its definition is not in this file -- confirm in softraidvar.h).
 */
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif
72 
73 int		sr_match(struct device *, void *, void *);
74 void		sr_attach(struct device *, struct device *, void *);
75 int		sr_detach(struct device *, int);
76 int		sr_activate(struct device *, enum devact);
77 
/*
 * Attachment description for the softraid device: the size of its softc
 * followed by the match, attach, detach and activate entry points
 * declared above.
 */
struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};
82 
/* Driver description: named "softraid", device class DV_DULL. */
struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};
86 
87 /* scsi & discipline */
88 int			sr_scsi_cmd(struct scsi_xfer *);
89 void			sr_minphys(struct buf *bp, struct scsi_link *sl);
90 void			sr_copy_internal_data(struct scsi_xfer *,
91 			    void *, size_t);
92 int			sr_scsi_ioctl(struct scsi_link *, u_long,
93 			    caddr_t, int, struct proc *);
94 int			sr_ioctl(struct device *, u_long, caddr_t);
95 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
96 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
97 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
98 int			sr_ioctl_setstate(struct sr_softc *,
99 			    struct bioc_setstate *);
100 int			sr_ioctl_createraid(struct sr_softc *,
101 			    struct bioc_createraid *, int);
102 int			sr_ioctl_deleteraid(struct sr_softc *,
103 			    struct bioc_deleteraid *);
104 void			sr_chunks_unwind(struct sr_softc *,
105 			    struct sr_chunk_head *);
106 void			sr_discipline_free(struct sr_discipline *);
107 void			sr_discipline_shutdown(struct sr_discipline *);
108 
109 /* utility functions */
110 void			sr_shutdown(void *);
111 void			sr_uuid_get(struct sr_uuid *);
112 void			sr_uuid_print(struct sr_uuid *, int);
113 void			sr_checksum_print(u_int8_t *);
114 void			sr_checksum(struct sr_softc *, void *, void *,
115 			    u_int32_t);
116 int			sr_boot_assembly(struct sr_softc *);
117 int			sr_already_assembled(struct sr_discipline *);
118 
119 /* don't include these on RAMDISK */
120 #ifndef SMALL_KERNEL
121 void			sr_sensors_refresh(void *);
122 int			sr_sensors_create(struct sr_discipline *);
123 void			sr_sensors_delete(struct sr_discipline *);
124 #endif
125 
126 /* metadata */
127 int			sr_meta_probe(struct sr_discipline *, dev_t *, int);
128 int			sr_meta_attach(struct sr_discipline *, int);
129 void			sr_meta_getdevname(struct sr_softc *, dev_t, char *,
130 			    int);
131 int			sr_meta_rw(struct sr_discipline *, dev_t, void *,
132 			    size_t, daddr64_t, long);
133 int			sr_meta_clear(struct sr_discipline *);
134 int			sr_meta_read(struct sr_discipline *);
135 int			sr_meta_save(struct sr_discipline *, u_int32_t);
136 int			sr_meta_validate(struct sr_discipline *, dev_t,
137 			    struct sr_metadata *, void *);
138 void			sr_meta_chunks_create(struct sr_softc *,
139 			    struct sr_chunk_head *);
140 void			sr_meta_init(struct sr_discipline *,
141 			    struct sr_chunk_head *);
142 
143 /* native metadata format */
144 int			sr_meta_native_bootprobe(struct sr_softc *,
145 			    struct device *, struct sr_metadata_list_head *);
146 #define SR_META_NOTCLAIMED	(0)
147 #define SR_META_CLAIMED		(1)
148 int			sr_meta_native_probe(struct sr_softc *,
149 			   struct sr_chunk *);
150 int			sr_meta_native_attach(struct sr_discipline *, int);
151 int			sr_meta_native_read(struct sr_discipline *, dev_t,
152 			    struct sr_metadata *, void *);
153 int			sr_meta_native_write(struct sr_discipline *, dev_t,
154 			    struct sr_metadata *,void *);
155 
156 #ifdef SR_DEBUG
157 void			sr_meta_print(struct sr_metadata *);
158 #else
159 #define			sr_meta_print(m)
160 #endif
161 
162 /* the metadata driver should remain stateless */
/*
 * One entry per supported on-disk metadata format.  smd[] is indexed by
 * the format number stored in sd_meta_type (SR_META_F_NATIVE for the
 * native format); sr_meta_probe() walks the table until it reaches an
 * entry whose smd_probe is NULL.
 */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	/* identify a chunk; returns a format index or SR_META_F_INVALID */
	int			(*smd_probe)(struct sr_softc *,
				   struct sr_chunk *);
	/* called from sr_meta_attach(); non-zero return means failure */
	int			(*smd_attach)(struct sr_discipline *, int);
	/* not called anywhere in the visible part of this file */
	int			(*smd_detach)(struct sr_discipline *);
	/* read metadata from a device; non-zero return means failure */
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	/* write metadata to a device; non-zero return means failure */
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	/* only invoked for non-native formats (see sr_meta_validate()) */
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	/* native softraid format; no detach or validate hook */
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read , sr_meta_native_write, NULL },
#define SR_META_F_NATIVE	0
	/* terminator: NULL smd_probe ends the probe loop */
	{ 0, 0, NULL, NULL, NULL, NULL }
#define SR_META_F_INVALID	-1
};
185 
186 int
187 sr_meta_attach(struct sr_discipline *sd, int force)
188 {
189 	struct sr_softc		*sc = sd->sd_sc;
190 	struct sr_chunk_head	*cl;
191 	struct sr_chunk		*ch_entry;
192 	int			rv = 1, i;
193 
194 	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc));
195 
196 	/* in memory copy of metadata */
197 	sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
198 	if (!sd->sd_meta) {
199 		printf("%s: could not allocate memory for metadata\n",
200 		    DEVNAME(sc));
201 		goto bad;
202 	}
203 
204 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
205 		/* in memory copy of foreign metadata */
206 		sd->sd_meta_foreign =  malloc(smd[sd->sd_meta_type].smd_size ,
207 		    M_DEVBUF, M_ZERO);
208 		if (!sd->sd_meta_foreign) {
209 			/* unwind frees sd_meta */
210 			printf("%s: could not allocate memory for foreign "
211 			    "metadata\n", DEVNAME(sc));
212 			goto bad;
213 		}
214 	}
215 
216 	if (smd[sd->sd_meta_type].smd_attach(sd, force))
217 		goto bad;
218 
219 	/* fill out chunk array */
220 	cl = &sd->sd_vol.sv_chunk_list;
221 	i = 0;
222 	SLIST_FOREACH(ch_entry, cl, src_link)
223 		sd->sd_vol.sv_chunks[i++] = ch_entry;
224 
225 	rv = 0;
226 bad:
227 	return (rv);
228 }
229 
230 int
231 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
232 {
233 	struct sr_softc		*sc = sd->sd_sc;
234 	struct bdevsw		*bdsw;
235 	struct sr_chunk		*ch_entry, *ch_prev = NULL;
236 	struct sr_chunk_head	*cl;
237 	char			devname[32];
238 	int			i, d, type, found, prevf, error;
239 	dev_t			dev;
240 
241 	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
242 
243 	if (no_chunk == 0)
244 		goto unwind;
245 
246 
247 	cl = &sd->sd_vol.sv_chunk_list;
248 
249 	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
250 		dev = dt[d];
251 		sr_meta_getdevname(sc, dev, devname, sizeof(devname));
252 		bdsw = bdevsw_lookup(dev);
253 
254 		/*
255 		 * XXX leaving dev open for now; move this to attach and figure
256 		 * out the open/close dance for unwind.
257 		 */
258 		error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc);
259 		if (error) {
260 			DNPRINTF(SR_D_META,"%s: sr_meta_probe can't open %s\n",
261 			    DEVNAME(sc), devname);
262 			/* XXX device isn't open but will be closed anyway */
263 			goto unwind;
264 		}
265 
266 		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
267 		    M_WAITOK | M_ZERO);
268 		/* keep disks in user supplied order */
269 		if (ch_prev)
270 			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
271 		else
272 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
273 		ch_prev = ch_entry;
274 		strlcpy(ch_entry->src_devname, devname,
275 		   sizeof(ch_entry->src_devname));
276 		ch_entry->src_dev_mm = dev;
277 
278 		/* determine if this is a device we understand */
279 		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
280 			type = smd[i].smd_probe(sc, ch_entry);
281 			if (type == SR_META_F_INVALID)
282 				continue;
283 			else {
284 				found = type;
285 				break;
286 			}
287 		}
288 		if (found == SR_META_F_INVALID)
289 			goto unwind;
290 		if (prevf == SR_META_F_INVALID)
291 			prevf = found;
292 		if (prevf != found) {
293 			DNPRINTF(SR_D_META, "%s: prevf != found\n",
294 			    DEVNAME(sc));
295 			goto unwind;
296 		}
297 	}
298 
299 	return (prevf);
300 unwind:
301 	return (SR_META_F_INVALID);
302 }
303 
304 void
305 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
306 {
307 	int			maj, unit, part;
308 	char			*name;
309 
310 	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
311 	    DEVNAME(sc), buf, size);
312 
313 	if (!buf)
314 		return;
315 
316 	maj = major(dev);
317 	part = DISKPART(dev);
318 	unit = DISKUNIT(dev);
319 
320 	name = findblkname(maj);
321 	if (name == NULL)
322 		return;
323 
324 	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
325 }
326 
327 int
328 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
329     daddr64_t ofs, long flags)
330 {
331 	struct sr_softc		*sc = sd->sd_sc;
332 	struct buf		b;
333 	int			rv = 1;
334 
335 	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
336 	    DEVNAME(sc), dev, md, sz, ofs, flags);
337 
338 	if (md == NULL) {
339 		printf("%s: read invalid metadata pointer\n", DEVNAME(sc));
340 		goto done;
341 	}
342 
343 	bzero(&b, sizeof(b));
344 	b.b_flags = flags;
345 	b.b_blkno = ofs;
346 	b.b_bcount = sz;
347 	b.b_bufsize = sz;
348 	b.b_resid = sz;
349 	b.b_data = md;
350 	b.b_error = 0;
351 	b.b_proc = curproc;
352 	b.b_dev = dev;
353 	b.b_vp = NULL;
354 	b.b_iodone = NULL;
355 	LIST_INIT(&b.b_dep);
356 	bdevsw_lookup(b.b_dev)->d_strategy(&b);
357 	biowait(&b);
358 
359 	if (b.b_flags & B_ERROR) {
360 		printf("%s: 0x%x i/o error on block %lld while reading "
361 		    "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
362 		goto done;
363 	}
364 	rv = 0;
365 done:
366 	return (rv);
367 }
368 
369 int
370 sr_meta_clear(struct sr_discipline *sd)
371 {
372 	struct sr_softc		*sc = sd->sd_sc;
373 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
374 	struct sr_chunk		*ch_entry;
375 	void			*m;
376 	int			rv = 1;
377 
378 	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
379 
380 	if (sd->sd_meta_type != SR_META_F_NATIVE) {
381 		printf("%s: sr_meta_clear can not clear foreign metadata\n",
382 		    DEVNAME(sc));
383 		goto done;
384 	}
385 
386 	m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO);
387 	SLIST_FOREACH(ch_entry, cl, src_link) {
388 		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
389 			/* XXX mark disk offline */
390 			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
391 			    "clear %s\n", ch_entry->src_devname);
392 			rv++;
393 			continue;
394 		}
395 		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
396 		bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
397 	}
398 
399 	bzero(sd->sd_meta, SR_META_SIZE * 512);
400 
401 	free(m, M_DEVBUF);
402 	rv = 0;
403 done:
404 	return (rv);
405 }
406 
407 void
408 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
409 {
410 	struct sr_chunk		*ch_entry;
411 	struct sr_uuid		uuid;
412 	int			cid = 0;
413 	char			*name;
414 	u_int64_t		max_chunk_sz = 0, min_chunk_sz;
415 
416 	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));
417 
418 	sr_uuid_get(&uuid);
419 
420 	/* fill out stuff and get largest chunk size while looping */
421 	SLIST_FOREACH(ch_entry, cl, src_link) {
422 		name = ch_entry->src_devname;
423 		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
424 		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
425 		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
426 		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
427 		    sizeof(ch_entry->src_meta.scmi.scm_devname));
428 		bcopy(&uuid,  &ch_entry->src_meta.scmi.scm_uuid,
429 		    sizeof(ch_entry->src_meta.scmi.scm_uuid));
430 
431 		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
432 			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
433 	}
434 
435 	/* get smallest chunk size */
436 	min_chunk_sz = max_chunk_sz;
437 	SLIST_FOREACH(ch_entry, cl, src_link)
438 		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
439 			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;
440 
441 	/* equalize all sizes */
442 	SLIST_FOREACH(ch_entry, cl, src_link)
443 		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;
444 
445 	/* whine if chunks are not the same size */
446 	if (min_chunk_sz != max_chunk_sz)
447 		printf("%s: chunk sizes are not equal; up to %llu blocks "
448 		    "wasted per chunk\n",
449 		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
450 }
451 
452 void
453 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
454 {
455 	struct sr_softc		*sc = sd->sd_sc;
456 	struct sr_metadata	*sm = sd->sd_meta;
457 	struct sr_meta_chunk	*im_sc;
458 	struct sr_meta_opt	*im_so;
459 	int			i, chunk_no;
460 
461 	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
462 
463 	if (!sm)
464 		return;
465 
466 	/* initial metadata */
467 	sm->ssdi.ssd_magic = SR_MAGIC;
468 	sm->ssdi.ssd_version = SR_META_VERSION;
469 	sm->ssd_ondisk = 0;
470 	sm->ssdi.ssd_flags = sd->sd_meta_flags;
471 	/* get uuid from chunk 0 */
472 	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
473 	    &sm->ssdi.ssd_uuid,
474 	    sizeof(struct sr_uuid));
475 
476 	/* volume is filled in createraid */
477 
478 	/* add missing chunk bits */
479 	chunk_no = sm->ssdi.ssd_chunk_no;
480 	for (i = 0; i < chunk_no; i++) {
481 		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
482 		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
483 		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
484 		    sizeof(struct sr_meta_chunk_invariant));
485 
486 		/* carry optional meta also in chunk area */
487 		im_so = &sd->sd_vol.sv_chunks[i]->src_opt;
488 		bzero(im_so, sizeof(*im_so));
489 		if (sd->sd_type == SR_MD_CRYPTO) {
490 			sm->ssdi.ssd_opt_no = 1;
491 			im_so->somi.som_type = SR_OPT_CRYPTO;
492 
493 			/*
494 			 * copy encrypted key / passphrase into optional
495 			 * metadata area
496 			 */
497 			bcopy(&sd->mds.mdd_crypto.scr_meta,
498 			    &im_so->somi.som_meta.smm_crypto,
499 			    sizeof(im_so->somi.som_meta.smm_crypto));
500 
501 			sr_checksum(sc, im_so, im_so->som_checksum,
502 			    sizeof(struct sr_meta_opt_invariant));
503 		}
504 	}
505 }
506 
507 void
508 sr_meta_save_callback(void *arg1, void *arg2)
509 {
510 	struct sr_discipline	*sd = arg1;
511 	int			s;
512 
513 	s = splbio();
514 
515 	if (sr_meta_save(arg1, SR_META_DIRTY))
516 		printf("%s: save metadata failed\n",
517 		    DEVNAME(sd->sd_sc));
518 
519 	sd->sd_must_flush = 0;
520 	splx(s);
521 }
522 
523 int
524 sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
525 {
526 	struct sr_softc		*sc = sd->sd_sc;
527 	struct sr_metadata	*sm = sd->sd_meta, *m;
528 	struct sr_meta_driver	*s;
529 	struct sr_chunk		*src;
530 	struct sr_meta_chunk	*cm;
531 	struct sr_workunit	wu;
532 	struct sr_meta_opt	*om;
533 	int			i;
534 
535 	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
536 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
537 
538 	if (!sm) {
539 		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
540 		goto bad;
541 	}
542 
543 	/* meta scratchpad */
544 	s = &smd[sd->sd_meta_type];
545 	m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
546 	if (!m) {
547 		printf("%s: could not allocate metadata scratch area\n",
548 		    DEVNAME(sc));
549 		goto bad;
550 	}
551 
552 	if (sm->ssdi.ssd_opt_no > 1)
553 		panic("not yet save > 1 optional metadata members");
554 
555 	/* from here on out metadata is updated */
556 restart:
557 	sm->ssd_ondisk++;
558 	sm->ssd_meta_flags = flags;
559 	bcopy(sm, m, sizeof(*m));
560 
561 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
562 		src = sd->sd_vol.sv_chunks[i];
563 		cm = (struct sr_meta_chunk *)(m + 1);
564 		bcopy(&src->src_meta, cm + i, sizeof(*cm));
565 	}
566 
567 	/* optional metadata */
568 	om = (struct sr_meta_opt *)(cm + i);
569 	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
570 		bcopy(&src->src_opt, om + i, sizeof(*om));
571 		sr_checksum(sc, om, &om->som_checksum,
572 		    sizeof(struct sr_meta_opt_invariant));
573 	}
574 
575 	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
576 		src = sd->sd_vol.sv_chunks[i];
577 
578 		/* skip disks that are offline */
579 		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
580 			continue;
581 
582 		/* calculate metdata checksum for correct chunk */
583 		m->ssdi.ssd_chunk_id = i;
584 		sr_checksum(sc, m, &m->ssd_checksum,
585 		    sizeof(struct sr_meta_invariant));
586 
587 #ifdef SR_DEBUG
588 		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
589 		    "chunkid: %d checksum: ",
590 		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
591 		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
592 
593 		if (sr_debug &= SR_D_META)
594 			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
595 		DNPRINTF(SR_D_META, "\n");
596 		sr_meta_print(m);
597 #endif
598 
599 		/* translate and write to disk */
600 		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
601 			printf("%s: could not write metadata to %s\n",
602 			    DEVNAME(sc), src->src_devname);
603 			/* restart the meta write */
604 			src->src_meta.scm_status = BIOC_SDOFFLINE;
605 			/* XXX recalculate volume status */
606 			goto restart;
607 		}
608 	}
609 
610 	/* not al disciplines have sync */
611 	if (sd->sd_scsi_sync) {
612 		bzero(&wu, sizeof(wu));
613 		wu.swu_fake = 1;
614 		wu.swu_dis = sd;
615 		sd->sd_scsi_sync(&wu);
616 	}
617 	free(m, M_DEVBUF);
618 	return (0);
619 bad:
620 	return (1);
621 }
622 
623 int
624 sr_meta_read(struct sr_discipline *sd)
625 {
626 #ifdef SR_DEBUG
627 	struct sr_softc		*sc = sd->sd_sc;
628 #endif
629 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
630 	struct sr_metadata	*sm;
631 	struct sr_chunk		*ch_entry;
632 	struct sr_meta_chunk	*cp;
633 	struct sr_meta_driver	*s;
634 	struct sr_meta_opt	*om;
635 	void			*fm = NULL;
636 	int			no_disk = 0;
637 
638 	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
639 
640 	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
641 	s = &smd[sd->sd_meta_type];
642 	if (sd->sd_meta_type != SR_META_F_NATIVE)
643 		fm = malloc(s->smd_size , M_DEVBUF, M_WAITOK | M_ZERO);
644 
645 	cp = (struct sr_meta_chunk *)(sm + 1);
646 	SLIST_FOREACH(ch_entry, cl, src_link) {
647 		/* read and translate */
648 		if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
649 			/* XXX mark disk offline */
650 			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
651 			    DEVNAME(sc));
652 			continue;
653 		}
654 
655 		if (sm->ssdi.ssd_magic != SR_MAGIC) {
656 			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
657 			    DEVNAME(sc));
658 			continue;
659 		}
660 
661 		/* validate metadata */
662 		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
663 			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
664 			    DEVNAME(sc));
665 			no_disk = -1;
666 			goto done;
667 		}
668 
669 		/* assume chunk 0 contains metadata */
670 		if (no_disk == 0)
671 			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
672 
673 		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
674 
675 		if (sm->ssdi.ssd_opt_no > 1)
676 			panic("not yet read > 1 optional metadata members");
677 
678 		if (sm->ssdi.ssd_opt_no) {
679 			om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
680 			    sizeof(struct sr_meta_chunk) *
681 			    sm->ssdi.ssd_chunk_no);
682 			bcopy(om, &ch_entry->src_opt, sizeof(ch_entry->src_opt));
683 
684 			if (om->somi.som_type == SR_OPT_CRYPTO) {
685 				bcopy(&ch_entry->src_opt.somi.som_meta.smm_crypto,
686 				    &sd->mds.mdd_crypto.scr_meta,
687 				    sizeof(sd->mds.mdd_crypto.scr_meta));
688 			}
689 
690 		}
691 
692 		cp++;
693 		no_disk++;
694 	}
695 
696 	free(sm, M_DEVBUF);
697 	if (fm)
698 		free(fm, M_DEVBUF);
699 
700 done:
701 	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
702 	    no_disk);
703 	return (no_disk);
704 }
705 
706 int
707 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
708     void *fm)
709 {
710 	struct sr_softc		*sc = sd->sd_sc;
711 	struct sr_meta_driver	*s;
712 	struct sr_meta_chunk	*mc;
713 	char			devname[32];
714 	int			rv = 1;
715 	u_int8_t		checksum[MD5_DIGEST_LENGTH];
716 
717 	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
718 
719 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
720 
721 	s = &smd[sd->sd_meta_type];
722 	if (sd->sd_meta_type != SR_META_F_NATIVE)
723 		if (s->smd_validate(sd, sm, fm)) {
724 			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
725 			goto done;
726 		}
727 
728 	/*
729 	 * at this point all foreign metadata has been translated to the native
730 	 * format and will be treated just like the native format
731 	 */
732 
733 	if (sm->ssdi.ssd_version != SR_META_VERSION) {
734 		printf("%s: %s can not read metadata version %d, expected %d\n",
735 		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
736 		    SR_META_VERSION);
737 		goto done;
738 	}
739 
740 	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
741 	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
742 		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
743 		goto done;
744 	}
745 
746 	/* XXX do other checksums */
747 
748 	/* warn if disk changed order */
749 	mc = (struct sr_meta_chunk *)(sm + 1);
750 	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
751 	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
752 		printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
753 		    mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname);
754 
755 	/* we have meta data on disk */
756 	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
757 	    DEVNAME(sc), devname);
758 
759 	rv = 0;
760 done:
761 	return (rv);
762 }
763 
764 int
765 sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv,
766     struct sr_metadata_list_head *mlh)
767 {
768 	struct bdevsw		*bdsw;
769 	struct disklabel	label;
770 	struct sr_metadata	*md;
771 	struct sr_discipline	*fake_sd;
772 	struct sr_metadata_list *mle;
773 	char			devname[32];
774 	dev_t			dev, devr;
775 	int			error, i, majdev;
776 	int			rv = SR_META_NOTCLAIMED;
777 
778 	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
779 
780 	majdev = findblkmajor(dv);
781 	if (majdev == -1)
782 		goto done;
783 	dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
784 	bdsw = &bdevsw[majdev];
785 
786 	/*
787 	 * The devices are being opened with S_IFCHR instead of
788 	 * S_IFBLK so that the SCSI mid-layer does not whine when
789 	 * media is not inserted in certain devices like zip drives
790 	 * and such.
791 	 */
792 
793 	/* open device */
794 	error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc);
795 	if (error) {
796 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
797 		    "failed\n" , DEVNAME(sc));
798 		goto done;
799 	}
800 
801 	/* get disklabel */
802 	error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label, FREAD,
803 	    curproc);
804 	if (error) {
805 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
806 		    "failed\n", DEVNAME(sc));
807 		error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
808 		goto done;
809 	}
810 
811 	/* we are done, close device */
812 	error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
813 	if (error) {
814 		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
815 		    "failed\n", DEVNAME(sc));
816 		goto done;
817 	}
818 
819 	md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
820 	if (md == NULL) {
821 		printf("%s: not enough memory for metadata buffer\n",
822 		    DEVNAME(sc));
823 		goto done;
824 	}
825 
826 	/* create fake sd to use utility functions */
827 	fake_sd = malloc(sizeof(struct sr_discipline) , M_DEVBUF, M_ZERO);
828 	if (fake_sd == NULL) {
829 		printf("%s: not enough memory for fake discipline\n",
830 		    DEVNAME(sc));
831 		goto nosd;
832 	}
833 	fake_sd->sd_sc = sc;
834 	fake_sd->sd_meta_type = SR_META_F_NATIVE;
835 
836 	for (i = 0; i < MAXPARTITIONS; i++) {
837 		if (label.d_partitions[i].p_fstype != FS_RAID)
838 			continue;
839 
840 		/* open partition */
841 		devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
842 		error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc);
843 		if (error) {
844 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
845 			    "open failed, partition %d\n",
846 			    DEVNAME(sc), i);
847 			continue;
848 		}
849 
850 		if (sr_meta_native_read(fake_sd, devr, md, NULL)) {
851 			printf("%s: native bootprobe could not read native "
852 			    "metadata\n", DEVNAME(sc));
853 			continue;
854 		}
855 
856 		/* are we a softraid partition? */
857 		sr_meta_getdevname(sc, devr, devname, sizeof(devname));
858 		if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) {
859 			if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) {
860 				DNPRINTF(SR_D_META, "%s: don't save %s\n",
861 				    DEVNAME(sc), devname);
862 			} else {
863 				/* XXX fix M_WAITOK, this is boot time */
864 				mle = malloc(sizeof(*mle), M_DEVBUF,
865 				    M_WAITOK | M_ZERO);
866 				bcopy(md, &mle->sml_metadata,
867 				    SR_META_SIZE * 512);
868 				mle->sml_mm = devr;
869 				SLIST_INSERT_HEAD(mlh, mle, sml_link);
870 				rv = SR_META_CLAIMED;
871 			}
872 		}
873 
874 		/* we are done, close partition */
875 		error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, curproc);
876 		if (error) {
877 			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
878 			    "close failed\n", DEVNAME(sc));
879 			continue;
880 		}
881 	}
882 
883 	free(fake_sd, M_DEVBUF);
884 nosd:
885 	free(md, M_DEVBUF);
886 done:
887 	return (rv);
888 }
889 
890 int
891 sr_boot_assembly(struct sr_softc *sc)
892 {
893 	struct device		*dv;
894 	struct sr_metadata_list_head mlh;
895 	struct sr_metadata_list *mle, *mle2;
896 	struct sr_metadata	*m1, *m2;
897 	struct bioc_createraid	bc;
898 	int			rv = 0, no_dev;
899 	dev_t			*dt = NULL;
900 
901 	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
902 
903 	SLIST_INIT(&mlh);
904 
905 	TAILQ_FOREACH(dv, &alldevs, dv_list) {
906 		if (dv->dv_class != DV_DISK)
907 			continue;
908 
909 		/* XXX is there  a better way of excluding some devices? */
910 		if (!strncmp(dv->dv_xname, "fd", 2) ||
911 		    !strncmp(dv->dv_xname, "cd", 2) ||
912 		    !strncmp(dv->dv_xname, "rx", 2))
913 			continue;
914 
915 		/* native softraid uses partitions */
916 		if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED)
917 			continue;
918 
919 		/* probe non-native disks */
920 	}
921 
922 	/*
923 	 * XXX poor mans hack that doesn't keep disks in order and does not
924 	 * roam disks correctly.  replace this with something smarter that
925 	 * orders disks by volid, chunkid and uuid.
926 	 */
927 	dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK);
928 	SLIST_FOREACH(mle, &mlh, sml_link) {
929 		/* chunk used already? */
930 		if (mle->sml_used)
931 			continue;
932 
933 		no_dev = 0;
934 		m1 = (struct sr_metadata *)&mle->sml_metadata;
935 		bzero(dt, BIOC_CRMAXLEN);
936 		SLIST_FOREACH(mle2, &mlh, sml_link) {
937 			/* chunk used already? */
938 			if (mle2->sml_used)
939 				continue;
940 
941 			m2 = (struct sr_metadata *)&mle2->sml_metadata;
942 
943 			/* are we the same volume? */
944 			if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid)
945 				continue;
946 
947 			/* same uuid? */
948 			if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid,
949 			    sizeof(m1->ssdi.ssd_uuid)))
950 				continue;
951 
952 			/* sanity */
953 			if (dt[m2->ssdi.ssd_chunk_id]) {
954 				printf("%s: chunk id already in use; can not "
955 				    "assemble volume\n", DEVNAME(sc));
956 				goto unwind;
957 			}
958 			dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm;
959 			no_dev++;
960 			mle2->sml_used = 1;
961 		}
962 		if (m1->ssdi.ssd_chunk_no != no_dev) {
963 			printf("%s: not assembling partial disk that used to "
964 			    "be volume %d\n", DEVNAME(sc),
965 			    m1->ssdi.ssd_volid);
966 			continue;
967 		}
968 
969 		bzero(&bc, sizeof(bc));
970 		bc.bc_level = m1->ssdi.ssd_level;
971 		bc.bc_dev_list_len = no_dev * sizeof(dev_t);
972 		bc.bc_dev_list = dt;
973 		bc.bc_flags = BIOC_SCDEVT;
974 		sr_ioctl_createraid(sc, &bc, 0);
975 		rv++;
976 	}
977 
978 	/* done with metadata */
979 unwind:
980 	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
981 		mle2 = SLIST_NEXT(mle, sml_link);
982 		free(mle, M_DEVBUF);
983 	}
984 	SLIST_INIT(&mlh);
985 
986 	if (dt)
987 		free(dt, M_DEVBUF);
988 
989 	return (rv);
990 }
991 
992 int
993 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
994 {
995 	struct disklabel	label;
996 	char			*devname;
997 	int			error, part;
998 	daddr64_t		size;
999 	struct bdevsw		*bdsw;
1000 	dev_t			dev;
1001 
1002 	DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
1003 	   DEVNAME(sc), ch_entry->src_devname);
1004 
1005 	dev = ch_entry->src_dev_mm;
1006 	devname = ch_entry->src_devname;
1007 	bdsw = bdevsw_lookup(dev);
1008 	part = DISKPART(dev);
1009 
1010 	/* get disklabel */
1011 	error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label, FREAD, curproc);
1012 	if (error) {
1013 		DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
1014 		    DEVNAME(sc), devname);
1015 		goto unwind;
1016 	}
1017 
1018 	/* make sure the partition is of the right type */
1019 	if (label.d_partitions[part].p_fstype != FS_RAID) {
1020 		DNPRINTF(SR_D_META,
1021 		    "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc) ,
1022 		        devname,
1023 		    label.d_partitions[part].p_fstype);
1024 		goto unwind;
1025 	}
1026 
1027 	size = DL_GETPSIZE(&label.d_partitions[part]) -
1028 	    SR_META_SIZE - SR_META_OFFSET;
1029 	if (size <= 0) {
1030 		DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
1031 		    devname);
1032 		goto unwind;
1033 	}
1034 	ch_entry->src_size = size;
1035 
1036 	DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc),
1037 	    devname, size);
1038 
1039 	return (SR_META_F_NATIVE);
1040 unwind:
1041 	DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
1042 	    devname ? devname : "nodev");
1043 	return (SR_META_F_INVALID);
1044 }
1045 
1046 int
1047 sr_meta_native_attach(struct sr_discipline *sd, int force)
1048 {
1049 	struct sr_softc		*sc = sd->sd_sc;
1050 	struct sr_chunk_head 	*cl = &sd->sd_vol.sv_chunk_list;
1051 	struct sr_metadata	*md = NULL;
1052 	struct sr_chunk		*ch_entry;
1053 	struct sr_uuid		uuid;
1054 	int			sr, not_sr, rv = 1, d, expected = -1;
1055 
1056 	DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
1057 
1058 	md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
1059 	if (md == NULL) {
1060 		printf("%s: not enough memory for metadata buffer\n",
1061 		    DEVNAME(sc));
1062 		goto bad;
1063 	}
1064 
1065 	bzero(&uuid, sizeof uuid);
1066 
1067 	sr = not_sr = d = 0;
1068 	SLIST_FOREACH(ch_entry, cl, src_link) {
1069 		if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
1070 			printf("%s: could not read native metadata\n",
1071 			    DEVNAME(sc));
1072 			goto bad;
1073 		}
1074 
1075 		if (md->ssdi.ssd_magic == SR_MAGIC) {
1076 			sr++;
1077 			if (d == 0) {
1078 				bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
1079 				expected = md->ssdi.ssd_chunk_no;
1080 				continue;
1081 			} else if (bcmp(&md->ssdi.ssd_uuid, &uuid,
1082 			    sizeof uuid)) {
1083 				printf("%s: not part of the same volume\n",
1084 				    DEVNAME(sc));
1085 				goto bad;
1086 			}
1087 		} else
1088 			not_sr++;
1089 
1090 		d++;
1091 	}
1092 
1093 	if (sr && not_sr) {
1094 		printf("%s: not all chunks are of the native metadata format\n",
1095 		     DEVNAME(sc));
1096 		goto bad;
1097 	}
1098 	if (expected != sr && !force && expected != -1) {
1099 		/* XXX make this smart so that we can bring up degraded disks */
1100 		printf("%s: not all chunks were provided\n", DEVNAME(sc));
1101 		goto bad;
1102 	}
1103 
1104 	rv = 0;
1105 bad:
1106 	if (md)
1107 		free(md, M_DEVBUF);
1108 	return (rv);
1109 }
1110 
1111 int
1112 sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
1113     struct sr_metadata *md, void *fm)
1114 {
1115 #ifdef SR_DEBUG
1116 	struct sr_softc		*sc = sd->sd_sc;
1117 #endif
1118 	DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
1119 	    DEVNAME(sc), dev, md);
1120 
1121 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1122 	    B_READ));
1123 }
1124 
1125 int
1126 sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
1127     struct sr_metadata *md, void *fm)
1128 {
1129 #ifdef SR_DEBUG
1130 	struct sr_softc		*sc = sd->sd_sc;
1131 #endif
1132 	DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
1133 	    DEVNAME(sc), dev, md);
1134 
1135 	return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
1136 	    B_WRITE));
1137 }
1138 
/*
 * SCSI adapter entry points for softraid volumes; installed as
 * sd_link.adapter when a volume is attached in sr_ioctl_createraid().
 * Supplies the command, minphys and ioctl handlers defined in this file.
 */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};
1142 
/*
 * SCSI device description for softraid volumes; installed as
 * sd_link.device in sr_ioctl_createraid().  All hooks are left unset.
 */
struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};
1146 
/*
 * Autoconf match routine: softraid always matches, so this
 * unconditionally returns 1 regardless of its arguments.
 */
int
sr_match(struct device *parent, void *match, void *aux)
{
	int			matched = 1;

	return (matched);
}
1152 
1153 void
1154 sr_attach(struct device *parent, struct device *self, void *aux)
1155 {
1156 	struct sr_softc		*sc = (void *)self;
1157 
1158 	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
1159 
1160 	rw_init(&sc->sc_lock, "sr_lock");
1161 
1162 	if (bio_register(&sc->sc_dev, sr_ioctl) != 0)
1163 		printf("%s: controller registration failed", DEVNAME(sc));
1164 	else
1165 		sc->sc_ioctl = sr_ioctl;
1166 
1167 	printf("\n");
1168 
1169 	sr_boot_assembly(sc);
1170 }
1171 
/*
 * Autoconf detach routine: nothing is torn down here, 0 is always
 * returned.
 */
int
sr_detach(struct device *self, int flags)
{
	int			rv = 0;

	return (rv);
}
1177 
1178 int
1179 sr_activate(struct device *self, enum devact act)
1180 {
1181 	return (1);
1182 }
1183 
1184 void
1185 sr_minphys(struct buf *bp, struct scsi_link *sl)
1186 {
1187 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
1188 
1189 	/* XXX currently using SR_MAXFER = MAXPHYS */
1190 	if (bp->b_bcount > SR_MAXFER)
1191 		bp->b_bcount = SR_MAXFER;
1192 	minphys(bp);
1193 }
1194 
1195 void
1196 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
1197 {
1198 	size_t			copy_cnt;
1199 
1200 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
1201 	    xs, size);
1202 
1203 	if (xs->datalen) {
1204 		copy_cnt = MIN(size, xs->datalen);
1205 		bcopy(v, xs->data, copy_cnt);
1206 	}
1207 }
1208 
1209 int
1210 sr_ccb_alloc(struct sr_discipline *sd)
1211 {
1212 	struct sr_ccb		*ccb;
1213 	int			i;
1214 
1215 	if (!sd)
1216 		return (1);
1217 
1218 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
1219 
1220 	if (sd->sd_ccb)
1221 		return (1);
1222 
1223 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
1224 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
1225 	TAILQ_INIT(&sd->sd_ccb_freeq);
1226 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
1227 		ccb = &sd->sd_ccb[i];
1228 		ccb->ccb_dis = sd;
1229 		sr_ccb_put(ccb);
1230 	}
1231 
1232 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
1233 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
1234 
1235 	return (0);
1236 }
1237 
1238 void
1239 sr_ccb_free(struct sr_discipline *sd)
1240 {
1241 	struct sr_ccb		*ccb;
1242 
1243 	if (!sd)
1244 		return;
1245 
1246 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
1247 
1248 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
1249 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1250 
1251 	if (sd->sd_ccb)
1252 		free(sd->sd_ccb, M_DEVBUF);
1253 }
1254 
1255 struct sr_ccb *
1256 sr_ccb_get(struct sr_discipline *sd)
1257 {
1258 	struct sr_ccb		*ccb;
1259 	int			s;
1260 
1261 	s = splbio();
1262 
1263 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
1264 	if (ccb) {
1265 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
1266 		ccb->ccb_state = SR_CCB_INPROGRESS;
1267 	}
1268 
1269 	splx(s);
1270 
1271 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
1272 	    ccb);
1273 
1274 	return (ccb);
1275 }
1276 
1277 void
1278 sr_ccb_put(struct sr_ccb *ccb)
1279 {
1280 	struct sr_discipline	*sd = ccb->ccb_dis;
1281 	int			s;
1282 
1283 	DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
1284 	    ccb);
1285 
1286 	s = splbio();
1287 
1288 	ccb->ccb_wu = NULL;
1289 	ccb->ccb_state = SR_CCB_FREE;
1290 	ccb->ccb_target = -1;
1291 	ccb->ccb_opaque = NULL;
1292 
1293 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
1294 
1295 	splx(s);
1296 }
1297 
1298 int
1299 sr_wu_alloc(struct sr_discipline *sd)
1300 {
1301 	struct sr_workunit	*wu;
1302 	int			i, no_wu;
1303 
1304 	if (!sd)
1305 		return (1);
1306 
1307 	DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
1308 	    sd, sd->sd_max_wu);
1309 
1310 	if (sd->sd_wu)
1311 		return (1);
1312 
1313 	no_wu = sd->sd_max_wu;
1314 	sd->sd_wu_pending = no_wu;
1315 
1316 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
1317 	    M_DEVBUF, M_WAITOK | M_ZERO);
1318 	TAILQ_INIT(&sd->sd_wu_freeq);
1319 	TAILQ_INIT(&sd->sd_wu_pendq);
1320 	TAILQ_INIT(&sd->sd_wu_defq);
1321 	for (i = 0; i < no_wu; i++) {
1322 		wu = &sd->sd_wu[i];
1323 		wu->swu_dis = sd;
1324 		sr_wu_put(wu);
1325 	}
1326 
1327 	return (0);
1328 }
1329 
1330 void
1331 sr_wu_free(struct sr_discipline *sd)
1332 {
1333 	struct sr_workunit	*wu;
1334 
1335 	if (!sd)
1336 		return;
1337 
1338 	DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
1339 
1340 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
1341 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1342 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
1343 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
1344 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
1345 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
1346 
1347 	if (sd->sd_wu)
1348 		free(sd->sd_wu, M_DEVBUF);
1349 }
1350 
1351 void
1352 sr_wu_put(struct sr_workunit *wu)
1353 {
1354 	struct sr_discipline	*sd = wu->swu_dis;
1355 	struct sr_ccb		*ccb;
1356 
1357 	int			s;
1358 
1359 	DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
1360 
1361 	s = splbio();
1362 
1363 	wu->swu_xs = NULL;
1364 	wu->swu_state = SR_WU_FREE;
1365 	wu->swu_ios_complete = 0;
1366 	wu->swu_ios_failed = 0;
1367 	wu->swu_ios_succeeded = 0;
1368 	wu->swu_io_count = 0;
1369 	wu->swu_blk_start = 0;
1370 	wu->swu_blk_end = 0;
1371 	wu->swu_collider = NULL;
1372 	wu->swu_fake = 0;
1373 
1374 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
1375 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
1376 		sr_ccb_put(ccb);
1377 	}
1378 	TAILQ_INIT(&wu->swu_ccb);
1379 
1380 	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
1381 	sd->sd_wu_pending--;
1382 
1383 	splx(s);
1384 }
1385 
1386 struct sr_workunit *
1387 sr_wu_get(struct sr_discipline *sd)
1388 {
1389 	struct sr_workunit	*wu;
1390 	int			s;
1391 
1392 	s = splbio();
1393 
1394 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
1395 	if (wu) {
1396 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
1397 		wu->swu_state = SR_WU_INPROGRESS;
1398 	}
1399 	sd->sd_wu_pending++;
1400 
1401 	splx(s);
1402 
1403 	DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
1404 
1405 	return (wu);
1406 }
1407 
1408 void
1409 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs)
1410 {
1411 	int			s;
1412 
1413 	DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs);
1414 
1415 	s = splbio();
1416 	scsi_done(xs);
1417 	splx(s);
1418 }
1419 
1420 int
1421 sr_scsi_cmd(struct scsi_xfer *xs)
1422 {
1423 	int			s;
1424 	struct scsi_link	*link = xs->sc_link;
1425 	struct sr_softc		*sc = link->adapter_softc;
1426 	struct sr_workunit	*wu;
1427 	struct sr_discipline	*sd;
1428 
1429 	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p "
1430 	    "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags);
1431 
1432 	sd = sc->sc_dis[link->scsibus];
1433 	if (sd == NULL) {
1434 		s = splhigh();
1435 		sd = sc->sc_attach_dis;
1436 		splx(s);
1437 
1438 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n",
1439 		    DEVNAME(sc), sd);
1440 		if (sd == NULL) {
1441 			wu = NULL;
1442 			printf("%s: sr_scsi_cmd NULL discipline\n",
1443 			    DEVNAME(sc));
1444 			goto stuffup;
1445 		}
1446 	}
1447 
1448 	if (sd->sd_deleted) {
1449 		printf("%s: %s device is being deleted, failing io\n",
1450 		    DEVNAME(sc), sd->sd_meta->ssd_devname);
1451 		goto stuffup;
1452 	}
1453 
1454 	if ((wu = sr_wu_get(sd)) == NULL) {
1455 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc));
1456 		return (TRY_AGAIN_LATER);
1457 	}
1458 
1459 	xs->error = XS_NOERROR;
1460 	wu->swu_xs = xs;
1461 
1462 	/* the midlayer will query LUNs so report sense to stop scanning */
1463 	if (link->target != 0 || link->lun != 0) {
1464 		DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n",
1465 		    DEVNAME(sc), link->target, link->lun);
1466 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
1467 		    SSD_ERRCODE_VALID;
1468 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
1469 		sd->sd_scsi_sense.add_sense_code = 0x25;
1470 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
1471 		sd->sd_scsi_sense.extra_len = 4;
1472 		goto stuffup;
1473 	}
1474 
1475 	switch (xs->cmd->opcode) {
1476 	case READ_COMMAND:
1477 	case READ_BIG:
1478 	case READ_16:
1479 	case WRITE_COMMAND:
1480 	case WRITE_BIG:
1481 	case WRITE_16:
1482 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
1483 		    DEVNAME(sc), xs->cmd->opcode);
1484 		if (sd->sd_scsi_rw(wu))
1485 			goto stuffup;
1486 		break;
1487 
1488 	case SYNCHRONIZE_CACHE:
1489 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
1490 		    DEVNAME(sc));
1491 		if (sd->sd_scsi_sync(wu))
1492 			goto stuffup;
1493 		goto complete;
1494 
1495 	case TEST_UNIT_READY:
1496 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
1497 		    DEVNAME(sc));
1498 		if (sd->sd_scsi_tur(wu))
1499 			goto stuffup;
1500 		goto complete;
1501 
1502 	case START_STOP:
1503 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
1504 		    DEVNAME(sc));
1505 		if (sd->sd_scsi_start_stop(wu))
1506 			goto stuffup;
1507 		goto complete;
1508 
1509 	case INQUIRY:
1510 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
1511 		    DEVNAME(sc));
1512 		if (sd->sd_scsi_inquiry(wu))
1513 			goto stuffup;
1514 		goto complete;
1515 
1516 	case READ_CAPACITY:
1517 	case READ_CAPACITY_16:
1518 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
1519 		    DEVNAME(sc), xs->cmd->opcode);
1520 		if (sd->sd_scsi_read_cap(wu))
1521 			goto stuffup;
1522 		goto complete;
1523 
1524 	case REQUEST_SENSE:
1525 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
1526 		    DEVNAME(sc));
1527 		if (sd->sd_scsi_req_sense(wu))
1528 			goto stuffup;
1529 		goto complete;
1530 
1531 	default:
1532 		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
1533 		    DEVNAME(sc), xs->cmd->opcode);
1534 		/* XXX might need to add generic function to handle others */
1535 		goto stuffup;
1536 	}
1537 
1538 	return (SUCCESSFULLY_QUEUED);
1539 stuffup:
1540 	if (sd->sd_scsi_sense.error_code) {
1541 		xs->error = XS_SENSE;
1542 		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
1543 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
1544 	} else {
1545 		xs->error = XS_DRIVER_STUFFUP;
1546 		xs->flags |= ITSDONE;
1547 	}
1548 complete:
1549 	if (wu)
1550 		sr_wu_put(wu);
1551 	sr_scsi_done(sd, xs);
1552 	return (COMPLETE);
1553 }
1554 int
1555 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag,
1556     struct proc *p)
1557 {
1558 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
1559 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
1560 
1561 	return (sr_ioctl(link->adapter_softc, cmd, addr));
1562 }
1563 
1564 int
1565 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr)
1566 {
1567 	struct sr_softc		*sc = (struct sr_softc *)dev;
1568 	int			rv = 0;
1569 
1570 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc));
1571 
1572 	rw_enter_write(&sc->sc_lock);
1573 
1574 	switch (cmd) {
1575 	case BIOCINQ:
1576 		DNPRINTF(SR_D_IOCTL, "inq\n");
1577 		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
1578 		break;
1579 
1580 	case BIOCVOL:
1581 		DNPRINTF(SR_D_IOCTL, "vol\n");
1582 		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
1583 		break;
1584 
1585 	case BIOCDISK:
1586 		DNPRINTF(SR_D_IOCTL, "disk\n");
1587 		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
1588 		break;
1589 
1590 	case BIOCALARM:
1591 		DNPRINTF(SR_D_IOCTL, "alarm\n");
1592 		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
1593 		break;
1594 
1595 	case BIOCBLINK:
1596 		DNPRINTF(SR_D_IOCTL, "blink\n");
1597 		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
1598 		break;
1599 
1600 	case BIOCSETSTATE:
1601 		DNPRINTF(SR_D_IOCTL, "setstate\n");
1602 		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
1603 		break;
1604 
1605 	case BIOCCREATERAID:
1606 		DNPRINTF(SR_D_IOCTL, "createraid\n");
1607 		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
1608 		break;
1609 
1610 	case BIOCDELETERAID:
1611 		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
1612 		break;
1613 	default:
1614 		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
1615 		rv = ENOTTY;
1616 	}
1617 
1618 	rw_exit_write(&sc->sc_lock);
1619 
1620 	return (rv);
1621 }
1622 
1623 int
1624 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
1625 {
1626 	int			i, vol, disk;
1627 
1628 	for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++)
1629 		/* XXX this will not work when we stagger disciplines */
1630 		if (sc->sc_dis[i]) {
1631 			vol++;
1632 			disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
1633 		}
1634 
1635 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
1636 	bi->bi_novol = vol;
1637 	bi->bi_nodisk = disk;
1638 
1639 	return (0);
1640 }
1641 
1642 int
1643 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
1644 {
1645 	int			i, vol, rv = EINVAL;
1646 	struct sr_discipline	*sd;
1647 
1648 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1649 		/* XXX this will not work when we stagger disciplines */
1650 		if (sc->sc_dis[i])
1651 			vol++;
1652 		if (vol != bv->bv_volid)
1653 			continue;
1654 
1655 		sd = sc->sc_dis[i];
1656 		bv->bv_status = sd->sd_vol_status;
1657 		bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
1658 		bv->bv_level = sd->sd_meta->ssdi.ssd_level;
1659 		bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
1660 		strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
1661 		    sizeof(bv->bv_dev));
1662 		strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
1663 		    sizeof(bv->bv_vendor));
1664 		rv = 0;
1665 		break;
1666 	}
1667 
1668 	return (rv);
1669 }
1670 
1671 int
1672 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
1673 {
1674 	int			i, vol, rv = EINVAL, id;
1675 	struct sr_chunk		*src;
1676 
1677 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1678 		/* XXX this will not work when we stagger disciplines */
1679 		if (sc->sc_dis[i])
1680 			vol++;
1681 		if (vol != bd->bd_volid)
1682 			continue;
1683 
1684 		id = bd->bd_diskid;
1685 		if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
1686 			break;
1687 
1688 		src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
1689 		bd->bd_status = src->src_meta.scm_status;
1690 		bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
1691 		bd->bd_channel = vol;
1692 		bd->bd_target = id;
1693 		strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
1694 		    sizeof(bd->bd_vendor));
1695 		rv = 0;
1696 		break;
1697 	}
1698 
1699 	return (rv);
1700 }
1701 
1702 int
1703 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
1704 {
1705 	int			rv = EINVAL;
1706 
1707 #ifdef SR_UNIT_TEST
1708 	int			i, vol, state;
1709 	struct sr_discipline	*sd;
1710 
1711 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
1712 		/* XXX this will not work when we stagger disciplines */
1713 		if (sc->sc_dis[i])
1714 			vol++;
1715 		if (vol != bs->bs_channel)
1716 			continue;
1717 
1718 		sd = sc->sc_dis[vol];
1719 		if (bs->bs_target >= sd->sd_meta->ssdi.ssd_chunk_no)
1720 			goto done;
1721 
1722 		switch (bs->bs_status) {
1723 		case BIOC_SSONLINE:
1724 			state = BIOC_SDONLINE;
1725 			break;
1726 		case BIOC_SSOFFLINE:
1727 			state = BIOC_SDOFFLINE;
1728 			break;
1729 		case BIOC_SSHOTSPARE:
1730 			state = BIOC_SDHOTSPARE;
1731 			break;
1732 		case BIOC_SSREBUILD:
1733 			state = BIOC_SDREBUILD;
1734 			break;
1735 		default:
1736 			printf("invalid state %d\n", bs->bs_status);
1737 			goto done;
1738 		}
1739 
1740 		printf("status change for %u:%u -> %u %u\n",
1741 		    bs->bs_channel, bs->bs_target, bs->bs_status, state);
1742 
1743 		sd->sd_set_chunk_state(sd, bs->bs_target, bs->bs_status);
1744 
1745 		rv = 0;
1746 
1747 		break;
1748 	}
1749 
1750 done:
1751 #endif
1752 	return (rv);
1753 }
1754 
1755 int
1756 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
1757 {
1758 	dev_t			*dt;
1759 	int			i, s, no_chunk, rv = EINVAL, vol;
1760 	int			no_meta, updatemeta = 0, disk = 1;
1761 	u_int64_t		vol_size;
1762 	int32_t			strip_size = 0;
1763 	struct sr_chunk_head	*cl;
1764 	struct sr_discipline	*sd = NULL;
1765 	struct sr_chunk		*ch_entry;
1766 	struct device		*dev, *dev2;
1767 	struct scsibus_attach_args saa;
1768 
1769 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
1770 	    DEVNAME(sc), user);
1771 
1772 	/* user input */
1773 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
1774 		goto unwind;
1775 
1776 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
1777 	if (user)
1778 		copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len);
1779 	else
1780 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
1781 
1782 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
1783 	sd->sd_sc = sc;
1784 
1785 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
1786 	cl = &sd->sd_vol.sv_chunk_list;
1787 	SLIST_INIT(cl);
1788 
1789 	/* we have a valid list now create an array index */
1790 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * no_chunk,
1791 	    M_DEVBUF, M_WAITOK | M_ZERO);
1792 
1793 	sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
1794 	if (sd->sd_meta_type == SR_META_F_INVALID) {
1795 		printf("%s: invalid metadata format\n", DEVNAME(sc));
1796 		goto unwind;
1797 	}
1798 
1799 	if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) {
1800 		printf("%s: can't attach metadata type %d\n", DEVNAME(sc),
1801 		    sd->sd_meta_type);
1802 		goto unwind;
1803 	}
1804 
1805 	/* force the raid volume by clearing metadata region */
1806 	if (bc->bc_flags & BIOC_SCFORCE) {
1807 		/* make sure disk isn't up and running */
1808 		if (sr_meta_read(sd))
1809 			if (sr_already_assembled(sd)) {
1810 				printf("%s: disk ", DEVNAME(sc));
1811 				sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1812 				printf(" is currently in use; can't force "
1813 				    "create\n");
1814 				goto unwind;
1815 			}
1816 
1817 		if (sr_meta_clear(sd)) {
1818 			printf("%s: failed to clear metadata\n", DEVNAME(sc));
1819 			goto unwind;
1820 		}
1821 	}
1822 
1823 	if ((no_meta = sr_meta_read(sd)) == 0) {
1824 		/* fill out all chunk metadata */
1825 		sr_meta_chunks_create(sc, cl);
1826 		ch_entry = SLIST_FIRST(cl);
1827 
1828 		/* no metadata available */
1829 		switch (bc->bc_level) {
1830 		case 0:
1831 			if (no_chunk < 2)
1832 				goto unwind;
1833 			strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name));
1834 			/*
1835 			 * XXX add variable strip size later even though
1836 			 * MAXPHYS is really the clever value, users like
1837 			 * to tinker with that type of stuff
1838 			 */
1839 			strip_size = MAXPHYS;
1840 			vol_size =
1841 			    ch_entry->src_meta.scmi.scm_coerced_size * no_chunk;
1842 			break;
1843 		case 1:
1844 			if (no_chunk < 2)
1845 				goto unwind;
1846 			strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
1847 			vol_size = ch_entry->src_meta.scmi.scm_coerced_size;
1848 			break;
1849 #ifdef AOE
1850 #ifdef not_yet
1851 		case 'A':
1852 			/* target */
1853 			if (no_chunk != 1)
1854 				goto unwind;
1855 			strlcpy(sd->sd_name, "AOE TARG", sizeof(sd->sd_name));
1856 			vol_size = ch_entry->src_meta.scmi.scm_coerced_size;
1857 			break;
1858 		case 'a':
1859 			/* initiator */
1860 			if (no_chunk != 1)
1861 				goto unwind;
1862 			strlcpy(sd->sd_name, "AOE INIT", sizeof(sd->sd_name));
1863 			break;
1864 #endif /* not_yet */
1865 #endif /* AOE */
1866 #ifdef CRYPTO
1867 		case 'C':
1868 			DNPRINTF(SR_D_IOCTL,
1869 			    "%s: sr_ioctl_createraid: no_chunk %d\n",
1870 			    DEVNAME(sc), no_chunk);
1871 
1872 			if (no_chunk != 1)
1873 				goto unwind;
1874 
1875 			/* no hint available yet */
1876 			if (bc->bc_opaque_flags & BIOC_SOOUT) {
1877 				bc->bc_opaque_status = BIOC_SOINOUT_FAILED;
1878 				rv = 0;
1879 				goto unwind;
1880 			}
1881 
1882 			if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE))
1883 				goto unwind;
1884 
1885 			if (sr_crypto_get_kdf(bc, sd))
1886 				goto unwind;
1887 
1888 			strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name));
1889 			vol_size = ch_entry->src_meta.scmi.scm_size;
1890 
1891 			sr_crypto_create_keys(sd);
1892 
1893 			break;
1894 #endif /* CRYPTO */
1895 		default:
1896 			goto unwind;
1897 		}
1898 
1899 		/* fill out all volume metadata */
1900 		DNPRINTF(SR_D_IOCTL,
1901 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
1902 		    DEVNAME(sc), vol_size);
1903 		sd->sd_meta->ssdi.ssd_chunk_no = no_chunk;
1904 		sd->sd_meta->ssdi.ssd_size = vol_size;
1905 		sd->sd_vol_status = BIOC_SVONLINE;
1906 		sd->sd_meta->ssdi.ssd_level = bc->bc_level;
1907 		sd->sd_meta->ssdi.ssd_strip_size = strip_size;
1908 		strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD",
1909 		    sizeof(sd->sd_meta->ssdi.ssd_vendor));
1910 		snprintf(sd->sd_meta->ssdi.ssd_product,
1911 		    sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s",
1912 		    sd->sd_name);
1913 		snprintf(sd->sd_meta->ssdi.ssd_revision,
1914 		    sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d",
1915 		    SR_META_VERSION);
1916 
1917 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
1918 		updatemeta = 1;
1919 	} else if (no_meta == no_chunk) {
1920 		if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
1921 			printf("%s: %s was not shutdown properly\n",
1922 			    DEVNAME(sc), sd->sd_meta->ssd_devname);
1923 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
1924 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
1925 			    "metadata\n", DEVNAME(sc));
1926 			goto unwind;
1927 		}
1928 		if (sr_already_assembled(sd)) {
1929 			printf("%s: disk ", DEVNAME(sc));
1930 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1931 			printf(" already assembled\n");
1932 			goto unwind;
1933 		}
1934 #ifdef CRYPTO
1935 		/* provide userland with kdf hint */
1936 		if (bc->bc_opaque_flags & BIOC_SOOUT) {
1937 			if (bc->bc_opaque == NULL)
1938 				goto unwind;
1939 
1940 			if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) <
1941 			    bc->bc_opaque_size)
1942 				goto unwind;
1943 
1944 			if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint,
1945 			    bc->bc_opaque, bc->bc_opaque_size))
1946 				goto unwind;
1947 
1948 			/* we're done */
1949 			bc->bc_opaque_status = BIOC_SOINOUT_OK;
1950 			rv = 0;
1951 			goto unwind;
1952 		}
1953 		/* get kdf with maskkey from userland */
1954 		if (bc->bc_opaque_flags & BIOC_SOIN) {
1955 			if (sr_crypto_get_kdf(bc, sd))
1956 				goto unwind;
1957 		}
1958 #endif	/* CRYPTO */
1959 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
1960 		    DEVNAME(sc));
1961 		updatemeta = 0;
1962 	} else if (no_meta == -1) {
1963 		printf("%s: one of the chunks has corrupt metadata; aborting "
1964 		    "assembly\n", DEVNAME(sc));
1965 		goto unwind;
1966 	} else {
1967 		if (sr_already_assembled(sd)) {
1968 			printf("%s: disk ", DEVNAME(sc));
1969 			sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
1970 			printf(" already assembled; will not partial "
1971 			    "assemble it\n");
1972 			goto unwind;
1973 		}
1974 		printf("%s: not yet partial bringup\n", DEVNAME(sc));
1975 		goto unwind;
1976 	}
1977 
1978 	/* XXX metadata SHALL be fully filled in at this point */
1979 
1980 	switch (bc->bc_level) {
1981 	case 0:
1982 		/* fill out discipline members */
1983 		sd->sd_type = SR_MD_RAID0;
1984 		sd->sd_max_ccb_per_wu =
1985 		    (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) *
1986 		    SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no;
1987 		sd->sd_max_wu = SR_RAID0_NOWU;
1988 
1989 		/* setup discipline pointers */
1990 		sd->sd_alloc_resources = sr_raid0_alloc_resources;
1991 		sd->sd_free_resources = sr_raid0_free_resources;
1992 		sd->sd_start_discipline = NULL;
1993 		sd->sd_scsi_inquiry = sr_raid_inquiry;
1994 		sd->sd_scsi_read_cap = sr_raid_read_cap;
1995 		sd->sd_scsi_tur = sr_raid_tur;
1996 		sd->sd_scsi_req_sense = sr_raid_request_sense;
1997 		sd->sd_scsi_start_stop = sr_raid_start_stop;
1998 		sd->sd_scsi_sync = sr_raid_sync;
1999 		sd->sd_scsi_rw = sr_raid0_rw;
2000 		sd->sd_set_chunk_state = sr_raid0_set_chunk_state;
2001 		sd->sd_set_vol_state = sr_raid0_set_vol_state;
2002 		break;
2003 	case 1:
2004 		/* fill out discipline members */
2005 		sd->sd_type = SR_MD_RAID1;
2006 		sd->sd_max_ccb_per_wu = no_chunk;
2007 		sd->sd_max_wu = SR_RAID1_NOWU;
2008 
2009 		/* setup discipline pointers */
2010 		sd->sd_alloc_resources = sr_raid1_alloc_resources;
2011 		sd->sd_free_resources = sr_raid1_free_resources;
2012 		sd->sd_start_discipline = NULL;
2013 		sd->sd_scsi_inquiry = sr_raid_inquiry;
2014 		sd->sd_scsi_read_cap = sr_raid_read_cap;
2015 		sd->sd_scsi_tur = sr_raid_tur;
2016 		sd->sd_scsi_req_sense = sr_raid_request_sense;
2017 		sd->sd_scsi_start_stop = sr_raid_start_stop;
2018 		sd->sd_scsi_sync = sr_raid_sync;
2019 		sd->sd_scsi_rw = sr_raid1_rw;
2020 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
2021 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
2022 		break;
2023 #ifdef AOE
2024 	/* target */
2025 	case 'A':
2026 		/* fill out discipline members */
2027 		sd->sd_type = SR_MD_AOE_TARG;
2028 		sd->sd_max_ccb_per_wu = no_chunk;
2029 		sd->sd_max_wu = SR_RAIDAOE_NOWU;
2030 
2031 		/* setup discipline pointers */
2032 		sd->sd_alloc_resources = sr_aoe_server_alloc_resources;
2033 		sd->sd_free_resources = sr_aoe_server_free_resources;
2034 		sd->sd_start_discipline = sr_aoe_server_start;
2035 		sd->sd_scsi_inquiry = NULL;
2036 		sd->sd_scsi_read_cap = NULL;
2037 		sd->sd_scsi_tur = NULL;
2038 		sd->sd_scsi_req_sense = NULL;
2039 		sd->sd_scsi_start_stop = NULL;
2040 		sd->sd_scsi_sync = NULL;
2041 		sd->sd_scsi_rw = NULL;
2042 		sd->sd_set_chunk_state = NULL;
2043 		sd->sd_set_vol_state = NULL;
2044 		disk = 0; /* we are not a disk */
2045 		break;
2046 	case 'a':
2047 		/* initiator */
2048 		/* fill out discipline members */
2049 		sd->sd_type = SR_MD_AOE_INIT;
2050 		sd->sd_max_ccb_per_wu = no_chunk;
2051 		sd->sd_max_wu = SR_RAIDAOE_NOWU;
2052 
2053 		/* setup discipline pointers */
2054 		sd->sd_alloc_resources = sr_aoe_alloc_resources;
2055 		sd->sd_free_resources = sr_aoe_free_resources;
2056 		sd->sd_start_discipline = NULL;
2057 		sd->sd_scsi_inquiry = sr_raid_inquiry;
2058 		sd->sd_scsi_read_cap = sr_raid_read_cap;
2059 		sd->sd_scsi_tur = sr_raid_tur;
2060 		sd->sd_scsi_req_sense = sr_raid_request_sense;
2061 		sd->sd_scsi_start_stop = sr_raid_start_stop;
2062 		sd->sd_scsi_sync = sr_raid_sync;
2063 		sd->sd_scsi_rw = sr_aoe_rw;
2064 		/* XXX reuse raid 1 functions for now FIXME */
2065 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
2066 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
2067 		break;
2068 #endif
2069 #ifdef CRYPTO
2070 	case 'C':
2071 		/* fill out discipline members */
2072 		sd->sd_type = SR_MD_CRYPTO;
2073 		sd->sd_max_ccb_per_wu = no_chunk;
2074 		sd->sd_max_wu = SR_CRYPTO_NOWU;
2075 
2076 		/* setup discipline pointers */
2077 		sd->sd_alloc_resources = sr_crypto_alloc_resources;
2078 		sd->sd_free_resources = sr_crypto_free_resources;
2079 		sd->sd_start_discipline = NULL;
2080 		sd->sd_scsi_inquiry = sr_raid_inquiry;
2081 		sd->sd_scsi_read_cap = sr_raid_read_cap;
2082 		sd->sd_scsi_tur = sr_raid_tur;
2083 		sd->sd_scsi_req_sense = sr_raid_request_sense;
2084 		sd->sd_scsi_start_stop = sr_raid_start_stop;
2085 		sd->sd_scsi_sync = sr_raid_sync;
2086 		sd->sd_scsi_rw = sr_crypto_rw;
2087 		/* XXX reuse raid 1 functions for now FIXME */
2088 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
2089 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
2090 		break;
2091 #endif
2092 	default:
2093 		goto unwind;
2094 	}
2095 
2096 	/* allocate all resources */
2097 	if ((rv = sd->sd_alloc_resources(sd)))
2098 		goto unwind;
2099 
2100 	if (disk) {
2101 		/* setup scsi midlayer */
2102 		sd->sd_link.openings = sd->sd_max_wu;
2103 		sd->sd_link.device = &sr_dev;
2104 		sd->sd_link.device_softc = sc;
2105 		sd->sd_link.adapter_softc = sc;
2106 		sd->sd_link.adapter = &sr_switch;
2107 		sd->sd_link.adapter_target = SR_MAX_LD;
2108 		sd->sd_link.adapter_buswidth = 1;
2109 		bzero(&saa, sizeof(saa));
2110 		saa.saa_sc_link = &sd->sd_link;
2111 
2112 		/*
2113 		 * we passed all checks return ENXIO if volume can't be created
2114 		 */
2115 		rv = ENXIO;
2116 
2117 		/* clear sense data */
2118 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2119 
2120 		/* use temporary discipline pointer */
2121 		s = splhigh();
2122 		sc->sc_attach_dis = sd;
2123 		splx(s);
2124 		dev2 = config_found(&sc->sc_dev, &saa, scsiprint);
2125 		s = splhigh();
2126 		sc->sc_attach_dis = NULL;
2127 		splx(s);
2128 		TAILQ_FOREACH(dev, &alldevs, dv_list)
2129 			if (dev->dv_parent == dev2)
2130 				break;
2131 		if (dev == NULL)
2132 			goto unwind;
2133 
2134 		DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n",
2135 		    DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus);
2136 
2137 		sc->sc_dis[sd->sd_link.scsibus] = sd;
2138 		for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++)
2139 			if (sc->sc_dis[i])
2140 				vol++;
2141 		sd->sd_scsibus_dev = dev2;
2142 
2143 		rv = 0;
2144 		if (updatemeta) {
2145 			/* fill out remaining volume metadata */
2146 			sd->sd_meta->ssdi.ssd_volid = vol;
2147 			strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
2148 			    sizeof(sd->sd_meta->ssd_devname));
2149 			sr_meta_init(sd, cl);
2150 		} else {
2151 			if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
2152 			    sizeof(dev->dv_xname))) {
2153 				printf("%s: volume %s is roaming, it used to "
2154 				    "be %s, updating metadata\n",
2155 				    DEVNAME(sc), dev->dv_xname,
2156 				    sd->sd_meta->ssd_devname);
2157 
2158 				sd->sd_meta->ssdi.ssd_volid = vol;
2159 				strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
2160 				    sizeof(sd->sd_meta->ssd_devname));
2161 			}
2162 		}
2163 #ifndef SMALL_KERNEL
2164 		if (sr_sensors_create(sd))
2165 			printf("%s: unable to create sensor for %s\n",
2166 			    DEVNAME(sc), dev->dv_xname);
2167 		else
2168 			sd->sd_vol.sv_sensor_valid = 1;
2169 #endif /* SMALL_KERNEL */
2170 	} else {
2171 		/* we are not an os disk */
2172 		if (updatemeta) {
2173 			/* fill out remaining volume metadata */
2174 			sd->sd_meta->ssdi.ssd_volid = 0;
2175 			strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname,
2176 			    sizeof(sd->sd_meta->ssd_devname));
2177 			sr_meta_init(sd, cl);
2178 		}
2179 		if (sd->sd_start_discipline(sd))
2180 			goto unwind;
2181 	}
2182 
2183 	/* save metadata to disk */
2184 	rv = sr_meta_save(sd, SR_META_DIRTY);
2185 	sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd);
2186 
2187 	return (rv);
2188 unwind:
2189 	sr_discipline_shutdown(sd);
2190 
2191 	return (rv);
2192 }
2193 
2194 int
2195 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
2196 {
2197 	struct sr_discipline	*sd = NULL;
2198 	int			rv = 1;
2199 	int			i;
2200 
2201 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
2202 	    dr->bd_dev);
2203 
2204 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2205 		if (sc->sc_dis[i]) {
2206 			if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname,
2207 			    dr->bd_dev,
2208 			    sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
2209 				sd = sc->sc_dis[i];
2210 				break;
2211 			}
2212 		}
2213 
2214 	if (sd == NULL)
2215 		goto bad;
2216 
2217 	sd->sd_deleted = 1;
2218 	sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE;
2219 	sr_shutdown(sd);
2220 
2221 	rv = 0;
2222 bad:
2223 	return (rv);
2224 }
2225 
2226 void
2227 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
2228 {
2229 	struct sr_chunk		*ch_entry, *ch_next;
2230 	dev_t			dev;
2231 
2232 	DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
2233 
2234 	if (!cl)
2235 		return;
2236 
2237 	for (ch_entry = SLIST_FIRST(cl);
2238 	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
2239 		ch_next = SLIST_NEXT(ch_entry, src_link);
2240 
2241 		dev = ch_entry->src_dev_mm;
2242 		DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
2243 		    DEVNAME(sc), ch_entry->src_devname);
2244 		if (dev != NODEV)
2245 			bdevsw_lookup(dev)->d_close(dev, FWRITE, S_IFBLK,
2246 			    curproc);
2247 
2248 		free(ch_entry, M_DEVBUF);
2249 	}
2250 	SLIST_INIT(cl);
2251 }
2252 
2253 void
2254 sr_discipline_free(struct sr_discipline *sd)
2255 {
2256 	struct sr_softc		*sc = sd->sd_sc;
2257 	int			i;
2258 
2259 	if (!sd)
2260 		return;
2261 
2262 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
2263 	    DEVNAME(sc),
2264 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
2265 	if (sd->sd_free_resources)
2266 		sd->sd_free_resources(sd);
2267 	if (sd->sd_vol.sv_chunks)
2268 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
2269 	if (sd->sd_meta)
2270 		free(sd->sd_meta, M_DEVBUF);
2271 	if (sd->sd_meta_foreign)
2272 		free(sd->sd_meta_foreign, M_DEVBUF);
2273 
2274 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2275 		if (sc->sc_dis[i] == sd) {
2276 			sc->sc_dis[i] = NULL;
2277 			break;
2278 		}
2279 
2280 	free(sd, M_DEVBUF);
2281 }
2282 
2283 void
2284 sr_discipline_shutdown(struct sr_discipline *sd)
2285 {
2286 	struct sr_softc		*sc = sd->sd_sc;
2287 	int			s;
2288 
2289 	if (!sd || !sc)
2290 		return;
2291 
2292 	DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
2293 	    sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
2294 
2295 	s = splbio();
2296 
2297 	if (sd->sd_shutdownhook)
2298 		shutdownhook_disestablish(sd->sd_shutdownhook);
2299 
2300 	/* make sure there isn't a sync pending and yield */
2301 	wakeup(sd);
2302 	while (sd->sd_sync || sd->sd_must_flush)
2303 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
2304 		    EWOULDBLOCK)
2305 			break;
2306 
2307 #ifndef SMALL_KERNEL
2308 	sr_sensors_delete(sd);
2309 #endif /* SMALL_KERNEL */
2310 
2311 	if (sd->sd_scsibus_dev)
2312 		config_detach(sd->sd_scsibus_dev, DETACH_FORCE);
2313 
2314 	sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
2315 
2316 	if (sd)
2317 		sr_discipline_free(sd);
2318 
2319 	splx(s);
2320 }
2321 
2322 int
2323 sr_raid_inquiry(struct sr_workunit *wu)
2324 {
2325 	struct sr_discipline	*sd = wu->swu_dis;
2326 	struct scsi_xfer	*xs = wu->swu_xs;
2327 	struct scsi_inquiry_data inq;
2328 
2329 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
2330 
2331 	bzero(&inq, sizeof(inq));
2332 	inq.device = T_DIRECT;
2333 	inq.dev_qual2 = 0;
2334 	inq.version = 2;
2335 	inq.response_format = 2;
2336 	inq.additional_length = 32;
2337 	strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
2338 	    sizeof(inq.vendor));
2339 	strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
2340 	    sizeof(inq.product));
2341 	strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
2342 	    sizeof(inq.revision));
2343 	sr_copy_internal_data(xs, &inq, sizeof(inq));
2344 
2345 	return (0);
2346 }
2347 
2348 int
2349 sr_raid_read_cap(struct sr_workunit *wu)
2350 {
2351 	struct sr_discipline	*sd = wu->swu_dis;
2352 	struct scsi_xfer	*xs = wu->swu_xs;
2353 	struct scsi_read_cap_data rcd;
2354 	struct scsi_read_cap_data_16 rcd16;
2355 	int			rv = 1;
2356 
2357 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
2358 
2359 	if (xs->cmd->opcode == READ_CAPACITY) {
2360 		bzero(&rcd, sizeof(rcd));
2361 		if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu)
2362 			_lto4b(0xffffffff, rcd.addr);
2363 		else
2364 			_lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr);
2365 		_lto4b(512, rcd.length);
2366 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
2367 		rv = 0;
2368 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
2369 		bzero(&rcd16, sizeof(rcd16));
2370 		_lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr);
2371 		_lto4b(512, rcd16.length);
2372 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
2373 		rv = 0;
2374 	}
2375 
2376 	return (rv);
2377 }
2378 
2379 int
2380 sr_raid_tur(struct sr_workunit *wu)
2381 {
2382 	struct sr_discipline	*sd = wu->swu_dis;
2383 
2384 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
2385 
2386 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2387 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
2388 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
2389 		sd->sd_scsi_sense.add_sense_code = 0x04;
2390 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
2391 		sd->sd_scsi_sense.extra_len = 4;
2392 		return (1);
2393 	} else if (sd->sd_vol_status == BIOC_SVINVALID) {
2394 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
2395 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
2396 		sd->sd_scsi_sense.add_sense_code = 0x05;
2397 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
2398 		sd->sd_scsi_sense.extra_len = 4;
2399 		return (1);
2400 	}
2401 
2402 	return (0);
2403 }
2404 
2405 int
2406 sr_raid_request_sense(struct sr_workunit *wu)
2407 {
2408 	struct sr_discipline	*sd = wu->swu_dis;
2409 	struct scsi_xfer	*xs = wu->swu_xs;
2410 
2411 	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
2412 	    DEVNAME(sd->sd_sc));
2413 
2414 	/* use latest sense data */
2415 	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
2416 
2417 	/* clear sense data */
2418 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
2419 
2420 	return (0);
2421 }
2422 
2423 int
2424 sr_raid_start_stop(struct sr_workunit *wu)
2425 {
2426 	struct sr_discipline	*sd = wu->swu_dis;
2427 	struct scsi_xfer	*xs = wu->swu_xs;
2428 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
2429 	int			rv = 1;
2430 
2431 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
2432 	    DEVNAME(sd->sd_sc));
2433 
2434 	if (!ss)
2435 		return (rv);
2436 
2437 	if (ss->byte2 == 0x00) {
2438 		/* START */
2439 		if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2440 			/* bring volume online */
2441 			/* XXX check to see if volume can be brought online */
2442 			sd->sd_vol_status = BIOC_SVONLINE;
2443 		}
2444 		rv = 0;
2445 	} else /* XXX is this the check? if (byte == 0x01) */ {
2446 		/* STOP */
2447 		if (sd->sd_vol_status == BIOC_SVONLINE) {
2448 			/* bring volume offline */
2449 			sd->sd_vol_status = BIOC_SVOFFLINE;
2450 		}
2451 		rv = 0;
2452 	}
2453 
2454 	return (rv);
2455 }
2456 
2457 int
2458 sr_raid_sync(struct sr_workunit *wu)
2459 {
2460 	struct sr_discipline	*sd = wu->swu_dis;
2461 	int			s, rv = 0, ios;
2462 
2463 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
2464 
2465 	/* when doing a fake sync don't coun't the wu */
2466 	ios = wu->swu_fake ? 0 : 1;
2467 
2468 	s = splbio();
2469 	sd->sd_sync = 1;
2470 
2471 	while (sd->sd_wu_pending > ios)
2472 		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
2473 			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
2474 			    DEVNAME(sd->sd_sc));
2475 			rv = 1;
2476 			break;
2477 		}
2478 
2479 	sd->sd_sync = 0;
2480 	splx(s);
2481 
2482 	wakeup(&sd->sd_sync);
2483 
2484 	return (rv);
2485 }
2486 
2487 void
2488 sr_raid_startwu(struct sr_workunit *wu)
2489 {
2490 	struct sr_discipline	*sd = wu->swu_dis;
2491 	struct sr_ccb		*ccb;
2492 
2493 	splassert(IPL_BIO);
2494 
2495 	if (wu->swu_state == SR_WU_RESTART)
2496 		/*
2497 		 * no need to put the wu on the pending queue since we
2498 		 * are restarting the io
2499 		 */
2500 		 ;
2501 	else
2502 		/* move wu to pending queue */
2503 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
2504 
2505 	/* start all individual ios */
2506 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
2507 		bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf);
2508 	}
2509 }
2510 
2511 void
2512 sr_checksum_print(u_int8_t *md5)
2513 {
2514 	int			i;
2515 
2516 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
2517 		printf("%02x", md5[i]);
2518 }
2519 
2520 void
2521 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
2522 {
2523 	MD5_CTX			ctx;
2524 
2525 	DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
2526 	    md5, len);
2527 
2528 	MD5Init(&ctx);
2529 	MD5Update(&ctx, src, len);
2530 	MD5Final(md5, &ctx);
2531 }
2532 
2533 void
2534 sr_uuid_get(struct sr_uuid *uuid)
2535 {
2536 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
2537 	/* UUID version 4: random */
2538 	uuid->sui_id[6] &= 0x0f;
2539 	uuid->sui_id[6] |= 0x40;
2540 	/* RFC4122 variant */
2541 	uuid->sui_id[8] &= 0x3f;
2542 	uuid->sui_id[8] |= 0x80;
2543 }
2544 
2545 void
2546 sr_uuid_print(struct sr_uuid *uuid, int cr)
2547 {
2548 	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
2549 	    "%02x%02x%02x%02x%02x%02x",
2550 	    uuid->sui_id[0], uuid->sui_id[1],
2551 	    uuid->sui_id[2], uuid->sui_id[3],
2552 	    uuid->sui_id[4], uuid->sui_id[5],
2553 	    uuid->sui_id[6], uuid->sui_id[7],
2554 	    uuid->sui_id[8], uuid->sui_id[9],
2555 	    uuid->sui_id[10], uuid->sui_id[11],
2556 	    uuid->sui_id[12], uuid->sui_id[13],
2557 	    uuid->sui_id[14], uuid->sui_id[15]);
2558 
2559 	if (cr)
2560 		printf("\n");
2561 }
2562 
2563 int
2564 sr_already_assembled(struct sr_discipline *sd)
2565 {
2566 	struct sr_softc		*sc = sd->sd_sc;
2567 	int			i;
2568 
2569 	for (i = 0; i < SR_MAXSCSIBUS; i++)
2570 		if (sc->sc_dis[i])
2571 			if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
2572 			    &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
2573 			    sizeof(sd->sd_meta->ssdi.ssd_uuid)))
2574 				return (1);
2575 
2576 	return (0);
2577 }
2578 
/*
 * Validate a strip size given in bytes.
 *
 * A valid strip size is a non-zero multiple of 512 that is also a power
 * of two.
 *
 * Returns log2 of the strip size (the shift that converts between bytes
 * and strips) when it is valid, -1 otherwise.
 */
int32_t
sr_validate_stripsize(u_int32_t b)
{
	int			s = 0;

	/*
	 * zero has no bit set, so the shift loop below would never
	 * terminate; reject it together with non multiples of 512
	 */
	if (b == 0 || b % 512)
		return (-1);

	/* count the trailing zero bits */
	while ((b & 1) == 0) {
		b >>= 1;
		s++;
	}

	/* only powers of two: nothing may be left above the lowest set bit */
	b >>= 1;
	if (b)
		return (-1);

	return (s);
}
2599 
2600 void
2601 sr_shutdown(void *arg)
2602 {
2603 	struct sr_discipline	*sd = arg;
2604 #ifdef SR_DEBUG
2605 	struct sr_softc		*sc = sd->sd_sc;
2606 #endif
2607 	DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n",
2608 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
2609 
2610 	sr_meta_save(sd, 0);
2611 
2612 	sr_discipline_shutdown(sd);
2613 }
2614 
2615 int
2616 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
2617 {
2618 	struct sr_discipline	*sd = wu->swu_dis;
2619 	struct scsi_xfer	*xs = wu->swu_xs;
2620 	int			rv = 1;
2621 
2622 	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
2623 	    xs->cmd->opcode);
2624 
2625 	if (sd->sd_vol_status == BIOC_SVOFFLINE) {
2626 		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
2627 		    DEVNAME(sd->sd_sc), func);
2628 		goto bad;
2629 	}
2630 
2631 	if (xs->datalen == 0) {
2632 		printf("%s: %s: illegal block count for %s\n",
2633 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
2634 		goto bad;
2635 	}
2636 
2637 	if (xs->cmdlen == 10)
2638 		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
2639 	else if (xs->cmdlen == 16)
2640 		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
2641 	else if (xs->cmdlen == 6)
2642 		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
2643 	else {
2644 		printf("%s: %s: illegal cmdlen for %s\n",
2645 		    DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
2646 		goto bad;
2647 	}
2648 
2649 	wu->swu_blk_start = *blk;
2650 	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
2651 
2652 	if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
2653 		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
2654 		    "end: %lld length: %d\n",
2655 		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
2656 		    wu->swu_blk_end, xs->datalen);
2657 
2658 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
2659 		    SSD_ERRCODE_VALID;
2660 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
2661 		sd->sd_scsi_sense.add_sense_code = 0x21;
2662 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
2663 		sd->sd_scsi_sense.extra_len = 4;
2664 		goto bad;
2665 	}
2666 
2667 	rv = 0;
2668 bad:
2669 	return (rv);
2670 }
2671 
2672 int
2673 sr_check_io_collision(struct sr_workunit *wu)
2674 {
2675 	struct sr_discipline	*sd = wu->swu_dis;
2676 	struct sr_workunit	*wup;
2677 
2678 	splassert(IPL_BIO);
2679 
2680 	/* walk queue backwards and fill in collider if we have one */
2681 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
2682 		if (wu->swu_blk_end < wup->swu_blk_start ||
2683 		    wup->swu_blk_end < wu->swu_blk_start)
2684 			continue;
2685 
2686 		/* we have an LBA collision, defer wu */
2687 		wu->swu_state = SR_WU_DEFERRED;
2688 		if (wup->swu_collider)
2689 			/* wu is on deferred queue, append to last wu */
2690 			while (wup->swu_collider)
2691 				wup = wup->swu_collider;
2692 
2693 		wup->swu_collider = wu;
2694 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
2695 		sd->sd_wu_collisions++;
2696 		goto queued;
2697 	}
2698 
2699 	return (0);
2700 queued:
2701 	return (1);
2702 }
2703 
2704 #ifndef SMALL_KERNEL
2705 int
2706 sr_sensors_create(struct sr_discipline *sd)
2707 {
2708 	struct sr_softc		*sc = sd->sd_sc;
2709 	int			rv = 1;
2710 
2711 	DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
2712 	    DEVNAME(sc), sd->sd_meta->ssd_devname);
2713 
2714 	strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc),
2715 	    sizeof(sd->sd_vol.sv_sensordev.xname));
2716 
2717 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
2718 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
2719 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
2720 	    sizeof(sd->sd_vol.sv_sensor.desc));
2721 
2722 	sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor);
2723 
2724 	if (sc->sc_sensors_running == 0) {
2725 		if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL)
2726 			goto bad;
2727 		sc->sc_sensors_running = 1;
2728 	}
2729 	sensordev_install(&sd->sd_vol.sv_sensordev);
2730 
2731 	rv = 0;
2732 bad:
2733 	return (rv);
2734 }
2735 
2736 void
2737 sr_sensors_delete(struct sr_discipline *sd)
2738 {
2739 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc));
2740 
2741 	if (sd->sd_vol.sv_sensor_valid)
2742 		sensordev_deinstall(&sd->sd_vol.sv_sensordev);
2743 }
2744 
2745 void
2746 sr_sensors_refresh(void *arg)
2747 {
2748 	struct sr_softc		*sc = arg;
2749 	struct sr_volume	*sv;
2750 	struct sr_discipline	*sd;
2751 	int			i, vol;
2752 
2753 	DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
2754 
2755 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2756 		/* XXX this will not work when we stagger disciplines */
2757 		if (!sc->sc_dis[i])
2758 			continue;
2759 
2760 		sd = sc->sc_dis[i];
2761 		sv = &sd->sd_vol;
2762 
2763 		switch(sd->sd_vol_status) {
2764 		case BIOC_SVOFFLINE:
2765 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
2766 			sv->sv_sensor.status = SENSOR_S_CRIT;
2767 			break;
2768 
2769 		case BIOC_SVDEGRADED:
2770 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
2771 			sv->sv_sensor.status = SENSOR_S_WARN;
2772 			break;
2773 
2774 		case BIOC_SVSCRUB:
2775 		case BIOC_SVONLINE:
2776 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
2777 			sv->sv_sensor.status = SENSOR_S_OK;
2778 			break;
2779 
2780 		default:
2781 			sv->sv_sensor.value = 0; /* unknown */
2782 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
2783 		}
2784 	}
2785 }
2786 #endif /* SMALL_KERNEL */
2787 
2788 #ifdef SR_FANCY_STATS
2789 void				sr_print_stats(void);
2790 
2791 void
2792 sr_print_stats(void)
2793 {
2794 	struct sr_softc		*sc;
2795 	struct sr_discipline	*sd;
2796 	int			i, vol;
2797 
2798 	for (i = 0; i < softraid_cd.cd_ndevs; i++)
2799 		if (softraid_cd.cd_devs[i]) {
2800 			sc = softraid_cd.cd_devs[i];
2801 			/* we'll only have one softc */
2802 			break;
2803 		}
2804 
2805 	if (!sc) {
2806 		printf("no softraid softc found\n");
2807 		return;
2808 	}
2809 
2810 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2811 		/* XXX this will not work when we stagger disciplines */
2812 		if (!sc->sc_dis[i])
2813 			continue;
2814 
2815 		sd = sc->sc_dis[i];
2816 		printf("%s: ios pending: %d  collisions %llu\n",
2817 		    sd->sd_meta->ssd_devname,
2818 		    sd->sd_wu_pending,
2819 		    sd->sd_wu_collisions);
2820 	}
2821 }
2822 #endif /* SR_FANCY_STATS */
2823 
2824 #ifdef SR_DEBUG
2825 void
2826 sr_meta_print(struct sr_metadata *m)
2827 {
2828 	int			i;
2829 	struct sr_meta_chunk	*mc;
2830 	struct sr_meta_opt	*mo;
2831 
2832 	if (!(sr_debug & SR_D_META))
2833 		return;
2834 
2835 	printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
2836 	printf("\tssd_version %d\n", m->ssdi.ssd_version);
2837 	printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags);
2838 	printf("\tssd_uuid ");
2839 	sr_uuid_print(&m->ssdi.ssd_uuid, 1);
2840 	printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
2841 	printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
2842 	printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
2843 	printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
2844 	printf("\tssd_level %d\n", m->ssdi.ssd_level);
2845 	printf("\tssd_size %lld\n", m->ssdi.ssd_size);
2846 	printf("\tssd_devname %s\n", m->ssd_devname);
2847 	printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
2848 	printf("\tssd_product %s\n", m->ssdi.ssd_product);
2849 	printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
2850 	printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
2851 	printf("\tssd_checksum ");
2852 	sr_checksum_print(m->ssd_checksum);
2853 	printf("\n");
2854 	printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
2855 	printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
2856 
2857 	mc = (struct sr_meta_chunk *)(m + 1);
2858 	for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
2859 		printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
2860 		printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
2861 		printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
2862 		printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
2863 		printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
2864 		printf("\t\tscm_uuid ");
2865 		sr_uuid_print(&mc->scmi.scm_uuid, 1);
2866 		printf("\t\tscm_checksum ");
2867 		sr_checksum_print(mc->scm_checksum);
2868 		printf("\n");
2869 		printf("\t\tscm_status %d\n", mc->scm_status);
2870 	}
2871 
2872 	mo = (struct sr_meta_opt *)(mc);
2873 	for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) {
2874 		printf("\t\t\tsom_type %d\n", mo->somi.som_type);
2875 		printf("\t\t\tsom_checksum ");
2876 		sr_checksum_print(mo->som_checksum);
2877 		printf("\n");
2878 	}
2879 }
2880 
/*
 * Print len bytes starting at p as two digit hex values separated by
 * spaces, followed by a newline.  A len of zero or less prints only the
 * newline.
 */
void
sr_dump_mem(u_int8_t *p, int len)
{
	u_int8_t		*cur;
	int			left;

	for (cur = p, left = len; left > 0; left--)
		printf("%02x ", *cur++);
	printf("\n");
}
2890 
2891 #endif /* SR_DEBUG */
2892