xref: /openbsd-src/sys/dev/softraid.c (revision d874cce4b1d9fe6b41c9e4f2117a77d8a4a37b92)
1 /* $OpenBSD: softraid.c,v 1.116 2008/06/25 17:43:09 thib Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bio.h"
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/buf.h>
24 #include <sys/device.h>
25 #include <sys/ioctl.h>
26 #include <sys/proc.h>
27 #include <sys/malloc.h>
28 #include <sys/pool.h>
29 #include <sys/kernel.h>
30 #include <sys/disk.h>
31 #include <sys/rwlock.h>
32 #include <sys/queue.h>
33 #include <sys/fcntl.h>
34 #include <sys/disklabel.h>
35 #include <sys/mount.h>
36 #include <sys/sensors.h>
37 #include <sys/stat.h>
38 #include <sys/conf.h>
39 #include <sys/uio.h>
40 
41 #include <crypto/cryptodev.h>
42 
43 #include <scsi/scsi_all.h>
44 #include <scsi/scsiconf.h>
45 #include <scsi/scsi_disk.h>
46 
47 #include <dev/softraidvar.h>
48 #include <dev/rndvar.h>
49 
50 /* #define SR_FANCY_STATS */
51 
#ifdef SR_DEBUG
#define SR_FANCY_STATS
/*
 * Debug category mask, only present in SR_DEBUG builds.  It starts out as 0;
 * remove the comment markers around individual SR_D_* terms below to OR
 * them into the initial value.  Presumably consulted by the DNPRINTF()
 * macro, which is defined outside this file -- TODO confirm.
 */
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif
66 
67 void		sr_init(void);
68 
69 int		sr_match(struct device *, void *, void *);
70 void		sr_attach(struct device *, struct device *, void *);
71 int		sr_detach(struct device *, int);
72 int		sr_activate(struct device *, enum devact);
73 
/*
 * Attachment record for the softraid device: the softc size followed by the
 * match, attach, detach and activate entry points defined in this file.
 */
struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};

/* Driver record: device name "softraid", device class DV_DULL. */
struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};
82 
83 int			sr_scsi_cmd(struct scsi_xfer *);
84 void			sr_minphys(struct buf *bp);
85 void			sr_copy_internal_data(struct scsi_xfer *,
86 			    void *, size_t);
87 int			sr_scsi_ioctl(struct scsi_link *, u_long,
88 			    caddr_t, int, struct proc *);
89 int			sr_ioctl(struct device *, u_long, caddr_t);
90 int			sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
91 int			sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
92 int			sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
93 int			sr_ioctl_setstate(struct sr_softc *,
94 			    struct bioc_setstate *);
95 int			sr_ioctl_createraid(struct sr_softc *,
96 			    struct bioc_createraid *, int);
97 int			sr_ioctl_deleteraid(struct sr_softc *,
98 			    struct bioc_deleteraid *);
99 int			sr_open_chunks(struct sr_softc *,
100 			    struct sr_chunk_head *, dev_t *, int);
101 int			sr_read_meta(struct sr_discipline *);
102 int			sr_create_chunk_meta(struct sr_softc *,
103 			    struct sr_chunk_head *);
104 void			sr_unwind_chunks(struct sr_softc *,
105 			    struct sr_chunk_head *);
106 void			sr_free_discipline(struct sr_discipline *);
107 void			sr_shutdown_discipline(struct sr_discipline *);
108 
109 /* utility functions */
110 void			sr_shutdown(void *);
111 void			sr_get_uuid(struct sr_uuid *);
112 void			sr_print_uuid(struct sr_uuid *, int);
113 u_int32_t		sr_checksum(char *, u_int32_t *, u_int32_t);
114 int			sr_clear_metadata(struct sr_discipline *);
115 int			sr_save_metadata(struct sr_discipline *, u_int32_t);
116 int			sr_boot_assembly(struct sr_softc *);
117 int			sr_already_assembled(struct sr_discipline *);
118 int			sr_validate_metadata(struct sr_softc *, dev_t,
119 			    struct sr_metadata *);
120 
121 /* don't include these on RAMDISK */
122 #ifndef SMALL_KERNEL
123 void			sr_refresh_sensors(void *);
124 int			sr_create_sensors(struct sr_discipline *);
125 void			sr_delete_sensors(struct sr_discipline *);
126 #endif
127 
128 #ifdef SR_DEBUG
129 void			sr_print_metadata(struct sr_metadata *);
130 #else
131 #define			sr_print_metadata(m)
132 #endif
133 
/* Pools for struct uio and struct iovec objects; initialised by sr_init(). */
struct pool sr_uiopl;
struct pool sr_iovpl;

/*
 * SCSI adapter glue: command entry point, transfer size clamp and ioctl
 * handler.  The two NULL slots are left unused by this driver.
 */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

/* SCSI device glue; every hook is left NULL. */
struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};
144 
145 void
146 sr_init(void)
147 {
148 	pool_init(&sr_uiopl, sizeof(struct uio), 0, 0, 0, "sr_uiopl", NULL);
149 	pool_init(&sr_iovpl, sizeof(struct iovec), 0, 0, 0, "sr_iovpl", NULL);
150 }
151 
/*
 * Match routine.  Always reports a match (returns 1); the first invocation
 * additionally runs sr_init() exactly once.
 */
int
sr_match(struct device *parent, void *match, void *aux)
{
	static int		initialized = 0;

	if (initialized == 0) {
		sr_init();
		initialized = 1;
	}

	return (1);
}
164 
165 void
166 sr_attach(struct device *parent, struct device *self, void *aux)
167 {
168 	struct sr_softc		*sc = (void *)self;
169 
170 	DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc));
171 
172 	rw_init(&sc->sc_lock, "sr_lock");
173 
174 	if (bio_register(&sc->sc_dev, sr_ioctl) != 0)
175 		printf("%s: controller registration failed", DEVNAME(sc));
176 	else
177 		sc->sc_ioctl = sr_ioctl;
178 
179 	printf("\n");
180 
181 	sr_boot_assembly(sc);
182 }
183 
/*
 * Detach routine.  Performs no teardown and unconditionally returns 0.
 */
int
sr_detach(struct device *self, int flags)
{
	/* nothing to release here */
	return 0;
}
189 
190 int
191 sr_activate(struct device *self, enum devact act)
192 {
193 	return (1);
194 }
195 
196 void
197 sr_minphys(struct buf *bp)
198 {
199 	DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount);
200 
201 	/* XXX currently using SR_MAXFER = MAXPHYS */
202 	if (bp->b_bcount > SR_MAXFER)
203 		bp->b_bcount = SR_MAXFER;
204 	minphys(bp);
205 }
206 
207 void
208 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
209 {
210 	size_t			copy_cnt;
211 
212 	DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n",
213 	    xs, size);
214 
215 	if (xs->datalen) {
216 		copy_cnt = MIN(size, xs->datalen);
217 		bcopy(v, xs->data, copy_cnt);
218 	}
219 }
220 
221 int
222 sr_alloc_ccb(struct sr_discipline *sd)
223 {
224 	struct sr_ccb		*ccb;
225 	int			i;
226 
227 	if (!sd)
228 		return (1);
229 
230 	DNPRINTF(SR_D_CCB, "%s: sr_alloc_ccb\n", DEVNAME(sd->sd_sc));
231 
232 	if (sd->sd_ccb)
233 		return (1);
234 
235 	sd->sd_ccb = malloc(sizeof(struct sr_ccb) *
236 	    sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO);
237 	TAILQ_INIT(&sd->sd_ccb_freeq);
238 	for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
239 		ccb = &sd->sd_ccb[i];
240 		ccb->ccb_dis = sd;
241 		sr_put_ccb(ccb);
242 	}
243 
244 	DNPRINTF(SR_D_CCB, "%s: sr_alloc_ccb ccb: %d\n",
245 	    DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
246 
247 	return (0);
248 }
249 
250 void
251 sr_free_ccb(struct sr_discipline *sd)
252 {
253 	struct sr_ccb		*ccb;
254 
255 	if (!sd)
256 		return;
257 
258 	DNPRINTF(SR_D_CCB, "%s: sr_free_ccb %p\n", DEVNAME(sd->sd_sc), sd);
259 
260 	while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
261 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
262 
263 	if (sd->sd_ccb)
264 		free(sd->sd_ccb, M_DEVBUF);
265 }
266 
267 struct sr_ccb *
268 sr_get_ccb(struct sr_discipline *sd)
269 {
270 	struct sr_ccb		*ccb;
271 	int			s;
272 
273 	s = splbio();
274 
275 	ccb = TAILQ_FIRST(&sd->sd_ccb_freeq);
276 	if (ccb) {
277 		TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
278 		ccb->ccb_state = SR_CCB_INPROGRESS;
279 	}
280 
281 	splx(s);
282 
283 	DNPRINTF(SR_D_CCB, "%s: sr_get_ccb: %p\n", DEVNAME(sd->sd_sc),
284 	    ccb);
285 
286 	return (ccb);
287 }
288 
289 void
290 sr_put_ccb(struct sr_ccb *ccb)
291 {
292 	struct sr_discipline	*sd = ccb->ccb_dis;
293 	int			s;
294 
295 	DNPRINTF(SR_D_CCB, "%s: sr_put_ccb: %p\n", DEVNAME(sd->sd_sc),
296 	    ccb);
297 
298 	s = splbio();
299 
300 	ccb->ccb_wu = NULL;
301 	ccb->ccb_state = SR_CCB_FREE;
302 	ccb->ccb_target = -1;
303 	ccb->ccb_opaque = NULL;
304 
305 	TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link);
306 
307 	splx(s);
308 }
309 
310 int
311 sr_alloc_wu(struct sr_discipline *sd)
312 {
313 	struct sr_workunit	*wu;
314 	int			i, no_wu;
315 
316 	if (!sd)
317 		return (1);
318 
319 	DNPRINTF(SR_D_WU, "%s: sr_alloc_wu %p %d\n", DEVNAME(sd->sd_sc),
320 	    sd, sd->sd_max_wu);
321 
322 	if (sd->sd_wu)
323 		return (1);
324 
325 	no_wu = sd->sd_max_wu;
326 	sd->sd_wu_pending = no_wu;
327 
328 	sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu,
329 	    M_DEVBUF, M_WAITOK | M_ZERO);
330 	TAILQ_INIT(&sd->sd_wu_freeq);
331 	TAILQ_INIT(&sd->sd_wu_pendq);
332 	TAILQ_INIT(&sd->sd_wu_defq);
333 	for (i = 0; i < no_wu; i++) {
334 		wu = &sd->sd_wu[i];
335 		wu->swu_dis = sd;
336 		sr_put_wu(wu);
337 	}
338 
339 	return (0);
340 }
341 
342 void
343 sr_free_wu(struct sr_discipline *sd)
344 {
345 	struct sr_workunit	*wu;
346 
347 	if (!sd)
348 		return;
349 
350 	DNPRINTF(SR_D_WU, "%s: sr_free_wu %p\n", DEVNAME(sd->sd_sc), sd);
351 
352 	while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
353 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
354 	while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL)
355 		TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
356 	while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL)
357 		TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link);
358 
359 	if (sd->sd_wu)
360 		free(sd->sd_wu, M_DEVBUF);
361 }
362 
363 void
364 sr_put_wu(struct sr_workunit *wu)
365 {
366 	struct sr_discipline	*sd = wu->swu_dis;
367 	struct sr_ccb		*ccb;
368 
369 	int			s;
370 
371 	DNPRINTF(SR_D_WU, "%s: sr_put_wu: %p\n", DEVNAME(sd->sd_sc), wu);
372 
373 	s = splbio();
374 
375 	wu->swu_xs = NULL;
376 	wu->swu_state = SR_WU_FREE;
377 	wu->swu_ios_complete = 0;
378 	wu->swu_ios_failed = 0;
379 	wu->swu_ios_succeeded = 0;
380 	wu->swu_io_count = 0;
381 	wu->swu_blk_start = 0;
382 	wu->swu_blk_end = 0;
383 	wu->swu_collider = NULL;
384 	wu->swu_fake = 0;
385 
386 	while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
387 		TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
388 		sr_put_ccb(ccb);
389 	}
390 	TAILQ_INIT(&wu->swu_ccb);
391 
392 	TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
393 	sd->sd_wu_pending--;
394 
395 	splx(s);
396 }
397 
398 struct sr_workunit *
399 sr_get_wu(struct sr_discipline *sd)
400 {
401 	struct sr_workunit	*wu;
402 	int			s;
403 
404 	s = splbio();
405 
406 	wu = TAILQ_FIRST(&sd->sd_wu_freeq);
407 	if (wu) {
408 		TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
409 		wu->swu_state = SR_WU_INPROGRESS;
410 	}
411 	sd->sd_wu_pending++;
412 
413 	splx(s);
414 
415 	DNPRINTF(SR_D_WU, "%s: sr_get_wu: %p\n", DEVNAME(sd->sd_sc), wu);
416 
417 	return (wu);
418 }
419 
420 int
421 sr_scsi_cmd(struct scsi_xfer *xs)
422 {
423 	int			s;
424 	struct scsi_link	*link = xs->sc_link;
425 	struct sr_softc		*sc = link->adapter_softc;
426 	struct sr_workunit	*wu;
427 	struct sr_discipline	*sd;
428 
429 	DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p "
430 	    "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags);
431 
432 	sd = sc->sc_dis[link->scsibus];
433 	if (sd == NULL) {
434 		s = splhigh();
435 		sd = sc->sc_attach_dis;
436 		splx(s);
437 
438 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n",
439 		    DEVNAME(sc), sd);
440 		if (sd == NULL) {
441 			wu = NULL;
442 			printf("%s: sr_scsi_cmd NULL discipline\n",
443 			    DEVNAME(sc));
444 			goto stuffup;
445 		}
446 	}
447 
448 	if (sd->sd_deleted) {
449 		printf("%s: %s device is being deleted, failing io\n",
450 		    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
451 		goto stuffup;
452 	}
453 
454 	if ((wu = sr_get_wu(sd)) == NULL) {
455 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc));
456 		return (TRY_AGAIN_LATER);
457 	}
458 
459 	xs->error = XS_NOERROR;
460 	wu->swu_xs = xs;
461 
462 	switch (xs->cmd->opcode) {
463 	case READ_COMMAND:
464 	case READ_BIG:
465 	case READ_16:
466 	case WRITE_COMMAND:
467 	case WRITE_BIG:
468 	case WRITE_16:
469 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n",
470 		    DEVNAME(sc), xs->cmd->opcode);
471 		if (sd->sd_scsi_rw(wu))
472 			goto stuffup;
473 		break;
474 
475 	case SYNCHRONIZE_CACHE:
476 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n",
477 		    DEVNAME(sc));
478 		if (sd->sd_scsi_sync(wu))
479 			goto stuffup;
480 		goto complete;
481 
482 	case TEST_UNIT_READY:
483 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n",
484 		    DEVNAME(sc));
485 		if (sd->sd_scsi_tur(wu))
486 			goto stuffup;
487 		goto complete;
488 
489 	case START_STOP:
490 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n",
491 		    DEVNAME(sc));
492 		if (sd->sd_scsi_start_stop(wu))
493 			goto stuffup;
494 		goto complete;
495 
496 	case INQUIRY:
497 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n",
498 		    DEVNAME(sc));
499 		if (sd->sd_scsi_inquiry(wu))
500 			goto stuffup;
501 		goto complete;
502 
503 	case READ_CAPACITY:
504 	case READ_CAPACITY_16:
505 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n",
506 		    DEVNAME(sc), xs->cmd->opcode);
507 		if (sd->sd_scsi_read_cap(wu))
508 			goto stuffup;
509 		goto complete;
510 
511 	case REQUEST_SENSE:
512 		DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n",
513 		    DEVNAME(sc));
514 		if (sd->sd_scsi_req_sense(wu))
515 			goto stuffup;
516 		goto complete;
517 
518 	default:
519 		DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n",
520 		    DEVNAME(sc), xs->cmd->opcode);
521 		/* XXX might need to add generic function to handle others */
522 		goto stuffup;
523 	}
524 
525 	return (SUCCESSFULLY_QUEUED);
526 stuffup:
527 	if (sd->sd_scsi_sense.error_code) {
528 		xs->error = XS_SENSE;
529 		bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
530 		bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
531 	} else {
532 		xs->error = XS_DRIVER_STUFFUP;
533 		xs->flags |= ITSDONE;
534 	}
535 complete:
536 	s = splbio();
537 	scsi_done(xs);
538 	splx(s);
539 	if (wu)
540 		sr_put_wu(wu);
541 	return (COMPLETE);
542 }
543 int
544 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag,
545     struct proc *p)
546 {
547 	DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n",
548 	    DEVNAME((struct sr_softc *)link->adapter_softc), cmd);
549 
550 	return (sr_ioctl(link->adapter_softc, cmd, addr));
551 }
552 
553 int
554 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr)
555 {
556 	struct sr_softc		*sc = (struct sr_softc *)dev;
557 	int			rv = 0;
558 
559 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc));
560 
561 	rw_enter_write(&sc->sc_lock);
562 
563 	switch (cmd) {
564 	case BIOCINQ:
565 		DNPRINTF(SR_D_IOCTL, "inq\n");
566 		rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr);
567 		break;
568 
569 	case BIOCVOL:
570 		DNPRINTF(SR_D_IOCTL, "vol\n");
571 		rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr);
572 		break;
573 
574 	case BIOCDISK:
575 		DNPRINTF(SR_D_IOCTL, "disk\n");
576 		rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr);
577 		break;
578 
579 	case BIOCALARM:
580 		DNPRINTF(SR_D_IOCTL, "alarm\n");
581 		/*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */
582 		break;
583 
584 	case BIOCBLINK:
585 		DNPRINTF(SR_D_IOCTL, "blink\n");
586 		/*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */
587 		break;
588 
589 	case BIOCSETSTATE:
590 		DNPRINTF(SR_D_IOCTL, "setstate\n");
591 		rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr);
592 		break;
593 
594 	case BIOCCREATERAID:
595 		DNPRINTF(SR_D_IOCTL, "createraid\n");
596 		rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1);
597 		break;
598 
599 	case BIOCDELETERAID:
600 		rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr);
601 		break;
602 	default:
603 		DNPRINTF(SR_D_IOCTL, "invalid ioctl\n");
604 		rv = ENOTTY;
605 	}
606 
607 	rw_exit_write(&sc->sc_lock);
608 
609 	return (rv);
610 }
611 
612 int
613 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
614 {
615 	int			i, vol, disk;
616 
617 	for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++)
618 		/* XXX this will not work when we stagger disciplines */
619 		if (sc->sc_dis[i]) {
620 			vol++;
621 			disk += sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk;
622 		}
623 
624 	strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
625 	bi->bi_novol = vol;
626 	bi->bi_nodisk = disk;
627 
628 	return (0);
629 }
630 
631 int
632 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
633 {
634 	int			i, vol, rv = EINVAL;
635 	struct sr_volume	*sv;
636 
637 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
638 		/* XXX this will not work when we stagger disciplines */
639 		if (sc->sc_dis[i])
640 			vol++;
641 		if (vol != bv->bv_volid)
642 			continue;
643 
644 		sv = &sc->sc_dis[i]->sd_vol;
645 		bv->bv_status = sv->sv_meta.svm_status;
646 		bv->bv_size = sv->sv_meta.svm_size << DEV_BSHIFT;
647 		bv->bv_level = sv->sv_meta.svm_level;
648 		bv->bv_nodisk = sv->sv_meta.svm_no_chunk;
649 		strlcpy(bv->bv_dev, sv->sv_meta.svm_devname,
650 		    sizeof(bv->bv_dev));
651 		strlcpy(bv->bv_vendor, sv->sv_meta.svm_vendor,
652 		    sizeof(bv->bv_vendor));
653 		rv = 0;
654 		break;
655 	}
656 
657 	return (rv);
658 }
659 
660 int
661 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
662 {
663 	int			i, vol, rv = EINVAL, id;
664 	struct sr_chunk		*src;
665 
666 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
667 		/* XXX this will not work when we stagger disciplines */
668 		if (sc->sc_dis[i])
669 			vol++;
670 		if (vol != bd->bd_volid)
671 			continue;
672 
673 		id = bd->bd_diskid;
674 		if (id >= sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk)
675 			break;
676 
677 		src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
678 		bd->bd_status = src->src_meta.scm_status;
679 		bd->bd_size = src->src_meta.scm_size << DEV_BSHIFT;
680 		bd->bd_channel = vol;
681 		bd->bd_target = id;
682 		strlcpy(bd->bd_vendor, src->src_meta.scm_devname,
683 		    sizeof(bd->bd_vendor));
684 		rv = 0;
685 		break;
686 	}
687 
688 	return (rv);
689 }
690 
691 int
692 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
693 {
694 	int			rv = EINVAL;
695 
696 #ifdef SR_UNIT_TEST
697 	int			i, vol, state;
698 	struct sr_discipline	*sd;
699 
700 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
701 		/* XXX this will not work when we stagger disciplines */
702 		if (sc->sc_dis[i])
703 			vol++;
704 		if (vol != bs->bs_channel)
705 			continue;
706 
707 		sd = sc->sc_dis[vol];
708 		if (bs->bs_target >= sd->sd_vol.sv_meta.svm_no_chunk)
709 			goto done;
710 
711 		switch (bs->bs_status) {
712 		case BIOC_SSONLINE:
713 			state = BIOC_SDONLINE;
714 			break;
715 		case BIOC_SSOFFLINE:
716 			state = BIOC_SDOFFLINE;
717 			break;
718 		case BIOC_SSHOTSPARE:
719 			state = BIOC_SDHOTSPARE;
720 			break;
721 		case BIOC_SSREBUILD:
722 			state = BIOC_SDREBUILD;
723 			break;
724 		default:
725 			printf("invalid state %d\n", bs->bs_status);
726 			goto done;
727 		}
728 
729 		printf("status change for %u:%u -> %u %u\n",
730 		    bs->bs_channel, bs->bs_target, bs->bs_status, state);
731 
732 		sd->sd_set_chunk_state(sd, bs->bs_target, bs->bs_status);
733 
734 		rv = 0;
735 
736 		break;
737 	}
738 
739 done:
740 #endif
741 	return (rv);
742 }
743 
744 int
745 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
746 {
747 	dev_t			*dt;
748 	int			i, s, no_chunk, rv = EINVAL, vol;
749 	int			no_meta, updatemeta = 0;
750 	u_int64_t		vol_size;
751 	int32_t			strip_size = 0;
752 	struct sr_chunk_head	*cl;
753 	struct sr_discipline	*sd = NULL;
754 	struct sr_chunk		*ch_entry;
755 	struct device		*dev, *dev2;
756 	struct scsibus_attach_args saa;
757 
758 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n",
759 	    DEVNAME(sc), user);
760 
761 	/* user input */
762 	if (bc->bc_dev_list_len > BIOC_CRMAXLEN)
763 		goto unwind;
764 
765 	dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO);
766 	if (user)
767 		copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len);
768 	else
769 		bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len);
770 
771 	sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO);
772 	sd->sd_sc = sc;
773 
774 	no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
775 	cl = &sd->sd_vol.sv_chunk_list;
776 	SLIST_INIT(cl);
777 	if (sr_open_chunks(sc, cl, dt, no_chunk))
778 		goto unwind;
779 
780 	/* in memory copy of metadata */
781 	sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO);
782 
783 	/* we have a valid list now create an array index */
784 	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * no_chunk,
785 	    M_DEVBUF, M_WAITOK | M_ZERO);
786 
787 	/* force the raid volume by clearing metadata region */
788 	if (bc->bc_flags & BIOC_SCFORCE) {
789 		/* make sure disk isn't up and running */
790 		if (sr_read_meta(sd))
791 			if (sr_already_assembled(sd)) {
792 				printf("%s: disk ", DEVNAME(sc));
793 				sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
794 				printf(" is currently in use; can't force "
795 				    "create\n");
796 				goto unwind;
797 			}
798 
799 		/* zero out pointers and metadata again to create disk */
800 		bzero(sd->sd_vol.sv_chunks,
801 		    sizeof(struct sr_chunk *) * no_chunk);
802 		bzero(sd->sd_meta, SR_META_SIZE  * 512);
803 
804 		if (sr_clear_metadata(sd)) {
805 			printf("%s: failed to clear metadata\n", DEVNAME(sc));
806 			goto unwind;
807 		}
808 	}
809 
810 	if ((no_meta = sr_read_meta(sd)) == 0) {
811 		/* fill out chunk array */
812 		i = 0;
813 		SLIST_FOREACH(ch_entry, cl, src_link)
814 			sd->sd_vol.sv_chunks[i++] = ch_entry;
815 
816 		/* fill out all chunk metadata */
817 		sr_create_chunk_meta(sc, cl);
818 		ch_entry = SLIST_FIRST(cl);
819 
820 		/* no metadata available */
821 		switch (bc->bc_level) {
822 		case 0:
823 			if (no_chunk < 2)
824 				goto unwind;
825 			strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name));
826 			/*
827 			 * XXX add variable strip size later even though
828 			 * MAXPHYS is really the clever value, users like
829 			 * to tinker with that type of stuff
830 			 */
831 			strip_size = MAXPHYS;
832 			vol_size =
833 			    ch_entry->src_meta.scm_coerced_size * no_chunk;
834 			break;
835 		case 1:
836 			if (no_chunk < 2)
837 				goto unwind;
838 			strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
839 			vol_size = ch_entry->src_meta.scm_coerced_size;
840 			break;
841 #ifdef CRYPTO
842 		case 'C':
843 			DNPRINTF(SR_D_IOCTL,
844 			    "%s: sr_ioctl_createraid: no_chunk %d\n",
845 			    DEVNAME(sc), no_chunk);
846 
847 			if (no_chunk != 1)
848 				goto unwind;
849 
850 			/* no hint available yet */
851 			if (bc->bc_opaque_flags & BIOC_SOOUT) {
852 				bc->bc_opaque_status = BIOC_SOINOUT_FAILED;
853 				rv = 0;
854 				goto unwind;
855 			}
856 
857 			if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE))
858 				goto unwind;
859 
860 			if (sr_crypto_get_kdf(bc, sd))
861 				goto unwind;
862 
863 			strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name));
864 			vol_size = ch_entry->src_meta.scm_size;
865 
866 			sr_crypto_create_keys(sd);
867 
868 			break;
869 #endif /* CRYPTO */
870 		default:
871 			goto unwind;
872 		}
873 
874 		/* fill out all volume metadata */
875 		DNPRINTF(SR_D_IOCTL,
876 		    "%s: sr_ioctl_createraid: vol_size: %lld\n",
877 		    DEVNAME(sc), vol_size);
878 		sd->sd_vol.sv_meta.svm_no_chunk = no_chunk;
879 		sd->sd_vol.sv_meta.svm_size = vol_size;
880 		sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE;
881 		sd->sd_vol.sv_meta.svm_level = bc->bc_level;
882 		sd->sd_vol.sv_meta.svm_strip_size = strip_size;
883 		strlcpy(sd->sd_vol.sv_meta.svm_vendor, "OPENBSD",
884 		    sizeof(sd->sd_vol.sv_meta.svm_vendor));
885 		snprintf(sd->sd_vol.sv_meta.svm_product,
886 		    sizeof(sd->sd_vol.sv_meta.svm_product), "SR %s",
887 		    sd->sd_name);
888 		snprintf(sd->sd_vol.sv_meta.svm_revision,
889 		    sizeof(sd->sd_vol.sv_meta.svm_revision), "%03d",
890 		    SR_META_VERSION);
891 
892 		sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
893 		updatemeta = 1;
894 	} else if (no_meta == no_chunk) {
895 		if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
896 			DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
897 			    "metadata\n", DEVNAME(sc));
898 			goto unwind;
899 		}
900 		if (sr_already_assembled(sd)) {
901 			printf("%s: disk ", DEVNAME(sc));
902 			sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
903 			printf(" already assembled\n");
904 			goto unwind;
905 		}
906 #ifdef CRYPTO
907 		/* provide userland with kdf hint */
908 		if (bc->bc_opaque_flags & BIOC_SOOUT) {
909 			if (bc->bc_opaque == NULL)
910 				goto unwind;
911 
912 			if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) <
913 			    bc->bc_opaque_size)
914 				goto unwind;
915 
916 			if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint,
917 			    bc->bc_opaque, bc->bc_opaque_size))
918 				goto unwind;
919 
920 			/* we're done */
921 			bc->bc_opaque_status = BIOC_SOINOUT_OK;
922 			rv = 0;
923 			goto unwind;
924 		}
925 		/* get kdf with maskkey from userland */
926 		if (bc->bc_opaque_flags & BIOC_SOIN) {
927 			if (sr_crypto_get_kdf(bc, sd))
928 				goto unwind;
929 		}
930 #endif	/* CRYPTO */
931 		DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
932 		    DEVNAME(sc));
933 		updatemeta = 0;
934 	} else {
935 		if (sr_already_assembled(sd)) {
936 			printf("%s: disk ", DEVNAME(sc));
937 			sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
938 			printf(" already assembled; will not partial "
939 			    "assemble it\n");
940 			goto unwind;
941 		}
942 		printf("%s: not yet partial bringup\n", DEVNAME(sc));
943 		goto unwind;
944 	}
945 
946 	/* XXX metadata SHALL be fully filled in at this point */
947 
948 	switch (bc->bc_level) {
949 	case 0:
950 		/* fill out discipline members */
951 		sd->sd_type = SR_MD_RAID0;
952 		sd->sd_max_ccb_per_wu =
953 		    (MAXPHYS / sd->sd_vol.sv_meta.svm_strip_size + 1) *
954 		    SR_RAID0_NOWU * sd->sd_vol.sv_meta.svm_no_chunk;
955 		sd->sd_max_wu = SR_RAID0_NOWU;
956 
957 		/* setup discipline pointers */
958 		sd->sd_alloc_resources = sr_raid0_alloc_resources;
959 		sd->sd_free_resources = sr_raid0_free_resources;
960 		sd->sd_scsi_inquiry = sr_raid_inquiry;
961 		sd->sd_scsi_read_cap = sr_raid_read_cap;
962 		sd->sd_scsi_tur = sr_raid_tur;
963 		sd->sd_scsi_req_sense = sr_raid_request_sense;
964 		sd->sd_scsi_start_stop = sr_raid_start_stop;
965 		sd->sd_scsi_sync = sr_raid_sync;
966 		sd->sd_scsi_rw = sr_raid0_rw;
967 		sd->sd_set_chunk_state = sr_raid0_set_chunk_state;
968 		sd->sd_set_vol_state = sr_raid0_set_vol_state;
969 		break;
970 	case 1:
971 		/* fill out discipline members */
972 		sd->sd_type = SR_MD_RAID1;
973 		sd->sd_max_ccb_per_wu = no_chunk;
974 		sd->sd_max_wu = SR_RAID1_NOWU;
975 
976 		/* setup discipline pointers */
977 		sd->sd_alloc_resources = sr_raid1_alloc_resources;
978 		sd->sd_free_resources = sr_raid1_free_resources;
979 		sd->sd_scsi_inquiry = sr_raid_inquiry;
980 		sd->sd_scsi_read_cap = sr_raid_read_cap;
981 		sd->sd_scsi_tur = sr_raid_tur;
982 		sd->sd_scsi_req_sense = sr_raid_request_sense;
983 		sd->sd_scsi_start_stop = sr_raid_start_stop;
984 		sd->sd_scsi_sync = sr_raid_sync;
985 		sd->sd_scsi_rw = sr_raid1_rw;
986 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
987 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
988 		break;
989 #ifdef CRYPTO
990 	case 'C':
991 		/* fill out discipline members */
992 		sd->sd_type = SR_MD_CRYPTO;
993 		sd->sd_max_ccb_per_wu = no_chunk;
994 		sd->sd_max_wu = SR_CRYPTO_NOWU;
995 
996 		/* setup discipline pointers */
997 		sd->sd_alloc_resources = sr_crypto_alloc_resources;
998 		sd->sd_free_resources = sr_crypto_free_resources;
999 		sd->sd_scsi_inquiry = sr_raid_inquiry;
1000 		sd->sd_scsi_read_cap = sr_raid_read_cap;
1001 		sd->sd_scsi_tur = sr_raid_tur;
1002 		sd->sd_scsi_req_sense = sr_raid_request_sense;
1003 		sd->sd_scsi_start_stop = sr_raid_start_stop;
1004 		sd->sd_scsi_sync = sr_raid_sync;
1005 		sd->sd_scsi_rw = sr_crypto_rw;
1006 		/* XXX reuse raid 1 functions for now FIXME */
1007 		sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
1008 		sd->sd_set_vol_state = sr_raid1_set_vol_state;
1009 		break;
1010 #endif
1011 	default:
1012 		goto unwind;
1013 	}
1014 
1015 	/* allocate all resources */
1016 	if ((rv = sd->sd_alloc_resources(sd)))
1017 		goto unwind;
1018 
1019 	/* setup scsi midlayer */
1020 	sd->sd_link.openings = sd->sd_max_wu;
1021 	sd->sd_link.device = &sr_dev;
1022 	sd->sd_link.device_softc = sc;
1023 	sd->sd_link.adapter_softc = sc;
1024 	sd->sd_link.adapter = &sr_switch;
1025 	sd->sd_link.adapter_target = SR_MAX_LD;
1026 	sd->sd_link.adapter_buswidth = 1;
1027 	bzero(&saa, sizeof(saa));
1028 	saa.saa_sc_link = &sd->sd_link;
1029 
1030 	/* we passed all checks return ENXIO if volume can't be created */
1031 	rv = ENXIO;
1032 
1033 	/* clear sense data */
1034 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
1035 
1036 	/* use temporary discipline pointer */
1037 	s = splhigh();
1038 	sc->sc_attach_dis = sd;
1039 	splx(s);
1040 	dev2 = config_found(&sc->sc_dev, &saa, scsiprint);
1041 	s = splhigh();
1042 	sc->sc_attach_dis = NULL;
1043 	splx(s);
1044 	TAILQ_FOREACH(dev, &alldevs, dv_list)
1045 		if (dev->dv_parent == dev2)
1046 			break;
1047 	if (dev == NULL)
1048 		goto unwind;
1049 
1050 	DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n",
1051 	    DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus);
1052 
1053 	sc->sc_dis[sd->sd_link.scsibus] = sd;
1054 	for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++)
1055 		if (sc->sc_dis[i])
1056 			vol++;
1057 
1058 	rv = 0;
1059 	if (updatemeta) {
1060 		/* fill out remaining volume metadata */
1061 		sd->sd_vol.sv_meta.svm_volid = vol;
1062 		strlcpy(sd->sd_vol.sv_meta.svm_devname, dev->dv_xname,
1063 		    sizeof(sd->sd_vol.sv_meta.svm_devname));
1064 	}
1065 
1066 	/* save metadata to disk */
1067 	rv = sr_save_metadata(sd, SR_VOL_DIRTY);
1068 
1069 #ifndef SMALL_KERNEL
1070 	if (sr_create_sensors(sd))
1071 		printf("%s: unable to create sensor for %s\n", DEVNAME(sc),
1072 		    dev->dv_xname);
1073 	else
1074 		sd->sd_vol.sv_sensor_valid = 1;
1075 #endif /* SMALL_KERNEL */
1076 
1077 	sd->sd_scsibus_dev = dev2;
1078 	sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd);
1079 
1080 	return (rv);
1081 
1082 unwind:
1083 	sr_shutdown_discipline(sd);
1084 
1085 	return (rv);
1086 }
1087 
1088 int
1089 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
1090 {
1091 	struct sr_discipline	*sd = NULL;
1092 	int			rv = 1;
1093 	int			i;
1094 
1095 	DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
1096 	    dr->bd_dev);
1097 
1098 	for (i = 0; i < SR_MAXSCSIBUS; i++)
1099 		if (sc->sc_dis[i]) {
1100 			if (!strncmp(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname, dr->bd_dev,
1101 			    sizeof(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname))) {
1102 				sd = sc->sc_dis[i];
1103 				break;
1104 			}
1105 		}
1106 
1107 	if (sd == NULL)
1108 		goto bad;
1109 
1110 	sd->sd_deleted = 1;
1111 	sd->sd_meta->ssd_flags = BIOC_SCNOAUTOASSEMBLE;
1112 	sr_shutdown(sd);
1113 
1114 	rv = 0;
1115 bad:
1116 	return (rv);
1117 }
1118 
1119 int
1120 sr_open_chunks(struct sr_softc *sc, struct sr_chunk_head *cl, dev_t *dt,
1121     int no_chunk)
1122 {
1123 	struct sr_chunk		*ch_entry, *ch_prev = NULL;
1124 	struct disklabel	label;
1125 	struct bdevsw		*bdsw;
1126 	char			*name;
1127 	int			maj, unit, part, i, error;
1128 	daddr64_t		size;
1129 	dev_t			dev;
1130 
1131 	DNPRINTF(SR_D_IOCTL, "%s: sr_open_chunks(%d)\n", DEVNAME(sc), no_chunk);
1132 
1133 	/* fill out chunk list */
1134 	for (i = 0; i < no_chunk; i++) {
1135 		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
1136 		    M_WAITOK | M_ZERO);
1137 		/* keep disks in user supplied order */
1138 		if (ch_prev)
1139 			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
1140 		else
1141 			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
1142 		ch_prev = ch_entry;
1143 
1144 		dev = dt[i];
1145 		maj = major(dev);
1146 		part = DISKPART(dev);
1147 		unit = DISKUNIT(dev);
1148 		bdsw = &bdevsw[maj];
1149 
1150 		name = findblkname(maj);
1151 		if (name == NULL)
1152 			goto unwind;
1153 
1154 		snprintf(ch_entry->src_devname, sizeof(ch_entry->src_devname),
1155 		    "%s%d%c", name, unit, part + 'a');
1156 		name = ch_entry->src_devname;
1157 
1158 		/* open device */
1159 		error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc);
1160 
1161 		/* get disklabel */
1162 		error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label,
1163 		    0, NULL);
1164 		if (error) {
1165 			printf("%s: %s can't obtain disklabel\n",
1166 			    DEVNAME(sc), name);
1167 			bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
1168 			goto unwind;
1169 		}
1170 
1171 		/* make sure the partition is of the right type */
1172 		if (label.d_partitions[part].p_fstype != FS_RAID) {
1173 			printf("%s: %s partition not of type RAID (%d)\n",
1174 			    DEVNAME(sc), name,
1175 			    label.d_partitions[part].p_fstype);
1176 			bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
1177 			goto unwind;
1178 		}
1179 
1180 		/* get partition size while accounting for metadata! */
1181 		ch_entry->src_size = size =
1182 		    DL_GETPSIZE(&label.d_partitions[part]) -
1183 		    SR_META_SIZE - SR_META_OFFSET;
1184 		if (size <= 0) {
1185 			printf("%s: %s partition too small\n",
1186 			    DEVNAME(sc), name);
1187 			bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
1188 			goto unwind;
1189 		}
1190 
1191 
1192 		ch_entry->src_dev_mm = dev; /* major/minor */
1193 
1194 		DNPRINTF(SR_D_IOCTL, "%s: found %s size %d\n", DEVNAME(sc),
1195 		    name, size);
1196 	}
1197 
1198 	return (0);
1199 unwind:
1200 	printf("%s: invalid device: %s\n", DEVNAME(sc), name ? name : "nodev");
1201 	return (1);
1202 }
1203 
1204 int
1205 sr_read_meta(struct sr_discipline *sd)
1206 {
1207 	struct sr_softc		*sc = sd->sd_sc;
1208 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
1209 	struct sr_metadata	*sm = sd->sd_meta, *m;
1210 	struct sr_chunk		*ch_entry;
1211 	struct buf		b;
1212 	struct sr_vol_meta	*mv;
1213 	struct sr_chunk_meta	*mc;
1214 	struct sr_opt_meta	*mo;
1215 	size_t			sz = SR_META_SIZE * 512;
1216 	int			no_chunk = 0;
1217 	u_int32_t		volid, ondisk = 0, cid;
1218 
1219 	DNPRINTF(SR_D_META, "%s: sr_read_meta\n", DEVNAME(sc));
1220 
1221 	m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO);
1222 
1223 	SLIST_FOREACH(ch_entry, cl, src_link) {
1224 		bzero(&b, sizeof(b));
1225 
1226 		b.b_flags = B_READ;
1227 		b.b_blkno = SR_META_OFFSET;
1228 		b.b_bcount = sz;
1229 		b.b_bufsize = sz;
1230 		b.b_resid = sz;
1231 		b.b_data = (void *)m;
1232 		b.b_error = 0;
1233 		b.b_proc = curproc;
1234 		b.b_dev = ch_entry->src_dev_mm;
1235 		b.b_vp = NULL;
1236 		b.b_iodone = NULL;
1237 		LIST_INIT(&b.b_dep);
1238 		bdevsw_lookup(b.b_dev)->d_strategy(&b);
1239 		biowait(&b);
1240 
1241 		/* XXX mark chunk offline and restart metadata write */
1242 		if (b.b_flags & B_ERROR) {
1243 			printf("%s: %s i/o error on block %lld while reading "
1244 			    "metadata %d\n", DEVNAME(sc),
1245 			    ch_entry->src_devname, b.b_blkno, b.b_error);
1246 			continue;
1247 		}
1248 
1249 		if (m->ssd_magic != SR_MAGIC)
1250 			continue;
1251 
1252 		/* validate metadata */
1253 		if (sr_validate_metadata(sc, ch_entry->src_dev_mm, m)) {
1254 			printf("%s: invalid metadata\n", DEVNAME(sc));
1255 			no_chunk = -1;
1256 			goto bad;
1257 		}
1258 
1259 		mv = (struct sr_vol_meta *)(m + 1);
1260 		mc = (struct sr_chunk_meta *)(mv + 1);
1261 
1262 		/* we asssume that the first chunk has the initial metadata */
1263 		if (no_chunk++ == 0) {
1264 			bcopy(m, sm, sz);
1265 			bcopy(m, sd->sd_meta, sizeof(*sd->sd_meta));
1266 			bcopy(mv, &sd->sd_vol.sv_meta,
1267 			    sizeof(sd->sd_vol.sv_meta));
1268 
1269 			volid = m->ssd_vd_volid;
1270 			sd->sd_meta_flags = sm->ssd_flags;
1271 		}
1272 
1273 		if (bcmp(&sm->ssd_uuid, &sd->sd_vol.sv_meta.svm_uuid,
1274 		    sizeof(struct sr_uuid))) {
1275 			printf("%s: %s invalid chunk uuid ",
1276 			    DEVNAME(sc), ch_entry->src_devname);
1277 			sr_print_uuid(&sm->ssd_uuid, 0);
1278 			printf(", expected ");
1279 			sr_print_uuid(&sd->sd_vol.sv_meta.svm_uuid, 1);
1280 			no_chunk = -1;
1281 			goto bad;
1282 		}
1283 
1284 		/* we have meta data on disk */
1285 		ch_entry->src_meta_ondisk = 1;
1286 
1287 		/* make sure we are part of this vd */
1288 		if (volid != m->ssd_vd_volid) {
1289 			printf("%s: %s invalid volume id %d, expected %d\n",
1290 			    DEVNAME(sc), ch_entry->src_devname,
1291 			    volid, m->ssd_vd_volid);
1292 			no_chunk = -1;
1293 			goto bad;
1294 		}
1295 
1296 		if (m->ssd_chunk_id > m->ssd_chunk_no) {
1297 			printf("%s: %s chunk id out of range %d, expected "
1298 			    "lower than %d\n", DEVNAME(sc),
1299 			    ch_entry->src_devname,
1300 			    m->ssd_chunk_id, m->ssd_chunk_no);
1301 			no_chunk = -1;
1302 			goto bad;
1303 		}
1304 
1305 		if (sd->sd_vol.sv_chunks[m->ssd_chunk_id]) {
1306 			printf("%s: %s chunk id %d already in use\n",
1307 			    DEVNAME(sc), ch_entry->src_devname,
1308 			    m->ssd_chunk_id);
1309 			no_chunk = -1;
1310 			goto bad;
1311 		}
1312 
1313 		sd->sd_vol.sv_chunks[m->ssd_chunk_id] = ch_entry;
1314 		bcopy(mc + m->ssd_chunk_id, &ch_entry->src_meta,
1315 		    sizeof(ch_entry->src_meta));
1316 
1317 		if (ondisk == 0) {
1318 			ondisk = m->ssd_ondisk;
1319 			cid = m->ssd_chunk_id;
1320 		}
1321 
1322 		if (m->ssd_ondisk != ondisk) {
1323 			printf("%s: %s chunk id %d contains stale metadata\n",
1324 			    DEVNAME(sc), ch_entry->src_devname,
1325 			    m->ssd_ondisk < ondisk ? m->ssd_chunk_id : cid);
1326 			no_chunk = -1;
1327 			goto bad;
1328 		}
1329 
1330 		/* XXX fix this check, sd_type isnt filled in yet */
1331 		if (mv->svm_level == 'C') {
1332 			mo = (struct sr_opt_meta *)(mc + mv->svm_no_chunk);
1333 			if (m->ssd_chunk_id > 1) {
1334 				no_chunk = -1;
1335 				goto bad;
1336 			}
1337 			bcopy(&mo->som_meta,
1338 			    &sd->mds.mdd_crypto.scr_meta,
1339 			    sizeof(sd->mds.mdd_crypto.scr_meta)
1340 			    );
1341 		}
1342 	}
1343 
1344 	if (no_chunk != m->ssd_chunk_no) {
1345 		DNPRINTF(SR_D_META, "%s: not enough chunks supplied\n",
1346 		    DEVNAME(sc));
1347 		no_chunk = -1;
1348 		goto bad;
1349 	}
1350 
1351 	DNPRINTF(SR_D_META, "%s: sr_read_meta: found %d elements\n",
1352 	    DEVNAME(sc), no_chunk);
1353 
1354 	sr_print_metadata(m);
1355 
1356 bad:
1357 	/* return nr of chunks that contain metadata */
1358 	free(m, M_DEVBUF);
1359 	return (no_chunk);
1360 }
1361 
1362 int
1363 sr_create_chunk_meta(struct sr_softc *sc, struct sr_chunk_head *cl)
1364 {
1365 	struct sr_chunk		*ch_entry;
1366 	struct sr_uuid		uuid;
1367 	int			rv = 1, cid = 0;
1368 	char			*name;
1369 	u_int64_t		max_chunk_sz = 0, min_chunk_sz;
1370 
1371 	DNPRINTF(SR_D_IOCTL, "%s: sr_create_chunk_meta\n", DEVNAME(sc));
1372 
1373 	sr_get_uuid(&uuid);
1374 
1375 	/* fill out stuff and get largest chunk size while looping */
1376 	SLIST_FOREACH(ch_entry, cl, src_link) {
1377 		name = ch_entry->src_devname;
1378 		ch_entry->src_meta.scm_size = ch_entry->src_size;
1379 		ch_entry->src_meta.scm_chunk_id = cid++;
1380 		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
1381 		strlcpy(ch_entry->src_meta.scm_devname, name,
1382 		    sizeof(ch_entry->src_meta.scm_devname));
1383 		bcopy(&uuid,  &ch_entry->src_meta.scm_uuid,
1384 		    sizeof(ch_entry->src_meta.scm_uuid));
1385 
1386 		if (ch_entry->src_meta.scm_size > max_chunk_sz)
1387 			max_chunk_sz = ch_entry->src_meta.scm_size;
1388 	}
1389 
1390 	/* get smallest chunk size */
1391 	min_chunk_sz = max_chunk_sz;
1392 	SLIST_FOREACH(ch_entry, cl, src_link)
1393 		if (ch_entry->src_meta.scm_size < min_chunk_sz)
1394 			min_chunk_sz = ch_entry->src_meta.scm_size;
1395 
1396 	/* equalize all sizes */
1397 	SLIST_FOREACH(ch_entry, cl, src_link)
1398 		ch_entry->src_meta.scm_coerced_size = min_chunk_sz;
1399 
1400 	/* whine if chunks are not the same size */
1401 	if (min_chunk_sz != max_chunk_sz)
1402 		printf("%s: chunk sizes are not equal; up to %llu blocks "
1403 		    "wasted per chunk\n",
1404 		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
1405 
1406 	rv = 0;
1407 
1408 	return (rv);
1409 }
1410 
1411 void
1412 sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl)
1413 {
1414 	struct sr_chunk		*ch_entry, *ch_next;
1415 	dev_t			dev;
1416 
1417 	DNPRINTF(SR_D_IOCTL, "%s: sr_unwind_chunks\n", DEVNAME(sc));
1418 
1419 	if (!cl)
1420 		return;
1421 
1422 	for (ch_entry = SLIST_FIRST(cl);
1423 	    ch_entry != SLIST_END(cl); ch_entry = ch_next) {
1424 		ch_next = SLIST_NEXT(ch_entry, src_link);
1425 
1426 		dev = ch_entry->src_dev_mm;
1427 
1428 		if (dev != NODEV)
1429 			bdevsw_lookup(dev)->d_close(dev, FWRITE, S_IFBLK,
1430 			    curproc);
1431 
1432 		free(ch_entry, M_DEVBUF);
1433 	}
1434 	SLIST_INIT(cl);
1435 }
1436 
1437 void
1438 sr_free_discipline(struct sr_discipline *sd)
1439 {
1440 	struct sr_softc		*sc = sd->sd_sc;
1441 	int			i;
1442 
1443 	if (!sd)
1444 		return;
1445 
1446 	DNPRINTF(SR_D_DIS, "%s: sr_free_discipline %s\n",
1447 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
1448 
1449 	if (sd->sd_free_resources)
1450 		sd->sd_free_resources(sd);
1451 	if (sd->sd_vol.sv_chunks)
1452 		free(sd->sd_vol.sv_chunks, M_DEVBUF);
1453 	free(sd, M_DEVBUF);
1454 
1455 	for (i = 0; i < SR_MAXSCSIBUS; i++)
1456 		if (sc->sc_dis[i] == sd) {
1457 			sc->sc_dis[i] = NULL;
1458 			break;
1459 		}
1460 }
1461 
1462 void
1463 sr_shutdown_discipline(struct sr_discipline *sd)
1464 {
1465 	struct sr_softc		*sc = sd->sd_sc;
1466 	int			s;
1467 
1468 	if (!sd || !sc)
1469 		return;
1470 
1471 	DNPRINTF(SR_D_DIS, "%s: sr_shutdown_discipline %s\n",
1472 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
1473 
1474 	s = splbio();
1475 
1476 	if (sd->sd_shutdownhook)
1477 		shutdownhook_disestablish(sd->sd_shutdownhook);
1478 
1479 	/* make sure there isn't a sync pending and yield */
1480 	wakeup(sd);
1481 	while (sd->sd_sync || sd->sd_must_flush)
1482 		if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) ==
1483 		    EWOULDBLOCK)
1484 			break;
1485 
1486 #ifndef SMALL_KERNEL
1487 	sr_delete_sensors(sd);
1488 #endif /* SMALL_KERNEL */
1489 
1490 	if (sd->sd_scsibus_dev)
1491 		config_detach(sd->sd_scsibus_dev, DETACH_FORCE);
1492 
1493 	sr_unwind_chunks(sc, &sd->sd_vol.sv_chunk_list);
1494 
1495 	if (sd)
1496 		sr_free_discipline(sd);
1497 
1498 	splx(s);
1499 }
1500 
1501 int
1502 sr_raid_inquiry(struct sr_workunit *wu)
1503 {
1504 	struct sr_discipline	*sd = wu->swu_dis;
1505 	struct scsi_xfer	*xs = wu->swu_xs;
1506 	struct scsi_inquiry_data inq;
1507 
1508 	DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc));
1509 
1510 	bzero(&inq, sizeof(inq));
1511 	inq.device = T_DIRECT;
1512 	inq.dev_qual2 = 0;
1513 	inq.version = 2;
1514 	inq.response_format = 2;
1515 	inq.additional_length = 32;
1516 	strlcpy(inq.vendor, sd->sd_vol.sv_meta.svm_vendor,
1517 	    sizeof(inq.vendor));
1518 	strlcpy(inq.product, sd->sd_vol.sv_meta.svm_product,
1519 	    sizeof(inq.product));
1520 	strlcpy(inq.revision, sd->sd_vol.sv_meta.svm_revision,
1521 	    sizeof(inq.revision));
1522 	sr_copy_internal_data(xs, &inq, sizeof(inq));
1523 
1524 	return (0);
1525 }
1526 
1527 int
1528 sr_raid_read_cap(struct sr_workunit *wu)
1529 {
1530 	struct sr_discipline	*sd = wu->swu_dis;
1531 	struct scsi_xfer	*xs = wu->swu_xs;
1532 	struct scsi_read_cap_data rcd;
1533 	struct scsi_read_cap_data_16 rcd16;
1534 	int			rv = 1;
1535 
1536 	DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc));
1537 
1538 	if (xs->cmd->opcode == READ_CAPACITY) {
1539 		bzero(&rcd, sizeof(rcd));
1540 		if (sd->sd_vol.sv_meta.svm_size > 0xffffffffllu)
1541 			_lto4b(0xffffffff, rcd.addr);
1542 		else
1543 			_lto4b(sd->sd_vol.sv_meta.svm_size, rcd.addr);
1544 		_lto4b(512, rcd.length);
1545 		sr_copy_internal_data(xs, &rcd, sizeof(rcd));
1546 		rv = 0;
1547 	} else if (xs->cmd->opcode == READ_CAPACITY_16) {
1548 		bzero(&rcd16, sizeof(rcd16));
1549 		_lto8b(sd->sd_vol.sv_meta.svm_size, rcd16.addr);
1550 		_lto4b(512, rcd16.length);
1551 		sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
1552 		rv = 0;
1553 	}
1554 
1555 	return (rv);
1556 }
1557 
1558 int
1559 sr_raid_tur(struct sr_workunit *wu)
1560 {
1561 	struct sr_discipline	*sd = wu->swu_dis;
1562 
1563 	DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
1564 
1565 	if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
1566 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
1567 		sd->sd_scsi_sense.flags = SKEY_NOT_READY;
1568 		sd->sd_scsi_sense.add_sense_code = 0x04;
1569 		sd->sd_scsi_sense.add_sense_code_qual = 0x11;
1570 		sd->sd_scsi_sense.extra_len = 4;
1571 		return (1);
1572 	} else if (sd->sd_vol.sv_meta.svm_status == BIOC_SVINVALID) {
1573 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
1574 		sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
1575 		sd->sd_scsi_sense.add_sense_code = 0x05;
1576 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
1577 		sd->sd_scsi_sense.extra_len = 4;
1578 		return (1);
1579 	}
1580 
1581 	return (0);
1582 }
1583 
1584 int
1585 sr_raid_request_sense(struct sr_workunit *wu)
1586 {
1587 	struct sr_discipline	*sd = wu->swu_dis;
1588 	struct scsi_xfer	*xs = wu->swu_xs;
1589 
1590 	DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
1591 	    DEVNAME(sd->sd_sc));
1592 
1593 	/* use latest sense data */
1594 	bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
1595 
1596 	/* clear sense data */
1597 	bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
1598 
1599 	return (0);
1600 }
1601 
1602 int
1603 sr_raid_start_stop(struct sr_workunit *wu)
1604 {
1605 	struct sr_discipline	*sd = wu->swu_dis;
1606 	struct scsi_xfer	*xs = wu->swu_xs;
1607 	struct scsi_start_stop	*ss = (struct scsi_start_stop *)xs->cmd;
1608 	int			rv = 1;
1609 
1610 	DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
1611 	    DEVNAME(sd->sd_sc));
1612 
1613 	if (!ss)
1614 		return (rv);
1615 
1616 	if (ss->byte2 == 0x00) {
1617 		/* START */
1618 		if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
1619 			/* bring volume online */
1620 			/* XXX check to see if volume can be brought online */
1621 			sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE;
1622 		}
1623 		rv = 0;
1624 	} else /* XXX is this the check? if (byte == 0x01) */ {
1625 		/* STOP */
1626 		if (sd->sd_vol.sv_meta.svm_status == BIOC_SVONLINE) {
1627 			/* bring volume offline */
1628 			sd->sd_vol.sv_meta.svm_status = BIOC_SVOFFLINE;
1629 		}
1630 		rv = 0;
1631 	}
1632 
1633 	return (rv);
1634 }
1635 
1636 int
1637 sr_raid_sync(struct sr_workunit *wu)
1638 {
1639 	struct sr_discipline	*sd = wu->swu_dis;
1640 	int			s, rv = 0, ios;
1641 
1642 	DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
1643 
1644 	/* when doing a fake sync don't coun't the wu */
1645 	ios = wu->swu_fake ? 0 : 1;
1646 
1647 	s = splbio();
1648 	sd->sd_sync = 1;
1649 
1650 	while (sd->sd_wu_pending > ios)
1651 		if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
1652 			DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
1653 			    DEVNAME(sd->sd_sc));
1654 			rv = 1;
1655 			break;
1656 		}
1657 
1658 	sd->sd_sync = 0;
1659 	splx(s);
1660 
1661 	wakeup(&sd->sd_sync);
1662 
1663 	return (rv);
1664 }
1665 
1666 void
1667 sr_raid_startwu(struct sr_workunit *wu)
1668 {
1669 	struct sr_discipline	*sd = wu->swu_dis;
1670 	struct sr_ccb		*ccb;
1671 
1672 	splassert(IPL_BIO);
1673 
1674 	if (wu->swu_state == SR_WU_RESTART)
1675 		/*
1676 		 * no need to put the wu on the pending queue since we
1677 		 * are restarting the io
1678 		 */
1679 		 ;
1680 	else
1681 		/* move wu to pending queue */
1682 		TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
1683 
1684 	/* start all individual ios */
1685 	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
1686 		bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf);
1687 	}
1688 }
1689 
1690 u_int32_t
1691 sr_checksum(char *s, u_int32_t *p, u_int32_t size)
1692 {
1693 	u_int32_t		chk = 0;
1694 	int			i;
1695 
1696 	DNPRINTF(SR_D_MISC, "%s: sr_checksum %p %d\n", s, p, size);
1697 
1698 	if (size % sizeof(u_int32_t))
1699 		return (0); /* 0 is failure */
1700 
1701 	for (i = 0; i < size / sizeof(u_int32_t); i++)
1702 		chk ^= p[i];
1703 
1704 	return (chk);
1705 }
1706 
1707 void
1708 sr_get_uuid(struct sr_uuid *uuid)
1709 {
1710 	arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
1711 }
1712 
1713 void
1714 sr_print_uuid(struct sr_uuid *uuid, int cr)
1715 {
1716 	int			i;
1717 
1718 	for (i = 0; i < SR_UUID_MAX; i++)
1719 		printf("%x%s", uuid->sui_id[i],
1720 		    i < SR_UUID_MAX - 1 ? ":" : "");
1721 
1722 	if (cr)
1723 		printf("\n");
1724 }
1725 
1726 int
1727 sr_clear_metadata(struct sr_discipline *sd)
1728 {
1729 	struct sr_softc		*sc = sd->sd_sc;
1730 	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
1731 	struct sr_chunk		*ch_entry;
1732 	struct buf		b;
1733 	size_t			sz = SR_META_SIZE * 512;
1734 	void			*m;
1735 	int			rv = 0;
1736 
1737 	DNPRINTF(SR_D_META, "%s: sr_clear_metadata\n", DEVNAME(sc));
1738 
1739 	m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO);
1740 
1741 	SLIST_FOREACH(ch_entry, cl, src_link) {
1742 		bzero(&b, sizeof(b));
1743 
1744 		b.b_flags = B_WRITE;
1745 		b.b_blkno = SR_META_OFFSET;
1746 		b.b_bcount = sz;
1747 		b.b_bufsize = sz;
1748 		b.b_resid = sz;
1749 		b.b_data = (void *)m;
1750 		b.b_error = 0;
1751 		b.b_proc = curproc;
1752 		b.b_dev = ch_entry->src_dev_mm;
1753 		b.b_vp = NULL;
1754 		b.b_iodone = NULL;
1755 		LIST_INIT(&b.b_dep);
1756 		bdevsw_lookup(b.b_dev)->d_strategy(&b);
1757 		biowait(&b);
1758 
1759 		if (b.b_flags & B_ERROR) {
1760 			printf("%s: %s i/o error on block %lld while clearing "
1761 			    "metadata %d\n", DEVNAME(sc),
1762 			    ch_entry->src_devname, b.b_blkno, b.b_error);
1763 			rv++;
1764 			continue;
1765 		}
1766 	}
1767 
1768 	free(m, M_DEVBUF);
1769 	return (rv);
1770 }
1771 
1772 int
1773 sr_already_assembled(struct sr_discipline *sd)
1774 {
1775 	struct sr_softc		*sc = sd->sd_sc;
1776 	int			i;
1777 
1778 	for (i = 0; i < SR_MAXSCSIBUS; i++)
1779 		if (sc->sc_dis[i])
1780 			if (!bcmp(&sd->sd_meta->ssd_uuid,
1781 			    &sc->sc_dis[i]->sd_meta->ssd_uuid,
1782 			    sizeof(sd->sd_meta->ssd_uuid)))
1783 				return (1);
1784 
1785 	return (0);
1786 }
1787 
1788 void
1789 sr_save_metadata_callback(void *arg1, void *arg2)
1790 {
1791 	struct sr_discipline	*sd = arg1;
1792 	int			s;
1793 
1794 	s = splbio();
1795 
1796 	if (sr_save_metadata(arg1, SR_VOL_DIRTY))
1797 		printf("%s: save metadata failed\n",
1798 		    DEVNAME(sd->sd_sc));
1799 
1800 	sd->sd_must_flush = 0;
1801 	splx(s);
1802 }
1803 
1804 int
1805 sr_save_metadata(struct sr_discipline *sd, u_int32_t flags)
1806 {
1807 	struct sr_softc		*sc = sd->sd_sc;
1808 	struct sr_metadata	*sm = sd->sd_meta;
1809 	struct sr_vol_meta	*sv = &sd->sd_vol.sv_meta, *im_sv;
1810 	struct sr_chunk_meta	*im_sc;
1811 	struct sr_opt_meta	*im_so;
1812 	struct sr_chunk		*src;
1813 	struct buf		b;
1814 	struct sr_workunit	wu;
1815 	int			i, rv = 1, ch = 0, no_chunk, sz_opt;
1816 	size_t			sz = SR_META_SIZE * 512;
1817 
1818 	DNPRINTF(SR_D_META, "%s: sr_save_metadata %s\n",
1819 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
1820 
1821 	if (!sm) {
1822 		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
1823 		goto bad;
1824 	}
1825 
1826 	im_sv = (struct sr_vol_meta *)(sm + 1);
1827 	im_sc = (struct sr_chunk_meta *)(im_sv + 1);
1828 	no_chunk = sd->sd_vol.sv_meta.svm_no_chunk;
1829 	im_so = (struct sr_opt_meta *)(im_sc + no_chunk);
1830 
1831 	/* XXX this is a temporary hack until meta is properly redone */
1832 	if (sd->sd_type == SR_MD_CRYPTO)
1833 		sz_opt = sizeof(struct sr_opt_meta);
1834 	else
1835 		sz_opt = 0;
1836 
1837 	if (sizeof(struct sr_metadata) + sizeof(struct sr_vol_meta) +
1838 	    (sizeof(struct sr_chunk_meta) * no_chunk) +
1839 	    sz_opt > sz) {
1840 		printf("%s: too much metadata; metadata NOT written\n",
1841 		    DEVNAME(sc));
1842 		goto bad;
1843 	}
1844 
1845 	if (sm->ssd_magic == 0) {
1846 		/* initial metadata */
1847 		sm->ssd_magic = SR_MAGIC;
1848 		sm->ssd_version = SR_META_VERSION;
1849 		sm->ssd_size = sizeof(struct sr_metadata);
1850 		sm->ssd_ondisk = 0;
1851 		sm->ssd_flags = sd->sd_meta_flags;
1852 		/* get uuid from chunk 0 */
1853 		bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scm_uuid,
1854 		    &sm->ssd_uuid,
1855 		    sizeof(struct sr_uuid));
1856 
1857 		/* volume */
1858 		bcopy(sv, im_sv, sizeof(struct sr_vol_meta));
1859 		bcopy(&sm->ssd_uuid, &im_sv->svm_uuid,
1860 		    sizeof(im_sv->svm_uuid));
1861 		sm->ssd_vd_ver = SR_VOL_VERSION;
1862 		sm->ssd_vd_size = sizeof(struct sr_vol_meta);
1863 
1864 		/* chunk */
1865 		for (i = 0; i < no_chunk; i++)
1866 			bcopy(sd->sd_vol.sv_chunks[i], &im_sc[i],
1867 			    sizeof(struct sr_chunk_meta));
1868 
1869 		sm->ssd_chunk_ver = SR_CHUNK_VERSION;
1870 		sm->ssd_chunk_size = sizeof(struct sr_chunk_meta);
1871 		sm->ssd_chunk_no = no_chunk;
1872 
1873 		/* optional */
1874 		sm->ssd_opt_ver = SR_OPT_VERSION;
1875 		if (sd->sd_type == SR_MD_CRYPTO) {
1876 			bzero(im_so, sizeof(*im_so));
1877 			sm->ssd_opt_size = sizeof(struct sr_opt_meta);
1878 			sm->ssd_opt_no = 1;
1879 		} else {
1880 			sm->ssd_opt_size = 0;
1881 			sm->ssd_opt_no = 0;
1882 		}
1883 	}
1884 
1885 	/* from here on out metadata is updated */
1886 	sm->ssd_ondisk++;
1887 	im_sv->svm_flags |= flags;
1888 	sm->ssd_vd_chk = sr_checksum(DEVNAME(sc),
1889 	    (u_int32_t *)im_sv, sm->ssd_vd_size);
1890 
1891 	sm->ssd_chunk_chk = 0;
1892 	for (ch = 0; ch < sm->ssd_chunk_no; ch++)
1893 		sm->ssd_chunk_chk ^= sr_checksum(DEVNAME(sc),
1894 		    (u_int32_t *)&im_sc[ch], sm->ssd_chunk_size);
1895 
1896 	/* XXX do checksum on optional meta too */
1897 
1898 	sr_print_metadata(sm);
1899 
1900 	for (i = 0; i < sm->ssd_chunk_no; i++) {
1901 		memset(&b, 0, sizeof(b));
1902 
1903 		src = sd->sd_vol.sv_chunks[i];
1904 
1905 		/* skip disks that are offline */
1906 		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
1907 			continue;
1908 
1909 		/* copy encrypted key / passphrase into optinal metadata area */
1910 		if (sd->sd_type == SR_MD_CRYPTO && i < 2) {
1911 			im_so->som_type = SR_OPT_CRYPTO;
1912 			bcopy(&sd->mds.mdd_crypto.scr_meta,
1913 			    &im_so->som_meta.smm_crypto,
1914 			    sizeof(im_so->som_meta.smm_crypto));
1915 		}
1916 
1917 		/* calculate metdata checksum and ids */
1918 		sm->ssd_vd_volid = im_sv->svm_volid;
1919 		sm->ssd_chunk_id = i;
1920 		sm->ssd_checksum = sr_checksum(DEVNAME(sc),
1921 		    (u_int32_t *)sm, sm->ssd_size);
1922 
1923 		DNPRINTF(SR_D_META, "%s: sr_save_metadata %s: volid: %d "
1924 		    "chunkid: %d checksum: 0x%x\n",
1925 		    DEVNAME(sc), src->src_meta.scm_devname,
1926 		    sm->ssd_vd_volid, sm->ssd_chunk_id,
1927 		    sm->ssd_checksum);
1928 
1929 		b.b_flags = B_WRITE;
1930 		b.b_blkno = SR_META_OFFSET;
1931 		b.b_bcount = sz;
1932 		b.b_bufsize = sz;
1933 		b.b_resid = sz;
1934 		b.b_data = (void *)sm;
1935 		b.b_error = 0;
1936 		b.b_proc = curproc;
1937 		b.b_dev = src->src_dev_mm;
1938 		b.b_vp = NULL;
1939 		b.b_iodone = NULL;
1940 		LIST_INIT(&b.b_dep);
1941 		bdevsw_lookup(b.b_dev)->d_strategy(&b);
1942 
1943 		biowait(&b);
1944 
1945 		/* make sure in memory copy is clean */
1946 		if (sd->sd_type == SR_MD_CRYPTO)
1947 			bzero(im_so, sizeof(*im_so));
1948 		sm->ssd_vd_volid = 0;
1949 		sm->ssd_chunk_id = 0;
1950 		sm->ssd_checksum = 0;
1951 
1952 		/* XXX do something smart here */
1953 		/* mark chunk offline and restart metadata write */
1954 		if (b.b_flags & B_ERROR) {
1955 			printf("%s: %s i/o error on block %lld while writing "
1956 			    "metadata %d\n", DEVNAME(sc),
1957 			    src->src_meta.scm_devname, b.b_blkno, b.b_error);
1958 			goto bad;
1959 		}
1960 
1961 		DNPRINTF(SR_D_META, "%s: sr_save_metadata written to %s\n",
1962 		    DEVNAME(sc), src->src_meta.scm_devname);
1963 	}
1964 
1965 	bzero(&wu, sizeof(wu));
1966 	wu.swu_fake = 1;
1967 	wu.swu_dis = sd;
1968 	sd->sd_scsi_sync(&wu);
1969 
1970 	rv = 0;
1971 bad:
1972 	return (rv);
1973 }
1974 
1975 int
1976 sr_boot_assembly(struct sr_softc *sc)
1977 {
1978 	struct device		*dv;
1979 	struct buf		*bp;
1980 	struct bdevsw		*bdsw;
1981 	struct disklabel	label;
1982 	struct sr_metadata	*sm;
1983 	struct sr_metadata_list_head mlh;
1984 	struct sr_metadata_list *mle, *mle2;
1985 	struct sr_vol_meta	*vm;
1986 	struct bioc_createraid	bc;
1987 	dev_t			dev, devr, *dt = NULL;
1988 	int			error, majdev, i, no_dev, rv = 0;
1989 	size_t			sz = SR_META_SIZE * 512;
1990 
1991 	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
1992 
1993 	SLIST_INIT(&mlh);
1994 	bp = geteblk(sz);
1995 	if (!bp)
1996 		return (ENOMEM);
1997 
1998 	TAILQ_FOREACH(dv, &alldevs, dv_list) {
1999 		if (dv->dv_class != DV_DISK)
2000 			continue;
2001 
2002 		majdev = findblkmajor(dv);
2003 		if (majdev == -1)
2004 			continue;
2005 
2006 		bp->b_dev = dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
2007 		bdsw = &bdevsw[majdev];
2008 
2009 		/* XXX is there  a better way of excluding some devices? */
2010 		if (!strncmp(dv->dv_xname, "fd", 2) ||
2011 		    !strncmp(dv->dv_xname, "cd", 2) ||
2012 		    !strncmp(dv->dv_xname, "rx", 2))
2013 			continue;
2014 		/*
2015 		 * The devices are being opened with S_IFCHR instead of
2016 		 * S_IFBLK so that the SCSI mid-layer does not whine when
2017 		 * media is not inserted in certain devices like zip drives
2018 		 * and such.
2019 		 */
2020 
2021 		/* open device */
2022 		error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc);
2023 		if (error) {
2024 			DNPRINTF(SR_D_META, "%s: sr_boot_assembly open failed"
2025 			    "\n", DEVNAME(sc));
2026 			continue;
2027 		}
2028 
2029 		/* get disklabel */
2030 		error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label,
2031 		    FREAD, curproc);
2032 		if (error) {
2033 			DNPRINTF(SR_D_META, "%s: sr_boot_assembly ioctl "
2034 			    "failed\n", DEVNAME(sc));
2035 			error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
2036 			continue;
2037 		}
2038 
2039 		/* we are done, close device */
2040 		error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
2041 		if (error) {
2042 			DNPRINTF(SR_D_META, "%s: sr_boot_assembly close "
2043 			    "failed\n", DEVNAME(sc));
2044 			continue;
2045 		}
2046 
2047 		/* are we a softraid partition? */
2048 		for (i = 0; i < MAXPARTITIONS; i++) {
2049 			if (label.d_partitions[i].p_fstype != FS_RAID)
2050 				continue;
2051 
2052 			/* open device */
2053 			bp->b_dev = devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
2054 			error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc);
2055 			if (error) {
2056 				DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
2057 				    "open failed, partition %d\n",
2058 				    DEVNAME(sc), i);
2059 				continue;
2060 			}
2061 			/* read metadat */
2062 			bp->b_flags = B_BUSY | B_READ;
2063 			bp->b_blkno = SR_META_OFFSET;
2064 			bp->b_cylinder = 0;
2065 			bp->b_bcount = sz;
2066 			bp->b_bufsize = sz;
2067 			bp->b_resid = sz;
2068 			(*bdsw->d_strategy)(bp);
2069 			if ((error = biowait(bp))) {
2070 				DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
2071 				    "strategy failed, partition %d\n",
2072 				    DEVNAME(sc));
2073 				error = (*bdsw->d_close)(devr, FREAD, S_IFCHR,
2074 				    curproc);
2075 				continue;
2076 			}
2077 
2078 			sm = (struct sr_metadata *)bp->b_data;
2079 			if (!sr_validate_metadata(sc, devr, sm)) {
2080 				/* we got one; save it off */
2081 				mle = malloc(sizeof(*mle), M_DEVBUF,
2082 				    M_WAITOK | M_ZERO);
2083 				mle->sml_metadata = malloc(sz, M_DEVBUF,
2084 				    M_WAITOK | M_ZERO);
2085 				bcopy(sm, mle->sml_metadata, sz);
2086 				mle->sml_mm = devr;
2087 				SLIST_INSERT_HEAD(&mlh, mle, sml_link);
2088 			}
2089 
2090 			/* we are done, close device */
2091 			error = (*bdsw->d_close)(devr, FREAD, S_IFCHR,
2092 			    curproc);
2093 			if (error) {
2094 				DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
2095 				    "close failed\n", DEVNAME(sc));
2096 				continue;
2097 			}
2098 		}
2099 	}
2100 
2101 	/*
2102 	 * XXX poor mans hack that doesn't keep disks in order and does not
2103 	 * roam disks correctly.  replace this with something smarter that
2104 	 * orders disks by volid, chunkid and uuid.
2105 	 */
2106 	dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK);
2107 	SLIST_FOREACH(mle, &mlh, sml_link) {
2108 		/* chunk used already? */
2109 		if (mle->sml_used)
2110 			continue;
2111 
2112 		no_dev = 0;
2113 		bzero(dt, BIOC_CRMAXLEN);
2114 		SLIST_FOREACH(mle2, &mlh, sml_link) {
2115 			/* chunk used already? */
2116 			if (mle2->sml_used)
2117 				continue;
2118 
2119 			/* are we the same volume? */
2120 			if (mle->sml_metadata->ssd_vd_volid !=
2121 			    mle2->sml_metadata->ssd_vd_volid)
2122 				continue;
2123 
2124 			/* same uuid? */
2125 			if (bcmp(&mle->sml_metadata->ssd_uuid,
2126 			    &mle2->sml_metadata->ssd_uuid,
2127 			    sizeof(mle->sml_metadata->ssd_uuid)))
2128 				continue;
2129 
2130 			/* sanity */
2131 			if (dt[mle2->sml_metadata->ssd_chunk_id]) {
2132 				printf("%s: chunk id already in use; can not "
2133 				    "assemble volume\n", DEVNAME(sc));
2134 				goto unwind;
2135 			}
2136 			dt[mle2->sml_metadata->ssd_chunk_id] = mle2->sml_mm;
2137 			no_dev++;
2138 			mle2->sml_used = 1;
2139 		}
2140 		if (mle->sml_metadata->ssd_chunk_no != no_dev) {
2141 			printf("%s: not assembling partial disk that used to "
2142 			    "be volume %d\n", DEVNAME(sc),
2143 			    mle->sml_metadata->ssd_vd_volid);
2144 			continue;
2145 		}
2146 
2147 		bzero(&bc, sizeof(bc));
2148 		vm = (struct sr_vol_meta *)(mle->sml_metadata + 1);
2149 		bc.bc_level = vm->svm_level;
2150 		bc.bc_dev_list_len = no_dev * sizeof(dev_t);
2151 		bc.bc_dev_list = dt;
2152 		bc.bc_flags = BIOC_SCDEVT;
2153 		sr_ioctl_createraid(sc, &bc, 0);
2154 		rv++;
2155 	}
2156 
2157 unwind:
2158 	if (dt)
2159 		free(dt, M_DEVBUF);
2160 
2161 	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
2162 		mle2 = SLIST_NEXT(mle, sml_link);
2163 
2164 		free(mle->sml_metadata, M_DEVBUF);
2165 		free(mle, M_DEVBUF);
2166 	}
2167 	SLIST_INIT(&mlh);
2168 
2169 	return (rv);
2170 }
2171 
2172 int
2173 sr_validate_metadata(struct sr_softc *sc, dev_t dev, struct sr_metadata *sm)
2174 {
2175 	struct sr_vol_meta	*mv;
2176 	struct sr_chunk_meta	*mc;
2177 	char			*name, devname[32];
2178 	int			maj, part, unit;
2179 	u_int32_t		chk;
2180 
2181 	DNPRINTF(SR_D_META, "%s: sr_validate_metadata(0x%x)\n",
2182 	    DEVNAME(sc), dev);
2183 
2184 	bzero(devname, sizeof(devname));
2185 
2186 	if (sm->ssd_magic != SR_MAGIC)
2187 		goto bad;
2188 
2189 	maj = major(dev);
2190 	part = DISKPART(dev);
2191 	unit = DISKUNIT(dev);
2192 
2193 	name = findblkname(maj);
2194 	if (name == NULL)
2195 		goto bad;
2196 
2197 	snprintf(devname, sizeof(devname),
2198 	    "%s%d%c", name, unit, part + 'a');
2199 	name = devname;
2200 
2201 	/* validate metadata */
2202 	if (sm->ssd_version != SR_META_VERSION) {
2203 		printf("%s: %s can not read metadata version %d, "
2204 		    "expected %d\n", DEVNAME(sc),
2205 		    devname, sm->ssd_version,
2206 		    SR_META_VERSION);
2207 		goto bad;
2208 	}
2209 	if (sm->ssd_size != sizeof(struct sr_metadata)) {
2210 		printf("%s: %s invalid metadata size %d, "
2211 		    "expected %d\n", DEVNAME(sc),
2212 		    devname, sm->ssd_size,
2213 		    sizeof(struct sr_metadata));
2214 		goto bad;
2215 	}
2216 	chk = sr_checksum(DEVNAME(sc), (u_int32_t *)sm, sm->ssd_size);
2217 	/*
2218 	 * since the checksum value is part of the checksum a good
2219 	 * result equals 0
2220 	 */
2221 	if (chk != 0) {
2222 		printf("%s: %s invalid metadata checksum 0x%x, "
2223 		    "expected 0x%x\n", DEVNAME(sc),
2224 		    devname, sm->ssd_checksum, chk);
2225 		goto bad;
2226 	}
2227 
2228 	/* validate volume metadata */
2229 	if (sm->ssd_vd_ver != SR_VOL_VERSION) {
2230 		printf("%s: %s can not read volume metadata version "
2231 		    "%d, expected %d\n", DEVNAME(sc),
2232 		    devname, sm->ssd_vd_ver,
2233 		    SR_VOL_VERSION);
2234 		goto bad;
2235 	}
2236 	if (sm->ssd_vd_size != sizeof(struct sr_vol_meta)) {
2237 		printf("%s: %s invalid volume metadata size %d, "
2238 		    "expected %d\n", DEVNAME(sc),
2239 		    devname, sm->ssd_vd_size,
2240 		    sizeof(struct sr_vol_meta));
2241 		goto bad;
2242 	}
2243 	mv = (struct sr_vol_meta *)(sm + 1);
2244 	chk = sr_checksum(DEVNAME(sc), (u_int32_t *)mv, sm->ssd_vd_size);
2245 	if (chk != sm->ssd_vd_chk) {
2246 		printf("%s: %s invalid volume metadata checksum 0x%x, "
2247 		    "expected 0x%x\n", DEVNAME(sc),
2248 		    devname, sm->ssd_vd_chk, chk);
2249 		goto bad;
2250 	}
2251 
2252 	/* validate chunk metadata */
2253 	if (sm->ssd_chunk_ver != SR_CHUNK_VERSION) {
2254 		printf("%s: %s can not read chunk metadata version "
2255 		    "%d, expected %d\n", DEVNAME(sc),
2256 		    devname, sm->ssd_chunk_ver,
2257 		    SR_CHUNK_VERSION);
2258 		goto bad;
2259 	}
2260 	if (sm->ssd_chunk_size != sizeof(struct sr_chunk_meta)) {
2261 		printf("%s: %s invalid chunk metadata size %d, "
2262 		    "expected %d\n", DEVNAME(sc),
2263 		    devname, sm->ssd_chunk_size,
2264 		    sizeof(struct sr_chunk_meta));
2265 		goto bad;
2266 	}
2267 
2268 	mc = (struct sr_chunk_meta *)(mv + 1);
2269 	/* checksum is calculated over ALL chunks */
2270 	chk = sr_checksum(DEVNAME(sc), (u_int32_t *)(mc),
2271 	    sm->ssd_chunk_size * sm->ssd_chunk_no);
2272 
2273 	if (chk != sm->ssd_chunk_chk) {
2274 		printf("%s: %s invalid chunk metadata checksum 0x%x, "
2275 		    "expected 0x%x\n", DEVNAME(sc),
2276 		    devname, sm->ssd_chunk_chk, chk);
2277 		goto bad;
2278 	}
2279 
2280 	/* warn if disk changed order */
2281 	if (strncmp(mc[sm->ssd_chunk_id].scm_devname, name,
2282 	    sizeof(mc[sm->ssd_chunk_id].scm_devname)))
2283 		printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
2284 		    mc[sm->ssd_chunk_id].scm_devname, name);
2285 
2286 	/* we have meta data on disk */
2287 	DNPRINTF(SR_D_META, "%s: sr_validate_metadata valid metadata %s\n",
2288 	    DEVNAME(sc), devname);
2289 
2290 	return (0);
2291 bad:
2292 	DNPRINTF(SR_D_META, "%s: sr_validate_metadata invalid metadata %s\n",
2293 	    DEVNAME(sc), devname);
2294 
2295 	return (1);
2296 }
2297 
/*
 * Validate a strip size given in bytes.
 *
 * An acceptable strip size is a non-zero multiple of 512 that is also a
 * power of two.  Returns the base-2 logarithm of `b' (i.e. the shift
 * count, 9 for 512) when it is acceptable and -1 otherwise.
 */
int32_t
sr_validate_stripsize(u_int32_t b)
{
	int			s = 0;

	/*
	 * Zero is a multiple of 512 but has no bit set, so the scan
	 * below would never terminate; reject it up front.
	 */
	if (b == 0 || b % 512)
		return (-1);

	/* shift out the trailing zero bits, counting them */
	while ((b & 1) == 0) {
		b >>= 1;
		s++;
	}

	/* only powers of two: nothing may remain above the lowest set bit */
	b >>= 1;
	if (b)
		return (-1);

	return (s);
}
2318 
2319 void
2320 sr_shutdown(void *arg)
2321 {
2322 	struct sr_discipline	*sd = arg;
2323 #ifdef SR_DEBUG
2324 	struct sr_softc		*sc = sd->sd_sc;
2325 #endif
2326 	DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n",
2327 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
2328 
2329 	sr_save_metadata(sd, 0);
2330 
2331 	sr_shutdown_discipline(sd);
2332 }
2333 
2334 int
2335 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
2336 {
2337 	struct sr_discipline	*sd = wu->swu_dis;
2338 	struct scsi_xfer	*xs = wu->swu_xs;
2339 	int			rv = 1;
2340 
2341 	DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
2342 	    xs->cmd->opcode);
2343 
2344 	if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
2345 		DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
2346 		    DEVNAME(sd->sd_sc));
2347 		goto bad;
2348 	}
2349 
2350 	if (xs->datalen == 0) {
2351 		printf("%s: %s: illegal block count\n",
2352 		    DEVNAME(sd->sd_sc), func, sd->sd_vol.sv_meta.svm_devname);
2353 		goto bad;
2354 	}
2355 
2356 	if (xs->cmdlen == 10)
2357 		*blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
2358 	else if (xs->cmdlen == 16)
2359 		*blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
2360 	else if (xs->cmdlen == 6)
2361 		*blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
2362 	else {
2363 		printf("%s: %s: illegal cmdlen\n", DEVNAME(sd->sd_sc), func,
2364 		    sd->sd_vol.sv_meta.svm_devname);
2365 		goto bad;
2366 	}
2367 
2368 	wu->swu_blk_start = *blk;
2369 	wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
2370 
2371 	if (wu->swu_blk_end > sd->sd_vol.sv_meta.svm_size) {
2372 		DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
2373 		    "end: %lld length: %d\n",
2374 		    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
2375 		    wu->swu_blk_end, xs->datalen);
2376 
2377 		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
2378 		    SSD_ERRCODE_VALID;
2379 		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
2380 		sd->sd_scsi_sense.add_sense_code = 0x21;
2381 		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
2382 		sd->sd_scsi_sense.extra_len = 4;
2383 		goto bad;
2384 	}
2385 
2386 	rv = 0;
2387 bad:
2388 	return (rv);
2389 }
2390 
2391 int
2392 sr_check_io_collision(struct sr_workunit *wu)
2393 {
2394 	struct sr_discipline	*sd = wu->swu_dis;
2395 	struct sr_workunit	*wup;
2396 
2397 	splassert(IPL_BIO);
2398 
2399 	/* walk queue backwards and fill in collider if we have one */
2400 	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
2401 		if (wu->swu_blk_end < wup->swu_blk_start ||
2402 		    wup->swu_blk_end < wu->swu_blk_start)
2403 			continue;
2404 
2405 		/* we have an LBA collision, defer wu */
2406 		wu->swu_state = SR_WU_DEFERRED;
2407 		if (wup->swu_collider)
2408 			/* wu is on deferred queue, append to last wu */
2409 			while (wup->swu_collider)
2410 				wup = wup->swu_collider;
2411 
2412 		wup->swu_collider = wu;
2413 		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
2414 		sd->sd_wu_collisions++;
2415 		goto queued;
2416 	}
2417 
2418 	return (0);
2419 queued:
2420 	return (1);
2421 }
2422 
2423 #ifndef SMALL_KERNEL
2424 int
2425 sr_create_sensors(struct sr_discipline *sd)
2426 {
2427 	struct sr_softc		*sc = sd->sd_sc;
2428 	int			rv = 1;
2429 
2430 	DNPRINTF(SR_D_STATE, "%s: %s: sr_create_sensors\n",
2431 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
2432 
2433 	strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc),
2434 	    sizeof(sd->sd_vol.sv_sensordev.xname));
2435 
2436 	sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
2437 	sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
2438 	strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_vol.sv_meta.svm_devname,
2439 	    sizeof(sd->sd_vol.sv_sensor.desc));
2440 
2441 	sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor);
2442 
2443 	if (sc->sc_sensors_running == 0) {
2444 		if (sensor_task_register(sc, sr_refresh_sensors, 10) == NULL)
2445 			goto bad;
2446 		sc->sc_sensors_running = 1;
2447 	}
2448 	sensordev_install(&sd->sd_vol.sv_sensordev);
2449 
2450 	rv = 0;
2451 bad:
2452 	return (rv);
2453 }
2454 
2455 void
2456 sr_delete_sensors(struct sr_discipline *sd)
2457 {
2458 #ifdef SR_DEBUG
2459 	struct sr_softc		*sc = sd->sd_sc;
2460 #endif
2461 	DNPRINTF(SR_D_STATE, "%s: %s: sr_delete_sensors\n",
2462 	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
2463 
2464 	if (sd->sd_vol.sv_sensor_valid)
2465 		sensordev_deinstall(&sd->sd_vol.sv_sensordev);
2466 }
2467 
2468 void
2469 sr_refresh_sensors(void *arg)
2470 {
2471 	struct sr_softc		*sc = arg;
2472 	int			i, vol;
2473 	struct sr_volume	*sv;
2474 
2475 	DNPRINTF(SR_D_STATE, "%s: sr_refresh_sensors\n", DEVNAME(sc));
2476 
2477 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2478 		/* XXX this will not work when we stagger disciplines */
2479 		if (!sc->sc_dis[i])
2480 			continue;
2481 
2482 		sv = &sc->sc_dis[i]->sd_vol;
2483 
2484 		switch(sv->sv_meta.svm_status) {
2485 		case BIOC_SVOFFLINE:
2486 			sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
2487 			sv->sv_sensor.status = SENSOR_S_CRIT;
2488 			break;
2489 
2490 		case BIOC_SVDEGRADED:
2491 			sv->sv_sensor.value = SENSOR_DRIVE_PFAIL;
2492 			sv->sv_sensor.status = SENSOR_S_WARN;
2493 			break;
2494 
2495 		case BIOC_SVSCRUB:
2496 		case BIOC_SVONLINE:
2497 			sv->sv_sensor.value = SENSOR_DRIVE_ONLINE;
2498 			sv->sv_sensor.status = SENSOR_S_OK;
2499 			break;
2500 
2501 		default:
2502 			sv->sv_sensor.value = 0; /* unknown */
2503 			sv->sv_sensor.status = SENSOR_S_UNKNOWN;
2504 		}
2505 	}
2506 }
2507 #endif /* SMALL_KERNEL */
2508 
2509 #ifdef SR_FANCY_STATS
2510 void				sr_print_stats(void);
2511 
2512 void
2513 sr_print_stats(void)
2514 {
2515 	struct sr_softc		*sc;
2516 	struct sr_discipline	*sd;
2517 	int			i, vol;
2518 
2519 	for (i = 0; i < softraid_cd.cd_ndevs; i++)
2520 		if (softraid_cd.cd_devs[i]) {
2521 			sc = softraid_cd.cd_devs[i];
2522 			/* we'll only have one softc */
2523 			break;
2524 		}
2525 
2526 	if (!sc) {
2527 		printf("no softraid softc found\n");
2528 		return;
2529 	}
2530 
2531 	for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
2532 		/* XXX this will not work when we stagger disciplines */
2533 		if (!sc->sc_dis[i])
2534 			continue;
2535 
2536 		sd = sc->sc_dis[i];
2537 		printf("%s: ios pending: %d  collisions %llu\n",
2538 		    sd->sd_vol.sv_meta.svm_devname,
2539 		    sd->sd_wu_pending,
2540 		    sd->sd_wu_collisions);
2541 	}
2542 }
2543 #endif /* SR_FANCY_STATS */
2544 
2545 #ifdef SR_DEBUG
2546 void
2547 sr_print_metadata(struct sr_metadata *sm)
2548 {
2549 	struct sr_vol_meta	*im_sv;
2550 	struct sr_chunk_meta	*im_sc;
2551 	struct sr_opt_meta	*im_so;
2552 	int			ch;
2553 
2554 	if (!(sr_debug & SR_D_META))
2555 		return;
2556 
2557 	im_sv = (struct sr_vol_meta *)(sm + 1);
2558 	im_sc = (struct sr_chunk_meta *)(im_sv + 1);
2559 	im_so = (struct sr_opt_meta *)(im_sc + im_sv->svm_no_chunk);
2560 
2561 	DNPRINTF(SR_D_META, "\tmeta magic 0x%llx\n", sm->ssd_magic);
2562 	DNPRINTF(SR_D_META, "\tmeta version %d\n", sm->ssd_version);
2563 	DNPRINTF(SR_D_META, "\tmeta checksum 0x%x\n", sm->ssd_checksum);
2564 	DNPRINTF(SR_D_META, "\tmeta size %d\n", sm->ssd_size);
2565 	DNPRINTF(SR_D_META, "\tmeta on disk version %u\n", sm->ssd_ondisk);
2566 	DNPRINTF(SR_D_META, "\tmeta uuid ");
2567 	sr_print_uuid(&sm->ssd_uuid, 1);
2568 	DNPRINTF(SR_D_META, "\tvd version %d\n", sm->ssd_vd_ver);
2569 	DNPRINTF(SR_D_META, "\tvd size %lu\n", sm->ssd_vd_size);
2570 	DNPRINTF(SR_D_META, "\tvd id %u\n", sm->ssd_vd_volid);
2571 	DNPRINTF(SR_D_META, "\tvd checksum 0x%x\n", sm->ssd_vd_chk);
2572 	DNPRINTF(SR_D_META, "\tchunk version %d\n", sm->ssd_chunk_ver);
2573 	DNPRINTF(SR_D_META, "\tchunks %d\n", sm->ssd_chunk_no);
2574 	DNPRINTF(SR_D_META, "\tchunk size %u\n", sm->ssd_chunk_size);
2575 	DNPRINTF(SR_D_META, "\tchunk id %u\n", sm->ssd_chunk_id);
2576 	DNPRINTF(SR_D_META, "\tchunk checksum 0x%x\n", sm->ssd_chunk_chk);
2577 	if (sm->ssd_opt_no) {
2578 		DNPRINTF(SR_D_META, "\topt version %d\n", sm->ssd_opt_ver);
2579 		DNPRINTF(SR_D_META, "\topt items %d\n", sm->ssd_opt_no);
2580 		DNPRINTF(SR_D_META, "\topt size %d\n", sm->ssd_opt_size);
2581 		DNPRINTF(SR_D_META, "\topt chk 0x%x\n", sm->ssd_opt_chk);
2582 	}
2583 
2584 
2585 	DNPRINTF(SR_D_META, "\t\tvol id %d\n", im_sv->svm_volid);
2586 	DNPRINTF(SR_D_META, "\t\tvol status %d\n", im_sv->svm_status);
2587 	DNPRINTF(SR_D_META, "\t\tvol flags 0x%x\n", im_sv->svm_flags);
2588 	DNPRINTF(SR_D_META, "\t\tvol level %d\n", im_sv->svm_level);
2589 	DNPRINTF(SR_D_META, "\t\tvol size %lld\n", im_sv->svm_size);
2590 	DNPRINTF(SR_D_META, "\t\tvol name %s\n", im_sv->svm_devname);
2591 	DNPRINTF(SR_D_META, "\t\tvol vendor %s\n", im_sv->svm_vendor);
2592 	DNPRINTF(SR_D_META, "\t\tvol prod %s\n", im_sv->svm_product);
2593 	DNPRINTF(SR_D_META, "\t\tvol rev %s\n", im_sv->svm_revision);
2594 	DNPRINTF(SR_D_META, "\t\tvol no chunks %d\n", im_sv->svm_no_chunk);
2595 	DNPRINTF(SR_D_META, "\t\tvol uuid ");
2596 	sr_print_uuid(& im_sv->svm_uuid, 1);
2597 	DNPRINTF(SR_D_META, "\t\tvol stripsize %d\n", im_sv->svm_strip_size);
2598 
2599 	for (ch = 0; ch < im_sv->svm_no_chunk; ch++) {
2600 		DNPRINTF(SR_D_META, "\t\t\tchunk vol id %d\n",
2601 		    im_sc[ch].scm_volid);
2602 		DNPRINTF(SR_D_META, "\t\t\tchunk id %d\n",
2603 		    im_sc[ch].scm_chunk_id);
2604 		DNPRINTF(SR_D_META, "\t\t\tchunk status %d\n",
2605 		    im_sc[ch].scm_status);
2606 		DNPRINTF(SR_D_META, "\t\t\tchunk name %s\n",
2607 		    im_sc[ch].scm_devname);
2608 		DNPRINTF(SR_D_META, "\t\t\tchunk size %lld\n",
2609 		    im_sc[ch].scm_size);
2610 		DNPRINTF(SR_D_META, "\t\t\tchunk coerced size %lld\n",
2611 		    im_sc[ch].scm_coerced_size);
2612 		DNPRINTF(SR_D_META, "\t\t\tchunk uuid ");
2613 		sr_print_uuid(&im_sc[ch].scm_uuid, 1);
2614 	}
2615 }
2616 
/*
 * Debug helper: print `len' bytes starting at `p' as two-digit hex
 * values separated by spaces, followed by a newline.  A `len' of zero
 * or less prints only the newline.
 */
void
sr_dump_mem(u_int8_t *p, int len)
{
	for (; len > 0; len--, p++)
		printf("%02x ", *p);

	printf("\n");
}
2626 
2627 #endif /* SR_DEBUG */
2628