xref: /dflybsd-src/sys/dev/disk/ccd/ccd.c (revision bfc09ba0a4d805c1860f88e64d6ae9a407d3567d)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $
35  */
36 /*
37  * Copyright (c) 1995 Jason R. Thorpe.
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed for the NetBSD Project
51  *	by Jason R. Thorpe.
52  * 4. The name of the author may not be used to endorse or promote products
53  *    derived from this software without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
56  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
57  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
58  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
59  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
60  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
61  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
62  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
63  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 
68 /*
69  * Copyright (c) 1988 University of Utah.
70  * Copyright (c) 1990, 1993
71  *	The Regents of the University of California.  All rights reserved.
72  *
73  * This code is derived from software contributed to Berkeley by
74  * the Systems Programming Group of the University of Utah Computer
75  * Science Department.
76  *
77  * Redistribution and use in source and binary forms, with or without
78  * modification, are permitted provided that the following conditions
79  * are met:
80  * 1. Redistributions of source code must retain the above copyright
81  *    notice, this list of conditions and the following disclaimer.
82  * 2. Redistributions in binary form must reproduce the above copyright
83  *    notice, this list of conditions and the following disclaimer in the
84  *    documentation and/or other materials provided with the distribution.
85  * 3. All advertising materials mentioning features or use of this software
86  *    must display the following acknowledgement:
87  *	This product includes software developed by the University of
88  *	California, Berkeley and its contributors.
89  * 4. Neither the name of the University nor the names of its contributors
90  *    may be used to endorse or promote products derived from this software
91  *    without specific prior written permission.
92  *
93  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
94  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
95  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
96  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
97  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
98  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
99  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
101  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
102  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
103  * SUCH DAMAGE.
104  *
105  * from: Utah $Hdr: cd.c 1.6 90/11/28$
106  */
107 /*
108  * @(#)cd.c	8.2 (Berkeley) 11/16/93
109  * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
110  * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
111  * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $
112  */
113 
114 /*
115  * "Concatenated" disk driver.
116  *
117  * Original dynamic configuration support by:
118  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
119  *	Numerical Aerodynamic Simulation Facility
120  *	Mail Stop 258-6
121  *	NASA Ames Research Center
122  *	Moffett Field, CA 94035
123  */
124 
125 #include "use_ccd.h"
126 
127 #include <sys/param.h>
128 #include <sys/systm.h>
129 #include <sys/kernel.h>
130 #include <sys/module.h>
131 #include <sys/proc.h>
132 #include <sys/buf.h>
133 #include <sys/malloc.h>
134 #include <sys/nlookup.h>
135 #include <sys/conf.h>
136 #include <sys/stat.h>
137 #include <sys/sysctl.h>
138 #include <sys/disk.h>
139 #include <sys/dtype.h>
140 #include <sys/diskslice.h>
141 #include <sys/devicestat.h>
142 #include <sys/fcntl.h>
143 #include <sys/vnode.h>
144 #include <sys/buf2.h>
145 #include <sys/ccdvar.h>
146 
147 #include <vm/vm_zone.h>
148 
149 #include <vfs/ufs/dinode.h> 	/* XXX Used only for fs.h */
150 #include <vfs/ufs/fs.h> 	/* XXX used only to get BBSIZE and SBSIZE */
151 
152 #include <sys/thread2.h>
153 
/*
 * Building with CCDDEBUG implies DEBUG as far as this file is concerned.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug trace categories and the debug.ccddebug knob that selects them.
 * Every category is enabled by default.
 *
 * NOTE: DEBUG is undefined again at the bottom of this section, so every
 * "#ifdef DEBUG" section that follows in this file is compiled out even
 * when the kernel was built with DEBUG or CCDDEBUG.
 */
#if defined(DEBUG)
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug =
    CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
#undef DEBUG
#endif
169 
/* Map a ccd device to its unit / partition via the generic disk macros. */
#define	ccdunit(dev)	dkunit(dev)
#define	ccdpart(dev)	dkpart(dev)
172 
173 /*
174    This is how mirroring works (only writes are special):
175 
176    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
177    linked together by the cb_mirror field.  "cb_pflags &
178    CCDPF_MIRROR_DONE" is set to 0 on both of them.
179 
180    When a component returns to ccdiodone(), it checks if "cb_pflags &
181    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
182    flag and returns.  If it is, it means its partner has already
183    returned, so it will go to the regular cleanup.
184 
185  */
186 
187 struct ccdbuf {
188 	struct buf	cb_buf;		/* new I/O buf */
189 	struct vnode	*cb_vp;		/* related vnode */
190 	struct bio	*cb_obio;	/* ptr. to original I/O buf */
191 	struct ccdbuf	*cb_freenext;	/* free list link */
192 	int		cb_unit;	/* target unit */
193 	int		cb_comp;	/* target component */
194 	int		cb_pflags;	/* mirror/parity status flag */
195 	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
196 };
197 
198 /* bits in cb_pflags */
199 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
200 
201 static d_open_t ccdopen;
202 static d_close_t ccdclose;
203 static d_strategy_t ccdstrategy;
204 static d_ioctl_t ccdioctl;
205 static d_dump_t ccddump;
206 
207 #define NCCDFREEHIWAT	16
208 
209 #define CDEV_MAJOR 74
210 
211 static struct dev_ops ccd_ops = {
212 	{ "ccd", CDEV_MAJOR, D_DISK },
213 	.d_open =	ccdopen,
214 	.d_close =	ccdclose,
215 	.d_read =	physread,
216 	.d_write =	physwrite,
217 	.d_ioctl =	ccdioctl,
218 	.d_strategy =	ccdstrategy,
219 	.d_dump =	ccddump
220 };
221 
222 /* called during module initialization */
223 static	void ccdattach (void);
224 static	int ccddetach (void);
225 static	int ccd_modevent (module_t, int, void *);
226 
227 /* called by biodone() at interrupt time */
228 static	void ccdiodone (struct bio *bio);
229 
230 static	void ccdstart (struct ccd_softc *, struct bio *);
231 static	void ccdinterleave (struct ccd_softc *, int);
232 static	void ccdintr (struct ccd_softc *, struct bio *);
233 static	int ccdinit (struct ccddevice *, char **, struct ucred *);
234 static	int ccdlookup (char *, struct vnode **);
235 static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
236 		struct bio *, off_t, caddr_t, long);
237 static	int ccdlock (struct ccd_softc *);
238 static	void ccdunlock (struct ccd_softc *);
239 
240 #ifdef DEBUG
241 static	void printiinfo (struct ccdiinfo *);
242 #endif
243 
244 /* Non-private for the benefit of libkvm. */
245 struct	ccd_softc *ccd_softc;
246 struct	ccddevice *ccddevs;
247 struct	ccdbuf *ccdfreebufs;
248 static	int numccdfreebufs;
249 static	int numccd = 0;
250 
251 /*
252  * getccdbuf() -	Allocate and zero a ccd buffer.
253  *
254  *	This routine is called at splbio().
255  */
256 
257 static __inline
258 struct ccdbuf *
259 getccdbuf(void)
260 {
261 	struct ccdbuf *cbp;
262 
263 	/*
264 	 * Allocate from freelist or malloc as necessary
265 	 */
266 	if ((cbp = ccdfreebufs) != NULL) {
267 		ccdfreebufs = cbp->cb_freenext;
268 		--numccdfreebufs;
269 		reinitbufbio(&cbp->cb_buf);
270 	} else {
271 		cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
272 		initbufbio(&cbp->cb_buf);
273 	}
274 
275 	/*
276 	 * independant struct buf initialization
277 	 */
278 	buf_dep_init(&cbp->cb_buf);
279 	BUF_LOCKINIT(&cbp->cb_buf);
280 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
281 	BUF_KERNPROC(&cbp->cb_buf);
282 	cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
283 
284 	return(cbp);
285 }
286 
287 /*
288  * putccdbuf() -	Free a ccd buffer.
289  *
290  *	This routine is called at splbio().
291  */
292 
293 static __inline
294 void
295 putccdbuf(struct ccdbuf *cbp)
296 {
297 	BUF_UNLOCK(&cbp->cb_buf);
298 	BUF_LOCKFREE(&cbp->cb_buf);
299 
300 	if (numccdfreebufs < NCCDFREEHIWAT) {
301 		cbp->cb_freenext = ccdfreebufs;
302 		ccdfreebufs = cbp;
303 		++numccdfreebufs;
304 	} else {
305 		kfree((caddr_t)cbp, M_DEVBUF);
306 	}
307 }
308 
309 /*
310  * Called by main() during pseudo-device attachment.  All we need
311  * to do is allocate enough space for devices to be configured later, and
312  * add devsw entries.
313  */
314 static void
315 ccdattach(void)
316 {
317 	struct disk_info info;
318 	struct ccd_softc *cs;
319 	int i;
320 	int num = NCCD;
321 
322 	if (num > 1)
323 		kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
324 	else
325 		kprintf("ccd0: Concatenated disk driver\n");
326 
327 	ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
328 			    M_WAITOK | M_ZERO);
329 	ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
330 			    M_WAITOK | M_ZERO);
331 	numccd = num;
332 
333 	/*
334 	 * With normal disk devices the open simply fails if the media
335 	 * is not present.  With CCD we have to be able to open the
336 	 * raw disk to use the ioctl's to set it up, so create a dummy
337 	 * disk info structure so dscheck() doesn't blow up.
338 	 */
339 	bzero(&info, sizeof(info));
340 	info.d_media_blksize = DEV_BSIZE;
341 
342 	for (i = 0; i < numccd; ++i) {
343 		cs = &ccd_softc[i];
344 		cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
345 		cs->sc_dev->si_drv1 = cs;
346 		cs->sc_dev->si_iosize_max = 256 * 512;	/* XXX */
347 		disk_setdiskinfo(&cs->sc_disk, &info);
348 	}
349 }
350 
351 static int
352 ccddetach(void)
353 {
354 	struct ccd_softc *cs;
355 	struct dev_ioctl_args ioctl_args;
356 	int i;
357 	int error = 0;
358 	int eval;
359 
360 	bzero(&ioctl_args, sizeof(ioctl_args));
361 
362 	for (i = 0; i < numccd; ++i) {
363 		cs = &ccd_softc[i];
364 		if (cs->sc_dev == NULL)
365 			continue;
366 		ioctl_args.a_head.a_dev = cs->sc_dev;
367 		ioctl_args.a_cmd = CCDIOCCLR;
368 		ioctl_args.a_fflag = FWRITE;
369 		eval = ccdioctl(&ioctl_args);
370 		if (eval && eval != ENXIO) {
371 			kprintf("ccd%d: In use, cannot detach\n", i);
372 			error = EBUSY;
373 		}
374 	}
375 	if (error == 0) {
376 		for (i = 0; i < numccd; ++i) {
377 			cs = &ccd_softc[i];
378 			if (cs->sc_dev == NULL)
379 				continue;
380 			disk_destroy(&cs->sc_disk);
381 			cs->sc_dev = NULL;
382 		}
383 		if (ccd_softc)
384 			kfree(ccd_softc, M_DEVBUF);
385 		if (ccddevs)
386 			kfree(ccddevs, M_DEVBUF);
387 	}
388 	return (error);
389 }
390 
391 static int
392 ccd_modevent(module_t mod, int type, void *data)
393 {
394 	int error = 0;
395 
396 	switch (type) {
397 	case MOD_LOAD:
398 		ccdattach();
399 		break;
400 
401 	case MOD_UNLOAD:
402 		error = ccddetach();
403 		break;
404 
405 	default:	/* MOD_SHUTDOWN etc */
406 		break;
407 	}
408 	return (error);
409 }
410 
411 DEV_MODULE(ccd, ccd_modevent, NULL);
412 
413 static int
414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
415 {
416 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
417 	struct ccdcinfo *ci = NULL;	/* XXX */
418 	int ix;
419 	struct vnode *vp;
420 	u_int64_t skip;
421 	u_int64_t size;
422 	u_int64_t minsize;
423 	int maxsecsize;
424 	struct partinfo dpart;
425 	struct ccdgeom *ccg = &cs->sc_geom;
426 	char tmppath[MAXPATHLEN];
427 	int error = 0;
428 
429 #ifdef DEBUG
430 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 		kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
432 #endif
433 
434 	cs->sc_size = 0;
435 	cs->sc_ileave = ccd->ccd_interleave;
436 	cs->sc_nccdisks = ccd->ccd_ndev;
437 
438 	/* Allocate space for the component info. */
439 	cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
440 				M_DEVBUF, M_WAITOK);
441 	cs->sc_maxiosize = MAXPHYS;
442 
443 	/*
444 	 * Verify that each component piece exists and record
445 	 * relevant information about it.
446 	 */
447 	maxsecsize = 0;
448 	minsize = 0;
449 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
450 		vp = ccd->ccd_vpp[ix];
451 		ci = &cs->sc_cinfo[ix];
452 		ci->ci_vp = vp;
453 
454 		/*
455 		 * Copy in the pathname of the component.
456 		 */
457 		bzero(tmppath, sizeof(tmppath));	/* sanity */
458 		if ((error = copyinstr(cpaths[ix], tmppath,
459 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
460 #ifdef DEBUG
461 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
462 				kprintf("ccd%d: can't copy path, error = %d\n",
463 				    ccd->ccd_unit, error);
464 #endif
465 			goto fail;
466 		}
467 		ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
468 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
469 
470 		ci->ci_dev = vn_todev(vp);
471 		if (ci->ci_dev->si_iosize_max &&
472 		    cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
473 			cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
474 		}
475 
476 		/*
477 		 * Get partition information for the component.
478 		 */
479 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, cred);
480 		if (error) {
481 #ifdef DEBUG
482 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
483 				 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
484 				     ccd->ccd_unit, ci->ci_path, error);
485 #endif
486 			goto fail;
487 		}
488 		if (dpart.fstype != FS_CCD &&
489 		    !kuuid_is_ccd(&dpart.fstype_uuid)) {
490 			kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
491 				ccd->ccd_unit, ci->ci_path);
492 			error = EFTYPE;
493 			goto fail;
494 		}
495 		if (maxsecsize < dpart.media_blksize)
496 			maxsecsize = dpart.media_blksize;
497 
498 		/*
499 		 * Skip a certain amount of storage at the beginning of
500 		 * the component to make sure we don't infringe on any
501 		 * reserved sectors.  This is handled entirely by
502 		 * dpart.reserved_blocks but we also impose a minimum
503 		 * of 16 sectors for backwards compatibility.
504 		 */
505 		skip = 16;
506 		if (skip < dpart.reserved_blocks)
507 			skip = dpart.reserved_blocks;
508 		size = dpart.media_blocks - skip;
509 
510 		/*
511 		 * Calculate the size, truncating to an interleave
512 		 * boundary if necessary.
513 		 */
514 		if (cs->sc_ileave > 1)
515 			size -= size % cs->sc_ileave;
516 
517 		if ((int64_t)size <= 0) {
518 #ifdef DEBUG
519 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
520 				kprintf("ccd%d: %s: size == 0\n",
521 				    ccd->ccd_unit, ci->ci_path);
522 #endif
523 			error = ENODEV;
524 			goto fail;
525 		}
526 
527 		/*
528 		 * Calculate the smallest uniform component, used
529 		 * elsewhere.
530 		 */
531 		if (minsize == 0 || minsize > size)
532 			minsize = size;
533 		ci->ci_skip = skip;
534 		ci->ci_size = size;
535 		cs->sc_size += size;
536 	}
537 	kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
538 		cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
539 
540 	/*
541 	 * Don't allow the interleave to be smaller than
542 	 * the biggest component sector.
543 	 */
544 	if ((cs->sc_ileave > 0) &&
545 	    (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
546 #ifdef DEBUG
547 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
548 			kprintf("ccd%d: interleave must be at least %d\n",
549 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
550 #endif
551 		error = EINVAL;
552 		goto fail;
553 	}
554 
555 	/*
556 	 * If uniform interleave is desired set all sizes to that of
557 	 * the smallest component.  This will guarentee that a single
558 	 * interleave table is generated.
559 	 *
560 	 * Lost space must be taken into account when calculating the
561 	 * overall size.  Half the space is lost when CCDF_MIRROR is
562 	 * specified.  One disk is lost when CCDF_PARITY is specified.
563 	 */
564 	if (ccd->ccd_flags & CCDF_UNIFORM) {
565 		for (ci = cs->sc_cinfo;
566 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
567 			ci->ci_size = minsize;
568 		}
569 		if (ccd->ccd_flags & CCDF_MIRROR) {
570 			/*
571 			 * Check to see if an even number of components
572 			 * have been specified.  The interleave must also
573 			 * be non-zero in order for us to be able to
574 			 * guarentee the topology.
575 			 */
576 			if (cs->sc_nccdisks % 2) {
577 				kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
578 				error = EINVAL;
579 				goto fail;
580 			}
581 			if (cs->sc_ileave == 0) {
582 				kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
583 				error = EINVAL;
584 				goto fail;
585 			}
586 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
587 		} else if (ccd->ccd_flags & CCDF_PARITY) {
588 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
589 		} else {
590 			if (cs->sc_ileave == 0) {
591 				kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
592 				error = EINVAL;
593 				goto fail;
594 			}
595 			cs->sc_size = cs->sc_nccdisks * minsize;
596 		}
597 	}
598 
599 	/*
600 	 * Construct the interleave table.
601 	 */
602 	ccdinterleave(cs, ccd->ccd_unit);
603 
604 	/*
605 	 * Create pseudo-geometry based on 1MB cylinders.  It's
606 	 * pretty close.
607 	 */
608 	ccg->ccg_secsize = maxsecsize;
609 	ccg->ccg_ntracks = 1;
610 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
611 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
612 
613 	/*
614 	 * Add an devstat entry for this device.
615 	 */
616 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
617 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
618 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
619 			  DEVSTAT_PRIORITY_ARRAY);
620 
621 	cs->sc_flags |= CCDF_INITED;
622 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
623 	cs->sc_unit = ccd->ccd_unit;
624 	return (0);
625 fail:
626 	while (ci > cs->sc_cinfo) {
627 		ci--;
628 		kfree(ci->ci_path, M_DEVBUF);
629 	}
630 	kfree(cs->sc_cinfo, M_DEVBUF);
631 	cs->sc_cinfo = NULL;
632 	return (error);
633 }
634 
635 static void
636 ccdinterleave(struct ccd_softc *cs, int unit)
637 {
638 	struct ccdcinfo *ci, *smallci;
639 	struct ccdiinfo *ii;
640 	u_int64_t bn;
641 	u_int64_t lbn;
642 	u_int64_t size;
643 	int icount;
644 	int ix;
645 
646 #ifdef DEBUG
647 	if (ccddebug & CCDB_INIT)
648 		kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
649 #endif
650 
651 	/*
652 	 * Allocate an interleave table.  The worst case occurs when each
653 	 * of N disks is of a different size, resulting in N interleave
654 	 * tables.
655 	 *
656 	 * Chances are this is too big, but we don't care.
657 	 */
658 	icount = cs->sc_nccdisks + 1;
659 	cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
660 				M_DEVBUF, M_WAITOK|M_ZERO);
661 
662 	/*
663 	 * Trivial case: no interleave (actually interleave of disk size).
664 	 * Each table entry represents a single component in its entirety.
665 	 *
666 	 * An interleave of 0 may not be used with a mirror or parity setup.
667 	 */
668 	if (cs->sc_ileave == 0) {
669 		bn = 0;
670 		ii = cs->sc_itable;
671 
672 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
673 			/* Allocate space for ii_index. */
674 			ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
675 			ii->ii_ndisk = 1;
676 			ii->ii_startblk = bn;
677 			ii->ii_startoff = 0;
678 			ii->ii_index[0] = ix;
679 			bn += cs->sc_cinfo[ix].ci_size;
680 			ii++;
681 		}
682 		ii->ii_ndisk = 0;
683 #ifdef DEBUG
684 		if (ccddebug & CCDB_INIT)
685 			printiinfo(cs->sc_itable);
686 #endif
687 		return;
688 	}
689 
690 	/*
691 	 * The following isn't fast or pretty; it doesn't have to be.
692 	 */
693 	size = 0;
694 	bn = lbn = 0;
695 	for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
696 		/*
697 		 * Allocate space for ii_index.  We might allocate more then
698 		 * we use.
699 		 */
700 		ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
701 					M_DEVBUF, M_WAITOK);
702 
703 		/*
704 		 * Locate the smallest of the remaining components
705 		 */
706 		smallci = NULL;
707 		ci = cs->sc_cinfo;
708 		while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
709 			if (ci->ci_size > size &&
710 			    (smallci == NULL ||
711 			     ci->ci_size < smallci->ci_size)) {
712 				smallci = ci;
713 			}
714 			++ci;
715 		}
716 
717 		/*
718 		 * Nobody left, all done
719 		 */
720 		if (smallci == NULL) {
721 			ii->ii_ndisk = 0;
722 			break;
723 		}
724 
725 		/*
726 		 * Record starting logical block using an sc_ileave blocksize.
727 		 */
728 		ii->ii_startblk = bn / cs->sc_ileave;
729 
730 		/*
731 		 * Record starting component block using an sc_ileave
732 		 * blocksize.  This value is relative to the beginning of
733 		 * a component disk.
734 		 */
735 		ii->ii_startoff = lbn;
736 
737 		/*
738 		 * Determine how many disks take part in this interleave
739 		 * and record their indices.
740 		 */
741 		ix = 0;
742 		for (ci = cs->sc_cinfo;
743 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
744 			if (ci->ci_size >= smallci->ci_size) {
745 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
746 			}
747 		}
748 		ii->ii_ndisk = ix;
749 
750 		/*
751 		 * Adjust for loop
752 		 */
753 		bn += ix * (smallci->ci_size - size);
754 		lbn = smallci->ci_size / cs->sc_ileave;
755 		size = smallci->ci_size;
756 	}
757 	if (ii == &cs->sc_itable[icount])
758 		panic("ccdinterlave software bug!  table exhausted");
759 #ifdef DEBUG
760 	if (ccddebug & CCDB_INIT)
761 		printiinfo(cs->sc_itable);
762 #endif
763 }
764 
765 /* ARGSUSED */
766 static int
767 ccdopen(struct dev_open_args *ap)
768 {
769 	cdev_t dev = ap->a_head.a_dev;
770 	int unit = ccdunit(dev);
771 	struct ccd_softc *cs;
772 	int error = 0;
773 
774 #ifdef DEBUG
775 	if (ccddebug & CCDB_FOLLOW)
776 		kprintf("ccdopen(%x, %x)\n", dev, flags);
777 #endif
778 	if (unit >= numccd)
779 		return (ENXIO);
780 	cs = &ccd_softc[unit];
781 
782 	if ((error = ccdlock(cs)) == 0) {
783 		ccdunlock(cs);
784 	}
785 	return (error);
786 }
787 
788 /* ARGSUSED */
789 static int
790 ccdclose(struct dev_close_args *ap)
791 {
792 	cdev_t dev = ap->a_head.a_dev;
793 	int unit = ccdunit(dev);
794 	struct ccd_softc *cs;
795 	int error = 0;
796 
797 #ifdef DEBUG
798 	if (ccddebug & CCDB_FOLLOW)
799 		kprintf("ccdclose(%x, %x)\n", dev, flags);
800 #endif
801 
802 	if (unit >= numccd)
803 		return (ENXIO);
804 	cs = &ccd_softc[unit];
805 	if ((error = ccdlock(cs)) == 0) {
806 		ccdunlock(cs);
807 	}
808 	return (error);
809 }
810 
811 static int
812 ccdstrategy(struct dev_strategy_args *ap)
813 {
814 	cdev_t dev = ap->a_head.a_dev;
815 	struct bio *bio = ap->a_bio;
816 	int unit = ccdunit(dev);
817 	struct bio *nbio;
818 	struct buf *bp = bio->bio_buf;
819 	struct ccd_softc *cs = &ccd_softc[unit];
820 	u_int64_t pbn;	/* in sc_secsize chunks */
821 	u_int32_t sz;	/* in sc_secsize chunks */
822 
823 #ifdef DEBUG
824 	if (ccddebug & CCDB_FOLLOW)
825 		kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
826 #endif
827 	if ((cs->sc_flags & CCDF_INITED) == 0) {
828 		bp->b_error = ENXIO;
829 		goto error;
830 	}
831 
832 	/* If it's a nil transfer, wake up the top half now. */
833 	if (bp->b_bcount == 0) {
834 		bp->b_resid = 0;
835 		goto done;
836 	}
837 
838 	/*
839 	 * Do bounds checking and adjust transfer.  If there's an
840 	 * error, the bounds check will flag that for us.
841 	 */
842 
843 	pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
844 	sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
845 
846 	/*
847 	 * If out of bounds return an error.  If the request goes
848 	 * past EOF, clip the request as appropriate.  If exactly
849 	 * at EOF, return success (don't clip), but with 0 bytes
850 	 * of I/O.
851 	 *
852 	 * Mark EOF B_INVAL (just like bad), indicating that the
853 	 * contents of the buffer, if any, is invalid.
854 	 */
855 	if ((int64_t)pbn < 0)
856 		goto bad;
857 	if (pbn + sz > cs->sc_size) {
858 		if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
859 			goto bad;
860 		if (pbn == cs->sc_size) {
861 			bp->b_resid = bp->b_bcount;
862 			bp->b_flags |= B_INVAL;
863 			goto done;
864 		}
865 		sz = (long)(cs->sc_size - pbn);
866 		bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
867 	}
868 	nbio = bio;
869 
870 	bp->b_resid = bp->b_bcount;
871 	nbio->bio_driver_info = dev;
872 
873 	/*
874 	 * "Start" the unit.
875 	 */
876 	crit_enter();
877 	ccdstart(cs, nbio);
878 	crit_exit();
879 	return(0);
880 
881 	/*
882 	 * note: bio, not nbio, is valid at the done label.
883 	 */
884 bad:
885 	bp->b_error = EINVAL;
886 error:
887 	bp->b_resid = bp->b_bcount;
888 	bp->b_flags |= B_ERROR | B_INVAL;
889 done:
890 	biodone(bio);
891 	return(0);
892 }
893 
894 static void
895 ccdstart(struct ccd_softc *cs, struct bio *bio)
896 {
897 	long bcount, rcount;
898 	struct ccdbuf *cbp[4];
899 	struct buf *bp = bio->bio_buf;
900 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
901 	caddr_t addr;
902 	off_t doffset;
903 
904 #ifdef DEBUG
905 	if (ccddebug & CCDB_FOLLOW)
906 		kprintf("ccdstart(%x, %x)\n", cs, bp);
907 #endif
908 
909 	/* Record the transaction start  */
910 	devstat_start_transaction(&cs->device_stats);
911 
912 	/*
913 	 * Allocate component buffers and fire off the requests
914 	 */
915 	doffset = bio->bio_offset;
916 	addr = bp->b_data;
917 
918 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
919 		ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
920 		rcount = cbp[0]->cb_buf.b_bcount;
921 
922 		if (cs->sc_cflags & CCDF_MIRROR) {
923 			/*
924 			 * Mirroring.  Writes go to both disks, reads are
925 			 * taken from whichever disk seems most appropriate.
926 			 *
927 			 * We attempt to localize reads to the disk whos arm
928 			 * is nearest the read request.  We ignore seeks due
929 			 * to writes when making this determination and we
930 			 * also try to avoid hogging.
931 			 */
932 			if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
933 				vn_strategy(cbp[0]->cb_vp,
934 					    &cbp[0]->cb_buf.b_bio1);
935 				vn_strategy(cbp[1]->cb_vp,
936 					    &cbp[1]->cb_buf.b_bio1);
937 			} else {
938 				int pick = cs->sc_pick;
939 				daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
940 				if (doffset < cs->sc_blk[pick] - range ||
941 				    doffset > cs->sc_blk[pick] + range
942 				) {
943 					cs->sc_pick = pick = 1 - pick;
944 				}
945 				cs->sc_blk[pick] = doffset + rcount;
946 				vn_strategy(cbp[pick]->cb_vp,
947 					    &cbp[pick]->cb_buf.b_bio1);
948 			}
949 		} else {
950 			/*
951 			 * Not mirroring
952 			 */
953 			vn_strategy(cbp[0]->cb_vp,
954 				     &cbp[0]->cb_buf.b_bio1);
955 		}
956 		doffset += rcount;
957 		addr += rcount;
958 	}
959 }
960 
961 /*
962  * Build a component buffer header.
963  */
964 static void
965 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
966 	  off_t doffset, caddr_t addr, long bcount)
967 {
968 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
969 	struct ccdbuf *cbp;
970 	u_int64_t bn;
971 	u_int64_t cbn;
972 	u_int64_t cboff;
973 	off_t cbc;
974 
975 #ifdef DEBUG
976 	if (ccddebug & CCDB_IO)
977 		kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
978 		       cs, bp, bn, addr, bcount);
979 #endif
980 	/*
981 	 * Determine which component bn falls in.
982 	 */
983 	bn = doffset / cs->sc_geom.ccg_secsize;
984 	cbn = bn;
985 	cboff = 0;
986 
987 	if (cs->sc_ileave == 0) {
988 		/*
989 		 * Serially concatenated and neither a mirror nor a parity
990 		 * config.  This is a special case.
991 		 */
992 		daddr_t sblk;
993 
994 		sblk = 0;
995 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
996 			sblk += ci->ci_size;
997 		cbn -= sblk;
998 	} else {
999 		struct ccdiinfo *ii;
1000 		int ccdisk, off;
1001 
1002 		/*
1003 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
1004 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
1005 		 * to cbn.
1006 		 */
1007 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
1008 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
1009 
1010 		/*
1011 		 * Figure out which interleave table to use.
1012 		 */
1013 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1014 			if (ii->ii_startblk > cbn)
1015 				break;
1016 		}
1017 		ii--;
1018 
1019 		/*
1020 		 * off is the logical superblock relative to the beginning
1021 		 * of this interleave block.
1022 		 */
1023 		off = cbn - ii->ii_startblk;
1024 
1025 		/*
1026 		 * We must calculate which disk component to use (ccdisk),
1027 		 * and recalculate cbn to be the superblock relative to
1028 		 * the beginning of the component.  This is typically done by
1029 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
1030 		 * must typically be divided by the number of components in
1031 		 * this interleave array to be properly convert it from a
1032 		 * CCD-relative logical superblock number to a
1033 		 * component-relative superblock number.
1034 		 */
1035 		if (ii->ii_ndisk == 1) {
1036 			/*
1037 			 * When we have just one disk, it can't be a mirror
1038 			 * or a parity config.
1039 			 */
1040 			ccdisk = ii->ii_index[0];
1041 			cbn = ii->ii_startoff + off;
1042 		} else {
1043 			if (cs->sc_cflags & CCDF_MIRROR) {
1044 				/*
1045 				 * We have forced a uniform mapping, resulting
1046 				 * in a single interleave array.  We double
1047 				 * up on the first half of the available
1048 				 * components and our mirror is in the second
1049 				 * half.  This only works with a single
1050 				 * interleave array because doubling up
1051 				 * doubles the number of sectors, so there
1052 				 * cannot be another interleave array because
1053 				 * the next interleave array's calculations
1054 				 * would be off.
1055 				 */
1056 				int ndisk2 = ii->ii_ndisk / 2;
1057 				ccdisk = ii->ii_index[off % ndisk2];
1058 				cbn = ii->ii_startoff + off / ndisk2;
1059 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1060 			} else if (cs->sc_cflags & CCDF_PARITY) {
1061 				/*
1062 				 * XXX not implemented yet
1063 				 */
1064 				int ndisk2 = ii->ii_ndisk - 1;
1065 				ccdisk = ii->ii_index[off % ndisk2];
1066 				cbn = ii->ii_startoff + off / ndisk2;
1067 				if (cbn % ii->ii_ndisk <= ccdisk)
1068 					ccdisk++;
1069 			} else {
1070 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1071 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1072 			}
1073 		}
1074 
1075 		ci = &cs->sc_cinfo[ccdisk];
1076 
1077 		/*
1078 		 * Convert cbn from a superblock to a normal block so it
1079 		 * can be used to calculate (along with cboff) the normal
1080 		 * block index into this particular disk.
1081 		 */
1082 		cbn *= cs->sc_ileave;
1083 	}
1084 
1085 	/*
1086 	 * Fill in the component buf structure.
1087 	 *
1088 	 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1089 	 * will be truncated on device EOF so we use b_bufsize to detect
1090 	 * the case.
1091 	 */
1092 	cbp = getccdbuf();
1093 	cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1094 	cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1095 	cbp->cb_buf.b_data = addr;
1096 	cbp->cb_vp = ci->ci_vp;
1097 	if (cs->sc_ileave == 0)
1098 		cbc = dbtob((off_t)(ci->ci_size - cbn));
1099 	else
1100 		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1101 	if (cbc > cs->sc_maxiosize)
1102 		cbc = cs->sc_maxiosize;
1103 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1104  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1105 
1106 	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1107 	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1108 	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
1109 
1110 	/*
1111 	 * context for ccdiodone
1112 	 */
1113 	cbp->cb_obio = bio;
1114 	cbp->cb_unit = cs - ccd_softc;
1115 	cbp->cb_comp = ci - cs->sc_cinfo;
1116 
1117 #ifdef DEBUG
1118 	if (ccddebug & CCDB_IO)
1119 		kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
1120 		       ci->ci_dev, ci-cs->sc_cinfo, cbp,
1121 		       cbp->cb_buf.b_bio1.bio_offset,
1122 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1123 #endif
1124 	cb[0] = cbp;
1125 
1126 	/*
1127 	 * Note: both I/O's setup when reading from mirror, but only one
1128 	 * will be executed.
1129 	 */
1130 	if (cs->sc_cflags & CCDF_MIRROR) {
1131 		/* mirror, setup second I/O */
1132 		cbp = getccdbuf();
1133 
1134 		cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1135 		cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1136 		cbp->cb_buf.b_data = addr;
1137 		cbp->cb_vp = ci2->ci_vp;
1138 		if (cs->sc_ileave == 0)
1139 		      cbc = dbtob((off_t)(ci->ci_size - cbn));
1140 		else
1141 		      cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1142 		if (cbc > cs->sc_maxiosize)
1143 			cbc = cs->sc_maxiosize;
1144 		cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1145 		cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1146 
1147 		cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1148 		cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1149 		cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
1150 
1151 		/*
1152 		 * context for ccdiodone
1153 		 */
1154 		cbp->cb_obio = bio;
1155 		cbp->cb_unit = cs - ccd_softc;
1156 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1157 		cb[1] = cbp;
1158 		/* link together the ccdbuf's and clear "mirror done" flag */
1159 		cb[0]->cb_mirror = cb[1];
1160 		cb[1]->cb_mirror = cb[0];
1161 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1162 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1163 	}
1164 }
1165 
1166 static void
1167 ccdintr(struct ccd_softc *cs, struct bio *bio)
1168 {
1169 	struct buf *bp = bio->bio_buf;
1170 
1171 #ifdef DEBUG
1172 	if (ccddebug & CCDB_FOLLOW)
1173 		kprintf("ccdintr(%x, %x)\n", cs, bp);
1174 #endif
1175 	/*
1176 	 * Request is done for better or worse, wakeup the top half.
1177 	 */
1178 	if (bp->b_flags & B_ERROR)
1179 		bp->b_resid = bp->b_bcount;
1180 	devstat_end_transaction_buf(&cs->device_stats, bp);
1181 	biodone(bio);
1182 }
1183 
1184 /*
1185  * Called at interrupt time.
1186  * Mark the component as done and if all components are done,
1187  * take a ccd interrupt.
1188  */
1189 static void
1190 ccdiodone(struct bio *bio)
1191 {
1192 	struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1193 	struct bio *obio = cbp->cb_obio;
1194 	struct buf *obp = obio->bio_buf;
1195 	int unit = cbp->cb_unit;
1196 	int count;
1197 
1198 	/*
1199 	 * Since we do not have exclusive access to underlying devices,
1200 	 * we can't keep cache translations around.
1201 	 */
1202 	clearbiocache(bio->bio_next);
1203 
1204 	crit_enter();
1205 #ifdef DEBUG
1206 	if (ccddebug & CCDB_FOLLOW)
1207 		kprintf("ccdiodone(%x)\n", cbp);
1208 	if (ccddebug & CCDB_IO) {
1209 		kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1210 		       obp, obp->b_bcount, obp->b_resid);
1211 		kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1212 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1213 		       cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1214 		       cbp->cb_buf.b_bcount);
1215 	}
1216 #endif
1217 
1218 	/*
1219 	 * If an error occured, report it.  If this is a mirrored
1220 	 * configuration and the first of two possible reads, do not
1221 	 * set the error in the bp yet because the second read may
1222 	 * succeed.
1223 	 */
1224 	if (cbp->cb_buf.b_flags & B_ERROR) {
1225 		const char *msg = "";
1226 
1227 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1228 		    (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1229 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1230 			/*
1231 			 * We will try our read on the other disk down
1232 			 * below, also reverse the default pick so if we
1233 			 * are doing a scan we do not keep hitting the
1234 			 * bad disk first.
1235 			 */
1236 			struct ccd_softc *cs = &ccd_softc[unit];
1237 
1238 			msg = ", trying other disk";
1239 			cs->sc_pick = 1 - cs->sc_pick;
1240 			cs->sc_blk[cs->sc_pick] = obio->bio_offset;
1241 		} else {
1242 			obp->b_flags |= B_ERROR;
1243 			obp->b_error = cbp->cb_buf.b_error ?
1244 			    cbp->cb_buf.b_error : EIO;
1245 		}
1246 		kprintf("ccd%d: error %d on component %d "
1247 			"offset %jd (ccd offset %jd)%s\n",
1248 		        unit, obp->b_error, cbp->cb_comp,
1249 		        (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1250 		        (intmax_t)obio->bio_offset,
1251 		        msg);
1252 	}
1253 
1254 	/*
1255 	 * Process mirror.  If we are writing, I/O has been initiated on both
1256 	 * buffers and we fall through only after both are finished.
1257 	 *
1258 	 * If we are reading only one I/O is initiated at a time.  If an
1259 	 * error occurs we initiate the second I/O and return, otherwise
1260 	 * we free the second I/O without initiating it.
1261 	 */
1262 
1263 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1264 		if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1265 			/*
1266 			 * When writing, handshake with the second buffer
1267 			 * to determine when both are done.  If both are not
1268 			 * done, return here.
1269 			 */
1270 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1271 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1272 				putccdbuf(cbp);
1273 				crit_exit();
1274 				return;
1275 			}
1276 		} else {
1277 			/*
1278 			 * When reading, either dispose of the second buffer
1279 			 * or initiate I/O on the second buffer if an error
1280 			 * occured with this one.
1281 			 */
1282 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1283 				if (cbp->cb_buf.b_flags & B_ERROR) {
1284 					cbp->cb_mirror->cb_pflags |=
1285 					    CCDPF_MIRROR_DONE;
1286 					vn_strategy(
1287 					    cbp->cb_mirror->cb_vp,
1288 					    &cbp->cb_mirror->cb_buf.b_bio1
1289 					);
1290 					putccdbuf(cbp);
1291 					crit_exit();
1292 					return;
1293 				} else {
1294 					putccdbuf(cbp->cb_mirror);
1295 					/* fall through */
1296 				}
1297 			}
1298 		}
1299 	}
1300 
1301 	/*
1302 	 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1303 	 */
1304 	count = cbp->cb_buf.b_bufsize;
1305 	putccdbuf(cbp);
1306 
1307 	/*
1308 	 * If all done, "interrupt".
1309 	 */
1310 	obp->b_resid -= count;
1311 	if (obp->b_resid < 0)
1312 		panic("ccdiodone: count");
1313 	if (obp->b_resid == 0)
1314 		ccdintr(&ccd_softc[unit], obio);
1315 	crit_exit();
1316 }
1317 
1318 static int
1319 ccdioctl(struct dev_ioctl_args *ap)
1320 {
1321 	cdev_t dev = ap->a_head.a_dev;
1322 	int unit = ccdunit(dev);
1323 	int i, j, lookedup = 0, error = 0;
1324 	struct ccd_softc *cs;
1325 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1326 	struct ccddevice ccd;
1327 	struct disk_info info;
1328 	char **cpp;
1329 	struct vnode **vpp;
1330 
1331 	if (unit >= numccd)
1332 		return (ENXIO);
1333 	cs = &ccd_softc[unit];
1334 
1335 	bzero(&ccd, sizeof(ccd));
1336 
1337 	switch (ap->a_cmd) {
1338 	case CCDIOCSET:
1339 		if (cs->sc_flags & CCDF_INITED)
1340 			return (EBUSY);
1341 
1342 		if ((ap->a_fflag & FWRITE) == 0)
1343 			return (EBADF);
1344 
1345 		if ((error = ccdlock(cs)) != 0)
1346 			return (error);
1347 
1348 		if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1349 			ccdunlock(cs);
1350 			return (EINVAL);
1351 		}
1352 
1353 		/* Fill in some important bits. */
1354 		ccd.ccd_unit = unit;
1355 		ccd.ccd_interleave = ccio->ccio_ileave;
1356 		if (ccd.ccd_interleave == 0 &&
1357 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1358 		     (ccio->ccio_flags & CCDF_PARITY))) {
1359 			kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1360 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1361 		}
1362 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1363 		    (ccio->ccio_flags & CCDF_PARITY)) {
1364 			kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1365 			ccio->ccio_flags &= ~CCDF_PARITY;
1366 		}
1367 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1368 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1369 			kprintf("ccd%d: mirror/parity forces uniform flag\n",
1370 			       unit);
1371 			ccio->ccio_flags |= CCDF_UNIFORM;
1372 		}
1373 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1374 
1375 		/*
1376 		 * Allocate space for and copy in the array of
1377 		 * componet pathnames and device numbers.
1378 		 */
1379 		cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1380 		    M_DEVBUF, M_WAITOK);
1381 		vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1382 		    M_DEVBUF, M_WAITOK);
1383 
1384 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1385 				ccio->ccio_ndisks * sizeof(char **));
1386 		if (error) {
1387 			kfree(vpp, M_DEVBUF);
1388 			kfree(cpp, M_DEVBUF);
1389 			ccdunlock(cs);
1390 			return (error);
1391 		}
1392 
1393 #ifdef DEBUG
1394 		if (ccddebug & CCDB_INIT) {
1395 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1396 				kprintf("ccdioctl: component %d: 0x%x\n",
1397 				    i, cpp[i]);
1398 		}
1399 #endif
1400 
1401 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1402 #ifdef DEBUG
1403 			if (ccddebug & CCDB_INIT)
1404 				kprintf("ccdioctl: lookedup = %d\n", lookedup);
1405 #endif
1406 			if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1407 				for (j = 0; j < lookedup; ++j)
1408 					(void)vn_close(vpp[j], FREAD|FWRITE);
1409 				kfree(vpp, M_DEVBUF);
1410 				kfree(cpp, M_DEVBUF);
1411 				ccdunlock(cs);
1412 				return (error);
1413 			}
1414 			++lookedup;
1415 		}
1416 		ccd.ccd_cpp = cpp;
1417 		ccd.ccd_vpp = vpp;
1418 		ccd.ccd_ndev = ccio->ccio_ndisks;
1419 
1420 		/*
1421 		 * Initialize the ccd.  Fills in the softc for us.
1422 		 */
1423 		if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1424 			for (j = 0; j < lookedup; ++j)
1425 				(void)vn_close(vpp[j], FREAD|FWRITE);
1426 			kfree(vpp, M_DEVBUF);
1427 			kfree(cpp, M_DEVBUF);
1428 			ccdunlock(cs);
1429 			return (error);
1430 		}
1431 
1432 		/*
1433 		 * The ccd has been successfully initialized, so
1434 		 * we can place it into the array and read the disklabel.
1435 		 */
1436 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1437 		ccio->ccio_unit = unit;
1438 		ccio->ccio_size = cs->sc_size;
1439 
1440 		bzero(&info, sizeof(info));
1441 		info.d_media_blksize = cs->sc_geom.ccg_secsize;
1442 		info.d_media_blocks  = cs->sc_size;
1443 		info.d_nheads	     = cs->sc_geom.ccg_ntracks;
1444 		info.d_secpertrack   = cs->sc_geom.ccg_nsectors;
1445 		info.d_ncylinders    = cs->sc_geom.ccg_ncylinders;
1446 		info.d_secpercyl     = info.d_nheads * info.d_secpertrack;
1447 
1448 		/*
1449 		 * For cases where a label is directly applied to the ccd,
1450 		 * without slices, DSO_COMPATMBR forces one sector be
1451 		 * reserved for backwards compatibility.
1452 		 */
1453 		info.d_dsflags	     = DSO_COMPATMBR;
1454 		disk_setdiskinfo(&cs->sc_disk, &info);
1455 
1456 		ccdunlock(cs);
1457 
1458 		break;
1459 
1460 	case CCDIOCCLR:
1461 		if ((cs->sc_flags & CCDF_INITED) == 0)
1462 			return (ENXIO);
1463 
1464 		if ((ap->a_fflag & FWRITE) == 0)
1465 			return (EBADF);
1466 
1467 		if ((error = ccdlock(cs)) != 0)
1468 			return (error);
1469 
1470 		if (dev_drefs(cs->sc_dev) > 1) {
1471 			ccdunlock(cs);
1472 			return (EBUSY);
1473 		}
1474 
1475 		/*
1476 		 * Free ccd_softc information and clear entry.
1477 		 */
1478 
1479 		/* Close the components and free their pathnames. */
1480 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1481 			/*
1482 			 * XXX: this close could potentially fail and
1483 			 * cause Bad Things.  Maybe we need to force
1484 			 * the close to happen?
1485 			 */
1486 #ifdef DEBUG
1487 			if (ccddebug & CCDB_VNODE)
1488 				vprint("CCDIOCCLR: vnode info",
1489 				    cs->sc_cinfo[i].ci_vp);
1490 #endif
1491 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
1492 			kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1493 		}
1494 
1495 		/* Free interleave index. */
1496 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1497 			kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1498 
1499 		/* Free component info and interleave table. */
1500 		kfree(cs->sc_cinfo, M_DEVBUF);
1501 		kfree(cs->sc_itable, M_DEVBUF);
1502 		cs->sc_cinfo = NULL;
1503 		cs->sc_itable = NULL;
1504 		cs->sc_flags &= ~CCDF_INITED;
1505 
1506 		/*
1507 		 * Free ccddevice information and clear entry.
1508 		 */
1509 		kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1510 		kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1511 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1512 
1513 		/*
1514 		 * And remove the devstat entry.
1515 		 */
1516 		devstat_remove_entry(&cs->device_stats);
1517 
1518 		/* This must be atomic. */
1519 		crit_enter();
1520 		ccdunlock(cs);
1521 		crit_exit();
1522 
1523 		break;
1524 
1525 	default:
1526 		return (ENOTTY);
1527 	}
1528 
1529 	return (0);
1530 }
1531 
static int
ccddump(struct dev_dump_args *ap)
{
	/*
	 * Dumping is not implemented for this driver; the request is
	 * always refused with ENXIO and 'ap' is never examined.
	 */
	return (ENXIO);
}
1538 
1539 /*
1540  * Lookup the provided name in the filesystem.  If the file exists,
1541  * is a valid block device, and isn't being used by anyone else,
1542  * set *vpp to the file's vnode.
1543  */
1544 static int
1545 ccdlookup(char *path, struct vnode **vpp)
1546 {
1547 	struct nlookupdata nd;
1548 	struct vnode *vp;
1549 	int error;
1550 
1551 	*vpp = NULL;
1552 
1553 	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1554 	if (error)
1555 		return (error);
1556 	if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1557 #ifdef DEBUG
1558 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1559 			kprintf("ccdlookup: vn_open error = %d\n", error);
1560 #endif
1561 		goto done;
1562 	}
1563 	vp = nd.nl_open_vp;
1564 
1565 	if (vp->v_opencount > 1) {
1566 		error = EBUSY;
1567 		goto done;
1568 	}
1569 
1570 	if (!vn_isdisk(vp, &error))
1571 		goto done;
1572 
1573 #ifdef DEBUG
1574 	if (ccddebug & CCDB_VNODE)
1575 		vprint("ccdlookup: vnode info", vp);
1576 #endif
1577 
1578 	vn_unlock(vp);
1579 	nd.nl_open_vp = NULL;
1580 	nlookup_done(&nd);
1581 	*vpp = vp;				/* leave ref intact  */
1582 	return (0);
1583 done:
1584 	nlookup_done(&nd);
1585 	return (error);
1586 }
1587 
1588 /*
1589  * Wait interruptibly for an exclusive lock.
1590  *
1591  * XXX
1592  * Several drivers do this; it should be abstracted and made MP-safe.
1593  */
1594 static int
1595 ccdlock(struct ccd_softc *cs)
1596 {
1597 	int error;
1598 
1599 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1600 		cs->sc_flags |= CCDF_WANTED;
1601 		if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1602 			return (error);
1603 	}
1604 	cs->sc_flags |= CCDF_LOCKED;
1605 	return (0);
1606 }
1607 
1608 /*
1609  * Unlock and wake up any waiters.
1610  */
1611 static void
1612 ccdunlock(struct ccd_softc *cs)
1613 {
1614 
1615 	cs->sc_flags &= ~CCDF_LOCKED;
1616 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1617 		cs->sc_flags &= ~CCDF_WANTED;
1618 		wakeup(cs);
1619 	}
1620 }
1621 
#ifdef DEBUG
/*
 * Debug helper: print one line per interleave table entry (entry number,
 * ii_ndisk, ii_startblk, ii_startoff, then each ii_index slot), stopping
 * at the first entry whose ii_ndisk is zero.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;
	int slot;

	while (ii->ii_ndisk) {
		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);

		slot = 0;
		while (slot < ii->ii_ndisk) {
			kprintf(" %d", ii->ii_index[slot]);
			slot++;
		}
		kprintf("\n");

		entry++;
		ii++;
	}
}
#endif
1637 
1638 
1639 /* Local Variables: */
1640 /* c-argdecl-indent: 8 */
1641 /* c-continued-statement-offset: 8 */
1642 /* c-indent-level: 8 */
1643 /* End: */
1644