1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/buf2.h> 145 #include <sys/ccdvar.h> 146 147 #include <vm/vm_zone.h> 148 149 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 150 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 151 152 #include <sys/thread2.h> 153 154 #if defined(CCDDEBUG) && !defined(DEBUG) 155 #define DEBUG 156 #endif 157 158 #ifdef DEBUG 159 #define CCDB_FOLLOW 0x01 160 #define CCDB_INIT 0x02 161 #define CCDB_IO 0x04 162 #define CCDB_LABEL 0x08 163 #define CCDB_VNODE 0x10 164 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 165 CCDB_VNODE; 166 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 167 #undef DEBUG 168 #endif 169 170 #define ccdunit(x) dkunit(x) 171 #define ccdpart(x) dkpart(x) 172 173 /* 174 This is how mirroring works (only writes are special): 175 176 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 177 linked together by the cb_mirror field. "cb_pflags & 178 CCDPF_MIRROR_DONE" is set to 0 on both of them. 179 180 When a component returns to ccdiodone(), it checks if "cb_pflags & 181 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 182 flag and returns. If it is, it means its partner has already 183 returned, so it will go to the regular cleanup. 184 185 */ 186 187 struct ccdbuf { 188 struct buf cb_buf; /* new I/O buf */ 189 struct vnode *cb_vp; /* related vnode */ 190 struct bio *cb_obio; /* ptr. to original I/O buf */ 191 struct ccdbuf *cb_freenext; /* free list link */ 192 int cb_unit; /* target unit */ 193 int cb_comp; /* target component */ 194 int cb_pflags; /* mirror/parity status flag */ 195 struct ccdbuf *cb_mirror; /* mirror counterpart */ 196 }; 197 198 /* bits in cb_pflags */ 199 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 200 201 static d_open_t ccdopen; 202 static d_close_t ccdclose; 203 static d_strategy_t ccdstrategy; 204 static d_ioctl_t ccdioctl; 205 static d_dump_t ccddump; 206 207 #define NCCDFREEHIWAT 16 208 209 #define CDEV_MAJOR 74 210 211 static struct dev_ops ccd_ops = { 212 { "ccd", CDEV_MAJOR, D_DISK }, 213 .d_open = ccdopen, 214 .d_close = ccdclose, 215 .d_read = physread, 216 .d_write = physwrite, 217 .d_ioctl = ccdioctl, 218 .d_strategy = ccdstrategy, 219 .d_dump = ccddump 220 }; 221 222 /* called during module initialization */ 223 static void ccdattach (void); 224 static int ccddetach (void); 225 static int ccd_modevent (module_t, int, void *); 226 227 /* called by biodone() at interrupt time */ 228 static void ccdiodone (struct bio *bio); 229 230 static void ccdstart (struct ccd_softc *, struct bio *); 231 static void ccdinterleave (struct ccd_softc *, int); 232 static void ccdintr (struct ccd_softc *, struct bio *); 233 static int ccdinit (struct ccddevice *, char **, struct ucred *); 234 static int ccdlookup (char *, struct vnode **); 235 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 236 struct bio *, off_t, caddr_t, long); 237 static int ccdlock (struct ccd_softc *); 238 static void ccdunlock (struct ccd_softc *); 239 240 #ifdef DEBUG 241 static void printiinfo (struct ccdiinfo *); 242 #endif 243 244 /* Non-private for the benefit of libkvm. */ 245 struct ccd_softc *ccd_softc; 246 struct ccddevice *ccddevs; 247 struct ccdbuf *ccdfreebufs; 248 static int numccdfreebufs; 249 static int numccd = 0; 250 251 /* 252 * getccdbuf() - Allocate and zero a ccd buffer. 253 * 254 * This routine is called at splbio(). 255 */ 256 257 static __inline 258 struct ccdbuf * 259 getccdbuf(void) 260 { 261 struct ccdbuf *cbp; 262 263 /* 264 * Allocate from freelist or malloc as necessary 265 */ 266 if ((cbp = ccdfreebufs) != NULL) { 267 ccdfreebufs = cbp->cb_freenext; 268 --numccdfreebufs; 269 reinitbufbio(&cbp->cb_buf); 270 } else { 271 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 272 initbufbio(&cbp->cb_buf); 273 } 274 275 /* 276 * independant struct buf initialization 277 */ 278 buf_dep_init(&cbp->cb_buf); 279 BUF_LOCKINIT(&cbp->cb_buf); 280 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 281 BUF_KERNPROC(&cbp->cb_buf); 282 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 283 284 return(cbp); 285 } 286 287 /* 288 * putccdbuf() - Free a ccd buffer. 289 * 290 * This routine is called at splbio(). 291 */ 292 293 static __inline 294 void 295 putccdbuf(struct ccdbuf *cbp) 296 { 297 BUF_UNLOCK(&cbp->cb_buf); 298 BUF_LOCKFREE(&cbp->cb_buf); 299 300 if (numccdfreebufs < NCCDFREEHIWAT) { 301 cbp->cb_freenext = ccdfreebufs; 302 ccdfreebufs = cbp; 303 ++numccdfreebufs; 304 } else { 305 kfree((caddr_t)cbp, M_DEVBUF); 306 } 307 } 308 309 /* 310 * Called by main() during pseudo-device attachment. All we need 311 * to do is allocate enough space for devices to be configured later, and 312 * add devsw entries. 313 */ 314 static void 315 ccdattach(void) 316 { 317 struct disk_info info; 318 struct ccd_softc *cs; 319 int i; 320 int num = NCCD; 321 322 if (num > 1) 323 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 324 else 325 kprintf("ccd0: Concatenated disk driver\n"); 326 327 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 328 M_WAITOK | M_ZERO); 329 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 330 M_WAITOK | M_ZERO); 331 numccd = num; 332 333 /* 334 * With normal disk devices the open simply fails if the media 335 * is not present. With CCD we have to be able to open the 336 * raw disk to use the ioctl's to set it up, so create a dummy 337 * disk info structure so dscheck() doesn't blow up. 338 */ 339 bzero(&info, sizeof(info)); 340 info.d_media_blksize = DEV_BSIZE; 341 342 for (i = 0; i < numccd; ++i) { 343 cs = &ccd_softc[i]; 344 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 345 cs->sc_dev->si_drv1 = cs; 346 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 347 disk_setdiskinfo(&cs->sc_disk, &info); 348 } 349 } 350 351 static int 352 ccddetach(void) 353 { 354 struct ccd_softc *cs; 355 struct dev_ioctl_args ioctl_args; 356 int i; 357 int error = 0; 358 int eval; 359 360 bzero(&ioctl_args, sizeof(ioctl_args)); 361 362 for (i = 0; i < numccd; ++i) { 363 cs = &ccd_softc[i]; 364 if (cs->sc_dev == NULL) 365 continue; 366 ioctl_args.a_head.a_dev = cs->sc_dev; 367 ioctl_args.a_cmd = CCDIOCCLR; 368 ioctl_args.a_fflag = FWRITE; 369 eval = ccdioctl(&ioctl_args); 370 if (eval && eval != ENXIO) { 371 kprintf("ccd%d: In use, cannot detach\n", i); 372 error = EBUSY; 373 } 374 } 375 if (error == 0) { 376 for (i = 0; i < numccd; ++i) { 377 cs = &ccd_softc[i]; 378 if (cs->sc_dev == NULL) 379 continue; 380 disk_destroy(&cs->sc_disk); 381 cs->sc_dev = NULL; 382 } 383 if (ccd_softc) 384 kfree(ccd_softc, M_DEVBUF); 385 if (ccddevs) 386 kfree(ccddevs, M_DEVBUF); 387 } 388 return (error); 389 } 390 391 static int 392 ccd_modevent(module_t mod, int type, void *data) 393 { 394 int error = 0; 395 396 switch (type) { 397 case MOD_LOAD: 398 ccdattach(); 399 break; 400 401 case MOD_UNLOAD: 402 error = ccddetach(); 403 break; 404 405 default: /* MOD_SHUTDOWN etc */ 406 break; 407 } 408 return (error); 409 } 410 411 DEV_MODULE(ccd, ccd_modevent, NULL); 412 413 static int 414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 415 { 416 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 417 struct ccdcinfo *ci = NULL; /* XXX */ 418 int ix; 419 struct vnode *vp; 420 u_int64_t skip; 421 u_int64_t size; 422 u_int64_t minsize; 423 int maxsecsize; 424 struct partinfo dpart; 425 struct ccdgeom *ccg = &cs->sc_geom; 426 char tmppath[MAXPATHLEN]; 427 int error = 0; 428 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 432 #endif 433 434 cs->sc_size = 0; 435 cs->sc_ileave = ccd->ccd_interleave; 436 cs->sc_nccdisks = ccd->ccd_ndev; 437 438 /* Allocate space for the component info. */ 439 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 440 M_DEVBUF, M_WAITOK); 441 cs->sc_maxiosize = MAXPHYS; 442 443 /* 444 * Verify that each component piece exists and record 445 * relevant information about it. 446 */ 447 maxsecsize = 0; 448 minsize = 0; 449 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 450 vp = ccd->ccd_vpp[ix]; 451 ci = &cs->sc_cinfo[ix]; 452 ci->ci_vp = vp; 453 454 /* 455 * Copy in the pathname of the component. 456 */ 457 bzero(tmppath, sizeof(tmppath)); /* sanity */ 458 if ((error = copyinstr(cpaths[ix], tmppath, 459 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 kprintf("ccd%d: can't copy path, error = %d\n", 463 ccd->ccd_unit, error); 464 #endif 465 goto fail; 466 } 467 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 468 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 469 470 ci->ci_dev = vn_todev(vp); 471 if (ci->ci_dev->si_iosize_max && 472 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 473 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 474 } 475 476 /* 477 * Get partition information for the component. 478 */ 479 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, cred); 480 if (error) { 481 #ifdef DEBUG 482 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 483 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 484 ccd->ccd_unit, ci->ci_path, error); 485 #endif 486 goto fail; 487 } 488 if (dpart.fstype != FS_CCD && 489 !kuuid_is_ccd(&dpart.fstype_uuid)) { 490 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 491 ccd->ccd_unit, ci->ci_path); 492 error = EFTYPE; 493 goto fail; 494 } 495 if (maxsecsize < dpart.media_blksize) 496 maxsecsize = dpart.media_blksize; 497 498 /* 499 * Skip a certain amount of storage at the beginning of 500 * the component to make sure we don't infringe on any 501 * reserved sectors. This is handled entirely by 502 * dpart.reserved_blocks but we also impose a minimum 503 * of 16 sectors for backwards compatibility. 504 */ 505 skip = 16; 506 if (skip < dpart.reserved_blocks) 507 skip = dpart.reserved_blocks; 508 size = dpart.media_blocks - skip; 509 510 /* 511 * Calculate the size, truncating to an interleave 512 * boundary if necessary. 513 */ 514 if (cs->sc_ileave > 1) 515 size -= size % cs->sc_ileave; 516 517 if ((int64_t)size <= 0) { 518 #ifdef DEBUG 519 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 520 kprintf("ccd%d: %s: size == 0\n", 521 ccd->ccd_unit, ci->ci_path); 522 #endif 523 error = ENODEV; 524 goto fail; 525 } 526 527 /* 528 * Calculate the smallest uniform component, used 529 * elsewhere. 530 */ 531 if (minsize == 0 || minsize > size) 532 minsize = size; 533 ci->ci_skip = skip; 534 ci->ci_size = size; 535 cs->sc_size += size; 536 } 537 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 538 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 539 540 /* 541 * Don't allow the interleave to be smaller than 542 * the biggest component sector. 543 */ 544 if ((cs->sc_ileave > 0) && 545 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 546 #ifdef DEBUG 547 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 548 kprintf("ccd%d: interleave must be at least %d\n", 549 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 550 #endif 551 error = EINVAL; 552 goto fail; 553 } 554 555 /* 556 * If uniform interleave is desired set all sizes to that of 557 * the smallest component. This will guarentee that a single 558 * interleave table is generated. 559 * 560 * Lost space must be taken into account when calculating the 561 * overall size. Half the space is lost when CCDF_MIRROR is 562 * specified. One disk is lost when CCDF_PARITY is specified. 563 */ 564 if (ccd->ccd_flags & CCDF_UNIFORM) { 565 for (ci = cs->sc_cinfo; 566 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 567 ci->ci_size = minsize; 568 } 569 if (ccd->ccd_flags & CCDF_MIRROR) { 570 /* 571 * Check to see if an even number of components 572 * have been specified. The interleave must also 573 * be non-zero in order for us to be able to 574 * guarentee the topology. 575 */ 576 if (cs->sc_nccdisks % 2) { 577 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 578 error = EINVAL; 579 goto fail; 580 } 581 if (cs->sc_ileave == 0) { 582 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 583 error = EINVAL; 584 goto fail; 585 } 586 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 587 } else if (ccd->ccd_flags & CCDF_PARITY) { 588 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 589 } else { 590 if (cs->sc_ileave == 0) { 591 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 592 error = EINVAL; 593 goto fail; 594 } 595 cs->sc_size = cs->sc_nccdisks * minsize; 596 } 597 } 598 599 /* 600 * Construct the interleave table. 601 */ 602 ccdinterleave(cs, ccd->ccd_unit); 603 604 /* 605 * Create pseudo-geometry based on 1MB cylinders. It's 606 * pretty close. 607 */ 608 ccg->ccg_secsize = maxsecsize; 609 ccg->ccg_ntracks = 1; 610 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 611 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 612 613 /* 614 * Add an devstat entry for this device. 615 */ 616 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 617 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 618 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 619 DEVSTAT_PRIORITY_ARRAY); 620 621 cs->sc_flags |= CCDF_INITED; 622 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 623 cs->sc_unit = ccd->ccd_unit; 624 return (0); 625 fail: 626 while (ci > cs->sc_cinfo) { 627 ci--; 628 kfree(ci->ci_path, M_DEVBUF); 629 } 630 kfree(cs->sc_cinfo, M_DEVBUF); 631 cs->sc_cinfo = NULL; 632 return (error); 633 } 634 635 static void 636 ccdinterleave(struct ccd_softc *cs, int unit) 637 { 638 struct ccdcinfo *ci, *smallci; 639 struct ccdiinfo *ii; 640 u_int64_t bn; 641 u_int64_t lbn; 642 u_int64_t size; 643 int icount; 644 int ix; 645 646 #ifdef DEBUG 647 if (ccddebug & CCDB_INIT) 648 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 649 #endif 650 651 /* 652 * Allocate an interleave table. The worst case occurs when each 653 * of N disks is of a different size, resulting in N interleave 654 * tables. 655 * 656 * Chances are this is too big, but we don't care. 657 */ 658 icount = cs->sc_nccdisks + 1; 659 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 660 M_DEVBUF, M_WAITOK|M_ZERO); 661 662 /* 663 * Trivial case: no interleave (actually interleave of disk size). 664 * Each table entry represents a single component in its entirety. 665 * 666 * An interleave of 0 may not be used with a mirror or parity setup. 667 */ 668 if (cs->sc_ileave == 0) { 669 bn = 0; 670 ii = cs->sc_itable; 671 672 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 673 /* Allocate space for ii_index. */ 674 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 675 ii->ii_ndisk = 1; 676 ii->ii_startblk = bn; 677 ii->ii_startoff = 0; 678 ii->ii_index[0] = ix; 679 bn += cs->sc_cinfo[ix].ci_size; 680 ii++; 681 } 682 ii->ii_ndisk = 0; 683 #ifdef DEBUG 684 if (ccddebug & CCDB_INIT) 685 printiinfo(cs->sc_itable); 686 #endif 687 return; 688 } 689 690 /* 691 * The following isn't fast or pretty; it doesn't have to be. 692 */ 693 size = 0; 694 bn = lbn = 0; 695 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 696 /* 697 * Allocate space for ii_index. We might allocate more then 698 * we use. 699 */ 700 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 701 M_DEVBUF, M_WAITOK); 702 703 /* 704 * Locate the smallest of the remaining components 705 */ 706 smallci = NULL; 707 ci = cs->sc_cinfo; 708 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 709 if (ci->ci_size > size && 710 (smallci == NULL || 711 ci->ci_size < smallci->ci_size)) { 712 smallci = ci; 713 } 714 ++ci; 715 } 716 717 /* 718 * Nobody left, all done 719 */ 720 if (smallci == NULL) { 721 ii->ii_ndisk = 0; 722 break; 723 } 724 725 /* 726 * Record starting logical block using an sc_ileave blocksize. 727 */ 728 ii->ii_startblk = bn / cs->sc_ileave; 729 730 /* 731 * Record starting component block using an sc_ileave 732 * blocksize. This value is relative to the beginning of 733 * a component disk. 734 */ 735 ii->ii_startoff = lbn; 736 737 /* 738 * Determine how many disks take part in this interleave 739 * and record their indices. 740 */ 741 ix = 0; 742 for (ci = cs->sc_cinfo; 743 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 744 if (ci->ci_size >= smallci->ci_size) { 745 ii->ii_index[ix++] = ci - cs->sc_cinfo; 746 } 747 } 748 ii->ii_ndisk = ix; 749 750 /* 751 * Adjust for loop 752 */ 753 bn += ix * (smallci->ci_size - size); 754 lbn = smallci->ci_size / cs->sc_ileave; 755 size = smallci->ci_size; 756 } 757 if (ii == &cs->sc_itable[icount]) 758 panic("ccdinterlave software bug! table exhausted"); 759 #ifdef DEBUG 760 if (ccddebug & CCDB_INIT) 761 printiinfo(cs->sc_itable); 762 #endif 763 } 764 765 /* ARGSUSED */ 766 static int 767 ccdopen(struct dev_open_args *ap) 768 { 769 cdev_t dev = ap->a_head.a_dev; 770 int unit = ccdunit(dev); 771 struct ccd_softc *cs; 772 int error = 0; 773 774 #ifdef DEBUG 775 if (ccddebug & CCDB_FOLLOW) 776 kprintf("ccdopen(%x, %x)\n", dev, flags); 777 #endif 778 if (unit >= numccd) 779 return (ENXIO); 780 cs = &ccd_softc[unit]; 781 782 if ((error = ccdlock(cs)) == 0) { 783 ccdunlock(cs); 784 } 785 return (error); 786 } 787 788 /* ARGSUSED */ 789 static int 790 ccdclose(struct dev_close_args *ap) 791 { 792 cdev_t dev = ap->a_head.a_dev; 793 int unit = ccdunit(dev); 794 struct ccd_softc *cs; 795 int error = 0; 796 797 #ifdef DEBUG 798 if (ccddebug & CCDB_FOLLOW) 799 kprintf("ccdclose(%x, %x)\n", dev, flags); 800 #endif 801 802 if (unit >= numccd) 803 return (ENXIO); 804 cs = &ccd_softc[unit]; 805 if ((error = ccdlock(cs)) == 0) { 806 ccdunlock(cs); 807 } 808 return (error); 809 } 810 811 static int 812 ccdstrategy(struct dev_strategy_args *ap) 813 { 814 cdev_t dev = ap->a_head.a_dev; 815 struct bio *bio = ap->a_bio; 816 int unit = ccdunit(dev); 817 struct bio *nbio; 818 struct buf *bp = bio->bio_buf; 819 struct ccd_softc *cs = &ccd_softc[unit]; 820 u_int64_t pbn; /* in sc_secsize chunks */ 821 u_int32_t sz; /* in sc_secsize chunks */ 822 823 #ifdef DEBUG 824 if (ccddebug & CCDB_FOLLOW) 825 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 826 #endif 827 if ((cs->sc_flags & CCDF_INITED) == 0) { 828 bp->b_error = ENXIO; 829 goto error; 830 } 831 832 /* If it's a nil transfer, wake up the top half now. */ 833 if (bp->b_bcount == 0) { 834 bp->b_resid = 0; 835 goto done; 836 } 837 838 /* 839 * Do bounds checking and adjust transfer. If there's an 840 * error, the bounds check will flag that for us. 841 */ 842 843 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 844 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 845 846 /* 847 * If out of bounds return an error. If the request goes 848 * past EOF, clip the request as appropriate. If exactly 849 * at EOF, return success (don't clip), but with 0 bytes 850 * of I/O. 851 * 852 * Mark EOF B_INVAL (just like bad), indicating that the 853 * contents of the buffer, if any, is invalid. 854 */ 855 if ((int64_t)pbn < 0) 856 goto bad; 857 if (pbn + sz > cs->sc_size) { 858 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 859 goto bad; 860 if (pbn == cs->sc_size) { 861 bp->b_resid = bp->b_bcount; 862 bp->b_flags |= B_INVAL; 863 goto done; 864 } 865 sz = (long)(cs->sc_size - pbn); 866 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 867 } 868 nbio = bio; 869 870 bp->b_resid = bp->b_bcount; 871 nbio->bio_driver_info = dev; 872 873 /* 874 * "Start" the unit. 875 */ 876 crit_enter(); 877 ccdstart(cs, nbio); 878 crit_exit(); 879 return(0); 880 881 /* 882 * note: bio, not nbio, is valid at the done label. 883 */ 884 bad: 885 bp->b_error = EINVAL; 886 error: 887 bp->b_resid = bp->b_bcount; 888 bp->b_flags |= B_ERROR | B_INVAL; 889 done: 890 biodone(bio); 891 return(0); 892 } 893 894 static void 895 ccdstart(struct ccd_softc *cs, struct bio *bio) 896 { 897 long bcount, rcount; 898 struct ccdbuf *cbp[4]; 899 struct buf *bp = bio->bio_buf; 900 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 901 caddr_t addr; 902 off_t doffset; 903 904 #ifdef DEBUG 905 if (ccddebug & CCDB_FOLLOW) 906 kprintf("ccdstart(%x, %x)\n", cs, bp); 907 #endif 908 909 /* Record the transaction start */ 910 devstat_start_transaction(&cs->device_stats); 911 912 /* 913 * Allocate component buffers and fire off the requests 914 */ 915 doffset = bio->bio_offset; 916 addr = bp->b_data; 917 918 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 919 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 920 rcount = cbp[0]->cb_buf.b_bcount; 921 922 if (cs->sc_cflags & CCDF_MIRROR) { 923 /* 924 * Mirroring. Writes go to both disks, reads are 925 * taken from whichever disk seems most appropriate. 926 * 927 * We attempt to localize reads to the disk whos arm 928 * is nearest the read request. We ignore seeks due 929 * to writes when making this determination and we 930 * also try to avoid hogging. 931 */ 932 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 933 vn_strategy(cbp[0]->cb_vp, 934 &cbp[0]->cb_buf.b_bio1); 935 vn_strategy(cbp[1]->cb_vp, 936 &cbp[1]->cb_buf.b_bio1); 937 } else { 938 int pick = cs->sc_pick; 939 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 940 if (doffset < cs->sc_blk[pick] - range || 941 doffset > cs->sc_blk[pick] + range 942 ) { 943 cs->sc_pick = pick = 1 - pick; 944 } 945 cs->sc_blk[pick] = doffset + rcount; 946 vn_strategy(cbp[pick]->cb_vp, 947 &cbp[pick]->cb_buf.b_bio1); 948 } 949 } else { 950 /* 951 * Not mirroring 952 */ 953 vn_strategy(cbp[0]->cb_vp, 954 &cbp[0]->cb_buf.b_bio1); 955 } 956 doffset += rcount; 957 addr += rcount; 958 } 959 } 960 961 /* 962 * Build a component buffer header. 963 */ 964 static void 965 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 966 off_t doffset, caddr_t addr, long bcount) 967 { 968 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 969 struct ccdbuf *cbp; 970 u_int64_t bn; 971 u_int64_t cbn; 972 u_int64_t cboff; 973 off_t cbc; 974 975 #ifdef DEBUG 976 if (ccddebug & CCDB_IO) 977 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 978 cs, bp, bn, addr, bcount); 979 #endif 980 /* 981 * Determine which component bn falls in. 982 */ 983 bn = doffset / cs->sc_geom.ccg_secsize; 984 cbn = bn; 985 cboff = 0; 986 987 if (cs->sc_ileave == 0) { 988 /* 989 * Serially concatenated and neither a mirror nor a parity 990 * config. This is a special case. 991 */ 992 daddr_t sblk; 993 994 sblk = 0; 995 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 996 sblk += ci->ci_size; 997 cbn -= sblk; 998 } else { 999 struct ccdiinfo *ii; 1000 int ccdisk, off; 1001 1002 /* 1003 * Calculate cbn, the logical superblock (sc_ileave chunks), 1004 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1005 * to cbn. 1006 */ 1007 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1008 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1009 1010 /* 1011 * Figure out which interleave table to use. 1012 */ 1013 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1014 if (ii->ii_startblk > cbn) 1015 break; 1016 } 1017 ii--; 1018 1019 /* 1020 * off is the logical superblock relative to the beginning 1021 * of this interleave block. 1022 */ 1023 off = cbn - ii->ii_startblk; 1024 1025 /* 1026 * We must calculate which disk component to use (ccdisk), 1027 * and recalculate cbn to be the superblock relative to 1028 * the beginning of the component. This is typically done by 1029 * adding 'off' and ii->ii_startoff together. However, 'off' 1030 * must typically be divided by the number of components in 1031 * this interleave array to be properly convert it from a 1032 * CCD-relative logical superblock number to a 1033 * component-relative superblock number. 1034 */ 1035 if (ii->ii_ndisk == 1) { 1036 /* 1037 * When we have just one disk, it can't be a mirror 1038 * or a parity config. 1039 */ 1040 ccdisk = ii->ii_index[0]; 1041 cbn = ii->ii_startoff + off; 1042 } else { 1043 if (cs->sc_cflags & CCDF_MIRROR) { 1044 /* 1045 * We have forced a uniform mapping, resulting 1046 * in a single interleave array. We double 1047 * up on the first half of the available 1048 * components and our mirror is in the second 1049 * half. This only works with a single 1050 * interleave array because doubling up 1051 * doubles the number of sectors, so there 1052 * cannot be another interleave array because 1053 * the next interleave array's calculations 1054 * would be off. 1055 */ 1056 int ndisk2 = ii->ii_ndisk / 2; 1057 ccdisk = ii->ii_index[off % ndisk2]; 1058 cbn = ii->ii_startoff + off / ndisk2; 1059 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1060 } else if (cs->sc_cflags & CCDF_PARITY) { 1061 /* 1062 * XXX not implemented yet 1063 */ 1064 int ndisk2 = ii->ii_ndisk - 1; 1065 ccdisk = ii->ii_index[off % ndisk2]; 1066 cbn = ii->ii_startoff + off / ndisk2; 1067 if (cbn % ii->ii_ndisk <= ccdisk) 1068 ccdisk++; 1069 } else { 1070 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1071 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1072 } 1073 } 1074 1075 ci = &cs->sc_cinfo[ccdisk]; 1076 1077 /* 1078 * Convert cbn from a superblock to a normal block so it 1079 * can be used to calculate (along with cboff) the normal 1080 * block index into this particular disk. 1081 */ 1082 cbn *= cs->sc_ileave; 1083 } 1084 1085 /* 1086 * Fill in the component buf structure. 1087 * 1088 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1089 * will be truncated on device EOF so we use b_bufsize to detect 1090 * the case. 1091 */ 1092 cbp = getccdbuf(); 1093 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1094 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1095 cbp->cb_buf.b_data = addr; 1096 cbp->cb_vp = ci->ci_vp; 1097 if (cs->sc_ileave == 0) 1098 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1099 else 1100 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1101 if (cbc > cs->sc_maxiosize) 1102 cbc = cs->sc_maxiosize; 1103 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1104 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1105 1106 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1107 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1108 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1109 1110 /* 1111 * context for ccdiodone 1112 */ 1113 cbp->cb_obio = bio; 1114 cbp->cb_unit = cs - ccd_softc; 1115 cbp->cb_comp = ci - cs->sc_cinfo; 1116 1117 #ifdef DEBUG 1118 if (ccddebug & CCDB_IO) 1119 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1120 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1121 cbp->cb_buf.b_bio1.bio_offset, 1122 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1123 #endif 1124 cb[0] = cbp; 1125 1126 /* 1127 * Note: both I/O's setup when reading from mirror, but only one 1128 * will be executed. 1129 */ 1130 if (cs->sc_cflags & CCDF_MIRROR) { 1131 /* mirror, setup second I/O */ 1132 cbp = getccdbuf(); 1133 1134 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1135 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1136 cbp->cb_buf.b_data = addr; 1137 cbp->cb_vp = ci2->ci_vp; 1138 if (cs->sc_ileave == 0) 1139 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1140 else 1141 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1142 if (cbc > cs->sc_maxiosize) 1143 cbc = cs->sc_maxiosize; 1144 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1145 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1146 1147 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1148 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1149 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1150 1151 /* 1152 * context for ccdiodone 1153 */ 1154 cbp->cb_obio = bio; 1155 cbp->cb_unit = cs - ccd_softc; 1156 cbp->cb_comp = ci2 - cs->sc_cinfo; 1157 cb[1] = cbp; 1158 /* link together the ccdbuf's and clear "mirror done" flag */ 1159 cb[0]->cb_mirror = cb[1]; 1160 cb[1]->cb_mirror = cb[0]; 1161 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1162 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1163 } 1164 } 1165 1166 static void 1167 ccdintr(struct ccd_softc *cs, struct bio *bio) 1168 { 1169 struct buf *bp = bio->bio_buf; 1170 1171 #ifdef DEBUG 1172 if (ccddebug & CCDB_FOLLOW) 1173 kprintf("ccdintr(%x, %x)\n", cs, bp); 1174 #endif 1175 /* 1176 * Request is done for better or worse, wakeup the top half. 1177 */ 1178 if (bp->b_flags & B_ERROR) 1179 bp->b_resid = bp->b_bcount; 1180 devstat_end_transaction_buf(&cs->device_stats, bp); 1181 biodone(bio); 1182 } 1183 1184 /* 1185 * Called at interrupt time. 1186 * Mark the component as done and if all components are done, 1187 * take a ccd interrupt. 1188 */ 1189 static void 1190 ccdiodone(struct bio *bio) 1191 { 1192 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1193 struct bio *obio = cbp->cb_obio; 1194 struct buf *obp = obio->bio_buf; 1195 int unit = cbp->cb_unit; 1196 int count; 1197 1198 /* 1199 * Since we do not have exclusive access to underlying devices, 1200 * we can't keep cache translations around. 1201 */ 1202 clearbiocache(bio->bio_next); 1203 1204 crit_enter(); 1205 #ifdef DEBUG 1206 if (ccddebug & CCDB_FOLLOW) 1207 kprintf("ccdiodone(%x)\n", cbp); 1208 if (ccddebug & CCDB_IO) { 1209 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1210 obp, obp->b_bcount, obp->b_resid); 1211 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1212 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1213 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1214 cbp->cb_buf.b_bcount); 1215 } 1216 #endif 1217 1218 /* 1219 * If an error occured, report it. If this is a mirrored 1220 * configuration and the first of two possible reads, do not 1221 * set the error in the bp yet because the second read may 1222 * succeed. 1223 */ 1224 if (cbp->cb_buf.b_flags & B_ERROR) { 1225 const char *msg = ""; 1226 1227 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1228 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1229 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1230 /* 1231 * We will try our read on the other disk down 1232 * below, also reverse the default pick so if we 1233 * are doing a scan we do not keep hitting the 1234 * bad disk first. 1235 */ 1236 struct ccd_softc *cs = &ccd_softc[unit]; 1237 1238 msg = ", trying other disk"; 1239 cs->sc_pick = 1 - cs->sc_pick; 1240 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1241 } else { 1242 obp->b_flags |= B_ERROR; 1243 obp->b_error = cbp->cb_buf.b_error ? 1244 cbp->cb_buf.b_error : EIO; 1245 } 1246 kprintf("ccd%d: error %d on component %d " 1247 "offset %jd (ccd offset %jd)%s\n", 1248 unit, obp->b_error, cbp->cb_comp, 1249 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1250 (intmax_t)obio->bio_offset, 1251 msg); 1252 } 1253 1254 /* 1255 * Process mirror. If we are writing, I/O has been initiated on both 1256 * buffers and we fall through only after both are finished. 1257 * 1258 * If we are reading only one I/O is initiated at a time. If an 1259 * error occurs we initiate the second I/O and return, otherwise 1260 * we free the second I/O without initiating it. 1261 */ 1262 1263 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1264 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1265 /* 1266 * When writing, handshake with the second buffer 1267 * to determine when both are done. If both are not 1268 * done, return here. 1269 */ 1270 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1271 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1272 putccdbuf(cbp); 1273 crit_exit(); 1274 return; 1275 } 1276 } else { 1277 /* 1278 * When reading, either dispose of the second buffer 1279 * or initiate I/O on the second buffer if an error 1280 * occured with this one. 1281 */ 1282 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1283 if (cbp->cb_buf.b_flags & B_ERROR) { 1284 cbp->cb_mirror->cb_pflags |= 1285 CCDPF_MIRROR_DONE; 1286 vn_strategy( 1287 cbp->cb_mirror->cb_vp, 1288 &cbp->cb_mirror->cb_buf.b_bio1 1289 ); 1290 putccdbuf(cbp); 1291 crit_exit(); 1292 return; 1293 } else { 1294 putccdbuf(cbp->cb_mirror); 1295 /* fall through */ 1296 } 1297 } 1298 } 1299 } 1300 1301 /* 1302 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1303 */ 1304 count = cbp->cb_buf.b_bufsize; 1305 putccdbuf(cbp); 1306 1307 /* 1308 * If all done, "interrupt". 1309 */ 1310 obp->b_resid -= count; 1311 if (obp->b_resid < 0) 1312 panic("ccdiodone: count"); 1313 if (obp->b_resid == 0) 1314 ccdintr(&ccd_softc[unit], obio); 1315 crit_exit(); 1316 } 1317 1318 static int 1319 ccdioctl(struct dev_ioctl_args *ap) 1320 { 1321 cdev_t dev = ap->a_head.a_dev; 1322 int unit = ccdunit(dev); 1323 int i, j, lookedup = 0, error = 0; 1324 struct ccd_softc *cs; 1325 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1326 struct ccddevice ccd; 1327 struct disk_info info; 1328 char **cpp; 1329 struct vnode **vpp; 1330 1331 if (unit >= numccd) 1332 return (ENXIO); 1333 cs = &ccd_softc[unit]; 1334 1335 bzero(&ccd, sizeof(ccd)); 1336 1337 switch (ap->a_cmd) { 1338 case CCDIOCSET: 1339 if (cs->sc_flags & CCDF_INITED) 1340 return (EBUSY); 1341 1342 if ((ap->a_fflag & FWRITE) == 0) 1343 return (EBADF); 1344 1345 if ((error = ccdlock(cs)) != 0) 1346 return (error); 1347 1348 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1349 ccdunlock(cs); 1350 return (EINVAL); 1351 } 1352 1353 /* Fill in some important bits. */ 1354 ccd.ccd_unit = unit; 1355 ccd.ccd_interleave = ccio->ccio_ileave; 1356 if (ccd.ccd_interleave == 0 && 1357 ((ccio->ccio_flags & CCDF_MIRROR) || 1358 (ccio->ccio_flags & CCDF_PARITY))) { 1359 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1360 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1361 } 1362 if ((ccio->ccio_flags & CCDF_MIRROR) && 1363 (ccio->ccio_flags & CCDF_PARITY)) { 1364 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1365 ccio->ccio_flags &= ~CCDF_PARITY; 1366 } 1367 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1368 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1369 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1370 unit); 1371 ccio->ccio_flags |= CCDF_UNIFORM; 1372 } 1373 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1374 1375 /* 1376 * Allocate space for and copy in the array of 1377 * componet pathnames and device numbers. 1378 */ 1379 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1380 M_DEVBUF, M_WAITOK); 1381 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1382 M_DEVBUF, M_WAITOK); 1383 1384 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1385 ccio->ccio_ndisks * sizeof(char **)); 1386 if (error) { 1387 kfree(vpp, M_DEVBUF); 1388 kfree(cpp, M_DEVBUF); 1389 ccdunlock(cs); 1390 return (error); 1391 } 1392 1393 #ifdef DEBUG 1394 if (ccddebug & CCDB_INIT) { 1395 for (i = 0; i < ccio->ccio_ndisks; ++i) 1396 kprintf("ccdioctl: component %d: 0x%x\n", 1397 i, cpp[i]); 1398 } 1399 #endif 1400 1401 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1402 #ifdef DEBUG 1403 if (ccddebug & CCDB_INIT) 1404 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1405 #endif 1406 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1407 for (j = 0; j < lookedup; ++j) 1408 (void)vn_close(vpp[j], FREAD|FWRITE); 1409 kfree(vpp, M_DEVBUF); 1410 kfree(cpp, M_DEVBUF); 1411 ccdunlock(cs); 1412 return (error); 1413 } 1414 ++lookedup; 1415 } 1416 ccd.ccd_cpp = cpp; 1417 ccd.ccd_vpp = vpp; 1418 ccd.ccd_ndev = ccio->ccio_ndisks; 1419 1420 /* 1421 * Initialize the ccd. Fills in the softc for us. 1422 */ 1423 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1424 for (j = 0; j < lookedup; ++j) 1425 (void)vn_close(vpp[j], FREAD|FWRITE); 1426 kfree(vpp, M_DEVBUF); 1427 kfree(cpp, M_DEVBUF); 1428 ccdunlock(cs); 1429 return (error); 1430 } 1431 1432 /* 1433 * The ccd has been successfully initialized, so 1434 * we can place it into the array and read the disklabel. 1435 */ 1436 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1437 ccio->ccio_unit = unit; 1438 ccio->ccio_size = cs->sc_size; 1439 1440 bzero(&info, sizeof(info)); 1441 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1442 info.d_media_blocks = cs->sc_size; 1443 info.d_nheads = cs->sc_geom.ccg_ntracks; 1444 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1445 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1446 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1447 1448 /* 1449 * For cases where a label is directly applied to the ccd, 1450 * without slices, DSO_COMPATMBR forces one sector be 1451 * reserved for backwards compatibility. 1452 */ 1453 info.d_dsflags = DSO_COMPATMBR; 1454 disk_setdiskinfo(&cs->sc_disk, &info); 1455 1456 ccdunlock(cs); 1457 1458 break; 1459 1460 case CCDIOCCLR: 1461 if ((cs->sc_flags & CCDF_INITED) == 0) 1462 return (ENXIO); 1463 1464 if ((ap->a_fflag & FWRITE) == 0) 1465 return (EBADF); 1466 1467 if ((error = ccdlock(cs)) != 0) 1468 return (error); 1469 1470 if (dev_drefs(cs->sc_dev) > 1) { 1471 ccdunlock(cs); 1472 return (EBUSY); 1473 } 1474 1475 /* 1476 * Free ccd_softc information and clear entry. 1477 */ 1478 1479 /* Close the components and free their pathnames. */ 1480 for (i = 0; i < cs->sc_nccdisks; ++i) { 1481 /* 1482 * XXX: this close could potentially fail and 1483 * cause Bad Things. Maybe we need to force 1484 * the close to happen? 1485 */ 1486 #ifdef DEBUG 1487 if (ccddebug & CCDB_VNODE) 1488 vprint("CCDIOCCLR: vnode info", 1489 cs->sc_cinfo[i].ci_vp); 1490 #endif 1491 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1492 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1493 } 1494 1495 /* Free interleave index. */ 1496 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1497 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1498 1499 /* Free component info and interleave table. */ 1500 kfree(cs->sc_cinfo, M_DEVBUF); 1501 kfree(cs->sc_itable, M_DEVBUF); 1502 cs->sc_cinfo = NULL; 1503 cs->sc_itable = NULL; 1504 cs->sc_flags &= ~CCDF_INITED; 1505 1506 /* 1507 * Free ccddevice information and clear entry. 1508 */ 1509 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1510 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1511 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1512 1513 /* 1514 * And remove the devstat entry. 1515 */ 1516 devstat_remove_entry(&cs->device_stats); 1517 1518 /* This must be atomic. */ 1519 crit_enter(); 1520 ccdunlock(cs); 1521 crit_exit(); 1522 1523 break; 1524 1525 default: 1526 return (ENOTTY); 1527 } 1528 1529 return (0); 1530 } 1531 1532 static int 1533 ccddump(struct dev_dump_args *ap) 1534 { 1535 /* Not implemented. */ 1536 return ENXIO; 1537 } 1538 1539 /* 1540 * Lookup the provided name in the filesystem. If the file exists, 1541 * is a valid block device, and isn't being used by anyone else, 1542 * set *vpp to the file's vnode. 1543 */ 1544 static int 1545 ccdlookup(char *path, struct vnode **vpp) 1546 { 1547 struct nlookupdata nd; 1548 struct vnode *vp; 1549 int error; 1550 1551 *vpp = NULL; 1552 1553 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1554 if (error) 1555 return (error); 1556 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1557 #ifdef DEBUG 1558 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1559 kprintf("ccdlookup: vn_open error = %d\n", error); 1560 #endif 1561 goto done; 1562 } 1563 vp = nd.nl_open_vp; 1564 1565 if (vp->v_opencount > 1) { 1566 error = EBUSY; 1567 goto done; 1568 } 1569 1570 if (!vn_isdisk(vp, &error)) 1571 goto done; 1572 1573 #ifdef DEBUG 1574 if (ccddebug & CCDB_VNODE) 1575 vprint("ccdlookup: vnode info", vp); 1576 #endif 1577 1578 vn_unlock(vp); 1579 nd.nl_open_vp = NULL; 1580 nlookup_done(&nd); 1581 *vpp = vp; /* leave ref intact */ 1582 return (0); 1583 done: 1584 nlookup_done(&nd); 1585 return (error); 1586 } 1587 1588 /* 1589 * Wait interruptibly for an exclusive lock. 1590 * 1591 * XXX 1592 * Several drivers do this; it should be abstracted and made MP-safe. 1593 */ 1594 static int 1595 ccdlock(struct ccd_softc *cs) 1596 { 1597 int error; 1598 1599 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1600 cs->sc_flags |= CCDF_WANTED; 1601 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1602 return (error); 1603 } 1604 cs->sc_flags |= CCDF_LOCKED; 1605 return (0); 1606 } 1607 1608 /* 1609 * Unlock and wake up any waiters. 1610 */ 1611 static void 1612 ccdunlock(struct ccd_softc *cs) 1613 { 1614 1615 cs->sc_flags &= ~CCDF_LOCKED; 1616 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1617 cs->sc_flags &= ~CCDF_WANTED; 1618 wakeup(cs); 1619 } 1620 } 1621 1622 #ifdef DEBUG 1623 static void 1624 printiinfo(struct ccdiinfo *ii) 1625 { 1626 int ix, i; 1627 1628 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1629 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1630 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1631 for (i = 0; i < ii->ii_ndisk; i++) 1632 kprintf(" %d", ii->ii_index[i]); 1633 kprintf("\n"); 1634 } 1635 } 1636 #endif 1637 1638 1639 /* Local Variables: */ 1640 /* c-argdecl-indent: 8 */ 1641 /* c-continued-statement-offset: 8 */ 1642 /* c-indent-level: 8 */ 1643 /* End: */ 1644