1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 
76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 
104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/ccdvar.h> 145 146 #include <vm/vm_zone.h> 147 148 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 149 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 150 151 #include <sys/thread2.h> 152 #include <sys/buf2.h> 153 #include <sys/mplock2.h> 154 155 #if defined(CCDDEBUG) && !defined(DEBUG) 156 #define DEBUG 157 #endif 158 159 #ifdef DEBUG 160 #define CCDB_FOLLOW 0x01 161 #define CCDB_INIT 0x02 162 #define CCDB_IO 0x04 163 #define CCDB_LABEL 0x08 164 #define CCDB_VNODE 0x10 165 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 166 CCDB_VNODE; 167 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 168 #undef DEBUG 169 #endif 170 171 #define ccdunit(x) dkunit(x) 172 #define ccdpart(x) dkpart(x) 173 174 /* 175 This is how mirroring works (only writes are special): 176 177 When initiating 
a write, ccdbuffer() returns two "struct ccdbuf *"s
   linked together by the cb_mirror field.  "cb_pflags &
   CCDPF_MIRROR_DONE" is set to 0 on both of them.

   When a component returns to ccdiodone(), it checks if "cb_pflags &
   CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
   flag and returns.  If it is, it means its partner has already
   returned, so it will go to the regular cleanup.

 */

/*
 * Per-component I/O descriptor.
 *
 * One of these is handed out by getccdbuf() and filled in by ccdbuffer()
 * for every piece of an original request that is sent to a component
 * (two of them per piece when mirroring).  The embedded struct buf is the
 * buffer actually passed to vn_strategy(); the remaining fields give
 * ccdiodone() the context it needs to finish the original request.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct vnode	*cb_vp;		/* related vnode */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */

/* Device entry points, wired into ccd_ops below. */
static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;

/*
 * putccdbuf() keeps released buffers on the free list until it holds
 * this many; beyond that they are freed outright.
 */
#define NCCDFREEHIWAT	16

#define CDEV_MAJOR 74

/*
 * Device operations vector.  ccdattach() passes it to disk_create()
 * for every unit.
 */
static struct dev_ops ccd_ops = {
	{ "ccd", CDEV_MAJOR, D_DISK },
	.d_open =	ccdopen,
	.d_close =	ccdclose,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	ccdioctl,
	.d_strategy =	ccdstrategy,
	.d_dump =	ccddump
};

/* called during module initialization */
static	void ccdattach (void);
static	int ccddetach (void);
static	int ccd_modevent (module_t, int, void *);

/* called by biodone() at interrupt time */
static	void ccdiodone (struct bio *bio);

/* internal helpers */
static	void ccdstart (struct ccd_softc *, struct bio *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct bio *);
static	int ccdinit (struct ccddevice *, char **, struct ucred *);
static	int ccdlookup (char *, struct vnode **);
236 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 237 struct bio *, off_t, caddr_t, long); 238 static int ccdlock (struct ccd_softc *); 239 static void ccdunlock (struct ccd_softc *); 240 241 #ifdef DEBUG 242 static void printiinfo (struct ccdiinfo *); 243 #endif 244 245 /* Non-private for the benefit of libkvm. */ 246 struct ccd_softc *ccd_softc; 247 struct ccddevice *ccddevs; 248 struct ccdbuf *ccdfreebufs; 249 static int numccdfreebufs; 250 static int numccd = 0; 251 252 /* 253 * getccdbuf() - Allocate and zero a ccd buffer. 254 * 255 * This routine is called at splbio(). 256 */ 257 258 static __inline 259 struct ccdbuf * 260 getccdbuf(void) 261 { 262 struct ccdbuf *cbp; 263 264 /* 265 * Allocate from freelist or malloc as necessary 266 */ 267 if ((cbp = ccdfreebufs) != NULL) { 268 ccdfreebufs = cbp->cb_freenext; 269 --numccdfreebufs; 270 reinitbufbio(&cbp->cb_buf); 271 } else { 272 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 273 initbufbio(&cbp->cb_buf); 274 } 275 276 /* 277 * independant struct buf initialization 278 */ 279 buf_dep_init(&cbp->cb_buf); 280 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 281 BUF_KERNPROC(&cbp->cb_buf); 282 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 283 284 return(cbp); 285 } 286 287 /* 288 * putccdbuf() - Free a ccd buffer. 289 * 290 * This routine is called at splbio(). 291 */ 292 293 static __inline 294 void 295 putccdbuf(struct ccdbuf *cbp) 296 { 297 BUF_UNLOCK(&cbp->cb_buf); 298 299 if (numccdfreebufs < NCCDFREEHIWAT) { 300 cbp->cb_freenext = ccdfreebufs; 301 ccdfreebufs = cbp; 302 ++numccdfreebufs; 303 } else { 304 uninitbufbio(&cbp->cb_buf); 305 kfree((caddr_t)cbp, M_DEVBUF); 306 } 307 } 308 309 /* 310 * Called by main() during pseudo-device attachment. All we need 311 * to do is allocate enough space for devices to be configured later, and 312 * add devsw entries. 
313 */ 314 static void 315 ccdattach(void) 316 { 317 struct disk_info info; 318 struct ccd_softc *cs; 319 int i; 320 int num = NCCD; 321 322 if (num > 1) 323 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 324 else 325 kprintf("ccd0: Concatenated disk driver\n"); 326 327 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 328 M_WAITOK | M_ZERO); 329 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 330 M_WAITOK | M_ZERO); 331 numccd = num; 332 333 /* 334 * With normal disk devices the open simply fails if the media 335 * is not present. With CCD we have to be able to open the 336 * raw disk to use the ioctl's to set it up, so create a dummy 337 * disk info structure so dscheck() doesn't blow up. 338 */ 339 bzero(&info, sizeof(info)); 340 info.d_media_blksize = DEV_BSIZE; 341 342 for (i = 0; i < numccd; ++i) { 343 cs = &ccd_softc[i]; 344 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 345 cs->sc_dev->si_drv1 = cs; 346 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 347 disk_setdiskinfo(&cs->sc_disk, &info); 348 } 349 } 350 351 static int 352 ccddetach(void) 353 { 354 struct ccd_softc *cs; 355 struct dev_ioctl_args ioctl_args; 356 int i; 357 int error = 0; 358 int eval; 359 360 bzero(&ioctl_args, sizeof(ioctl_args)); 361 362 for (i = 0; i < numccd; ++i) { 363 cs = &ccd_softc[i]; 364 if (cs->sc_dev == NULL) 365 continue; 366 ioctl_args.a_head.a_dev = cs->sc_dev; 367 ioctl_args.a_cmd = CCDIOCCLR; 368 ioctl_args.a_fflag = FWRITE; 369 eval = ccdioctl(&ioctl_args); 370 if (eval && eval != ENXIO) { 371 kprintf("ccd%d: In use, cannot detach\n", i); 372 error = EBUSY; 373 } 374 } 375 if (error == 0) { 376 for (i = 0; i < numccd; ++i) { 377 cs = &ccd_softc[i]; 378 if (cs->sc_dev == NULL) 379 continue; 380 disk_destroy(&cs->sc_disk); 381 cs->sc_dev = NULL; 382 } 383 if (ccd_softc) 384 kfree(ccd_softc, M_DEVBUF); 385 if (ccddevs) 386 kfree(ccddevs, M_DEVBUF); 387 } 388 return (error); 389 } 390 391 static int 392 ccd_modevent(module_t mod, int 
type, void *data) 393 { 394 int error = 0; 395 396 switch (type) { 397 case MOD_LOAD: 398 ccdattach(); 399 break; 400 401 case MOD_UNLOAD: 402 error = ccddetach(); 403 break; 404 405 default: /* MOD_SHUTDOWN etc */ 406 break; 407 } 408 return (error); 409 } 410 411 DEV_MODULE(ccd, ccd_modevent, NULL); 412 413 static int 414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 415 { 416 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 417 struct ccdcinfo *ci = NULL; /* XXX */ 418 int ix; 419 struct vnode *vp; 420 u_int64_t skip; 421 u_int64_t size; 422 u_int64_t minsize; 423 int maxsecsize; 424 struct partinfo dpart; 425 struct ccdgeom *ccg = &cs->sc_geom; 426 char tmppath[MAXPATHLEN]; 427 int error = 0; 428 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 432 #endif 433 434 cs->sc_size = 0; 435 cs->sc_ileave = ccd->ccd_interleave; 436 cs->sc_nccdisks = ccd->ccd_ndev; 437 438 /* Allocate space for the component info. */ 439 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 440 M_DEVBUF, M_WAITOK); 441 cs->sc_maxiosize = MAXPHYS; 442 443 /* 444 * Verify that each component piece exists and record 445 * relevant information about it. 446 */ 447 maxsecsize = 0; 448 minsize = 0; 449 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 450 vp = ccd->ccd_vpp[ix]; 451 ci = &cs->sc_cinfo[ix]; 452 ci->ci_vp = vp; 453 454 /* 455 * Copy in the pathname of the component. 
456 */ 457 bzero(tmppath, sizeof(tmppath)); /* sanity */ 458 if ((error = copyinstr(cpaths[ix], tmppath, 459 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 kprintf("ccd%d: can't copy path, error = %d\n", 463 ccd->ccd_unit, error); 464 #endif 465 goto fail; 466 } 467 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 468 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 469 470 ci->ci_dev = vn_todev(vp); 471 if (ci->ci_dev->si_iosize_max && 472 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 473 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 474 } 475 476 /* 477 * Get partition information for the component. 478 */ 479 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 480 cred, NULL); 481 if (error) { 482 #ifdef DEBUG 483 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 484 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 485 ccd->ccd_unit, ci->ci_path, error); 486 #endif 487 goto fail; 488 } 489 if (dpart.fstype != FS_CCD && 490 !kuuid_is_ccd(&dpart.fstype_uuid)) { 491 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 492 ccd->ccd_unit, ci->ci_path); 493 error = EFTYPE; 494 goto fail; 495 } 496 if (maxsecsize < dpart.media_blksize) 497 maxsecsize = dpart.media_blksize; 498 499 /* 500 * Skip a certain amount of storage at the beginning of 501 * the component to make sure we don't infringe on any 502 * reserved sectors. This is handled entirely by 503 * dpart.reserved_blocks but we also impose a minimum 504 * of 16 sectors for backwards compatibility. 505 */ 506 skip = 16; 507 if (skip < dpart.reserved_blocks) 508 skip = dpart.reserved_blocks; 509 size = dpart.media_blocks - skip; 510 511 /* 512 * Calculate the size, truncating to an interleave 513 * boundary if necessary. 
514 */ 515 if (cs->sc_ileave > 1) 516 size -= size % cs->sc_ileave; 517 518 if ((int64_t)size <= 0) { 519 #ifdef DEBUG 520 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 521 kprintf("ccd%d: %s: size == 0\n", 522 ccd->ccd_unit, ci->ci_path); 523 #endif 524 error = ENODEV; 525 goto fail; 526 } 527 528 /* 529 * Calculate the smallest uniform component, used 530 * elsewhere. 531 */ 532 if (minsize == 0 || minsize > size) 533 minsize = size; 534 ci->ci_skip = skip; 535 ci->ci_size = size; 536 cs->sc_size += size; 537 } 538 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 539 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 540 541 /* 542 * Don't allow the interleave to be smaller than 543 * the biggest component sector. 544 */ 545 if ((cs->sc_ileave > 0) && 546 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 547 #ifdef DEBUG 548 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 549 kprintf("ccd%d: interleave must be at least %d\n", 550 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 551 #endif 552 error = EINVAL; 553 goto fail; 554 } 555 556 /* 557 * If uniform interleave is desired set all sizes to that of 558 * the smallest component. This will guarentee that a single 559 * interleave table is generated. 560 * 561 * Lost space must be taken into account when calculating the 562 * overall size. Half the space is lost when CCDF_MIRROR is 563 * specified. One disk is lost when CCDF_PARITY is specified. 564 */ 565 if (ccd->ccd_flags & CCDF_UNIFORM) { 566 for (ci = cs->sc_cinfo; 567 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 568 ci->ci_size = minsize; 569 } 570 if (ccd->ccd_flags & CCDF_MIRROR) { 571 /* 572 * Check to see if an even number of components 573 * have been specified. The interleave must also 574 * be non-zero in order for us to be able to 575 * guarentee the topology. 
576 */ 577 if (cs->sc_nccdisks % 2) { 578 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 579 error = EINVAL; 580 goto fail; 581 } 582 if (cs->sc_ileave == 0) { 583 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 584 error = EINVAL; 585 goto fail; 586 } 587 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 588 } else if (ccd->ccd_flags & CCDF_PARITY) { 589 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 590 } else { 591 if (cs->sc_ileave == 0) { 592 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 593 error = EINVAL; 594 goto fail; 595 } 596 cs->sc_size = cs->sc_nccdisks * minsize; 597 } 598 } 599 600 /* 601 * Construct the interleave table. 602 */ 603 ccdinterleave(cs, ccd->ccd_unit); 604 605 /* 606 * Create pseudo-geometry based on 1MB cylinders. It's 607 * pretty close. 608 */ 609 ccg->ccg_secsize = maxsecsize; 610 ccg->ccg_ntracks = 1; 611 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 612 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 613 614 /* 615 * Add an devstat entry for this device. 616 */ 617 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 618 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 619 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 620 DEVSTAT_PRIORITY_ARRAY); 621 622 cs->sc_flags |= CCDF_INITED; 623 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... 
*/ 624 cs->sc_unit = ccd->ccd_unit; 625 return (0); 626 fail: 627 while (ci > cs->sc_cinfo) { 628 ci--; 629 kfree(ci->ci_path, M_DEVBUF); 630 } 631 kfree(cs->sc_cinfo, M_DEVBUF); 632 cs->sc_cinfo = NULL; 633 return (error); 634 } 635 636 static void 637 ccdinterleave(struct ccd_softc *cs, int unit) 638 { 639 struct ccdcinfo *ci, *smallci; 640 struct ccdiinfo *ii; 641 u_int64_t bn; 642 u_int64_t lbn; 643 u_int64_t size; 644 int icount; 645 int ix; 646 647 #ifdef DEBUG 648 if (ccddebug & CCDB_INIT) 649 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 650 #endif 651 652 /* 653 * Allocate an interleave table. The worst case occurs when each 654 * of N disks is of a different size, resulting in N interleave 655 * tables. 656 * 657 * Chances are this is too big, but we don't care. 658 */ 659 icount = cs->sc_nccdisks + 1; 660 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 661 M_DEVBUF, M_WAITOK|M_ZERO); 662 663 /* 664 * Trivial case: no interleave (actually interleave of disk size). 665 * Each table entry represents a single component in its entirety. 666 * 667 * An interleave of 0 may not be used with a mirror or parity setup. 668 */ 669 if (cs->sc_ileave == 0) { 670 bn = 0; 671 ii = cs->sc_itable; 672 673 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 674 /* Allocate space for ii_index. */ 675 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 676 ii->ii_ndisk = 1; 677 ii->ii_startblk = bn; 678 ii->ii_startoff = 0; 679 ii->ii_index[0] = ix; 680 bn += cs->sc_cinfo[ix].ci_size; 681 ii++; 682 } 683 ii->ii_ndisk = 0; 684 #ifdef DEBUG 685 if (ccddebug & CCDB_INIT) 686 printiinfo(cs->sc_itable); 687 #endif 688 return; 689 } 690 691 /* 692 * The following isn't fast or pretty; it doesn't have to be. 693 */ 694 size = 0; 695 bn = lbn = 0; 696 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 697 /* 698 * Allocate space for ii_index. We might allocate more then 699 * we use. 
700 */ 701 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 702 M_DEVBUF, M_WAITOK); 703 704 /* 705 * Locate the smallest of the remaining components 706 */ 707 smallci = NULL; 708 ci = cs->sc_cinfo; 709 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 710 if (ci->ci_size > size && 711 (smallci == NULL || 712 ci->ci_size < smallci->ci_size)) { 713 smallci = ci; 714 } 715 ++ci; 716 } 717 718 /* 719 * Nobody left, all done 720 */ 721 if (smallci == NULL) { 722 ii->ii_ndisk = 0; 723 break; 724 } 725 726 /* 727 * Record starting logical block using an sc_ileave blocksize. 728 */ 729 ii->ii_startblk = bn / cs->sc_ileave; 730 731 /* 732 * Record starting component block using an sc_ileave 733 * blocksize. This value is relative to the beginning of 734 * a component disk. 735 */ 736 ii->ii_startoff = lbn; 737 738 /* 739 * Determine how many disks take part in this interleave 740 * and record their indices. 741 */ 742 ix = 0; 743 for (ci = cs->sc_cinfo; 744 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 745 if (ci->ci_size >= smallci->ci_size) { 746 ii->ii_index[ix++] = ci - cs->sc_cinfo; 747 } 748 } 749 ii->ii_ndisk = ix; 750 751 /* 752 * Adjust for loop 753 */ 754 bn += ix * (smallci->ci_size - size); 755 lbn = smallci->ci_size / cs->sc_ileave; 756 size = smallci->ci_size; 757 } 758 if (ii == &cs->sc_itable[icount]) 759 panic("ccdinterlave software bug! 
table exhausted"); 760 #ifdef DEBUG 761 if (ccddebug & CCDB_INIT) 762 printiinfo(cs->sc_itable); 763 #endif 764 } 765 766 /* ARGSUSED */ 767 static int 768 ccdopen(struct dev_open_args *ap) 769 { 770 cdev_t dev = ap->a_head.a_dev; 771 int unit = ccdunit(dev); 772 struct ccd_softc *cs; 773 int error = 0; 774 775 #ifdef DEBUG 776 if (ccddebug & CCDB_FOLLOW) 777 kprintf("ccdopen(%x, %x)\n", dev, flags); 778 #endif 779 if (unit >= numccd) 780 return (ENXIO); 781 cs = &ccd_softc[unit]; 782 783 if ((error = ccdlock(cs)) == 0) { 784 ccdunlock(cs); 785 } 786 return (error); 787 } 788 789 /* ARGSUSED */ 790 static int 791 ccdclose(struct dev_close_args *ap) 792 { 793 cdev_t dev = ap->a_head.a_dev; 794 int unit = ccdunit(dev); 795 struct ccd_softc *cs; 796 int error = 0; 797 798 #ifdef DEBUG 799 if (ccddebug & CCDB_FOLLOW) 800 kprintf("ccdclose(%x, %x)\n", dev, flags); 801 #endif 802 803 if (unit >= numccd) 804 return (ENXIO); 805 cs = &ccd_softc[unit]; 806 if ((error = ccdlock(cs)) == 0) { 807 ccdunlock(cs); 808 } 809 return (error); 810 } 811 812 static int 813 ccdstrategy(struct dev_strategy_args *ap) 814 { 815 cdev_t dev = ap->a_head.a_dev; 816 struct bio *bio = ap->a_bio; 817 int unit = ccdunit(dev); 818 struct bio *nbio; 819 struct buf *bp = bio->bio_buf; 820 struct ccd_softc *cs = &ccd_softc[unit]; 821 u_int64_t pbn; /* in sc_secsize chunks */ 822 u_int32_t sz; /* in sc_secsize chunks */ 823 824 #ifdef DEBUG 825 if (ccddebug & CCDB_FOLLOW) 826 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 827 #endif 828 if ((cs->sc_flags & CCDF_INITED) == 0) { 829 bp->b_error = ENXIO; 830 goto error; 831 } 832 833 /* If it's a nil transfer, wake up the top half now. */ 834 if (bp->b_bcount == 0) { 835 bp->b_resid = 0; 836 goto done; 837 } 838 839 /* 840 * Do bounds checking and adjust transfer. If there's an 841 * error, the bounds check will flag that for us. 
842 */ 843 844 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 845 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 846 847 /* 848 * If out of bounds return an error. If the request goes 849 * past EOF, clip the request as appropriate. If exactly 850 * at EOF, return success (don't clip), but with 0 bytes 851 * of I/O. 852 * 853 * Mark EOF B_INVAL (just like bad), indicating that the 854 * contents of the buffer, if any, is invalid. 855 */ 856 if ((int64_t)pbn < 0) 857 goto bad; 858 if (pbn + sz > cs->sc_size) { 859 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 860 goto bad; 861 if (pbn == cs->sc_size) { 862 bp->b_resid = bp->b_bcount; 863 bp->b_flags |= B_INVAL; 864 goto done; 865 } 866 sz = (long)(cs->sc_size - pbn); 867 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 868 } 869 nbio = bio; 870 871 bp->b_resid = bp->b_bcount; 872 nbio->bio_driver_info = dev; 873 874 /* 875 * "Start" the unit. 876 */ 877 crit_enter(); 878 ccdstart(cs, nbio); 879 crit_exit(); 880 return(0); 881 882 /* 883 * note: bio, not nbio, is valid at the done label. 884 */ 885 bad: 886 bp->b_error = EINVAL; 887 error: 888 bp->b_resid = bp->b_bcount; 889 bp->b_flags |= B_ERROR | B_INVAL; 890 done: 891 biodone(bio); 892 return(0); 893 } 894 895 static void 896 ccdstart(struct ccd_softc *cs, struct bio *bio) 897 { 898 long bcount, rcount; 899 struct ccdbuf *cbp[4]; 900 struct buf *bp = bio->bio_buf; 901 /* XXX! 
: 2 reads and 2 writes for RAID 4/5 */ 902 caddr_t addr; 903 off_t doffset; 904 905 #ifdef DEBUG 906 if (ccddebug & CCDB_FOLLOW) 907 kprintf("ccdstart(%x, %x)\n", cs, bp); 908 #endif 909 910 /* Record the transaction start */ 911 devstat_start_transaction(&cs->device_stats); 912 913 /* 914 * Allocate component buffers and fire off the requests 915 */ 916 doffset = bio->bio_offset; 917 addr = bp->b_data; 918 919 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 920 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 921 rcount = cbp[0]->cb_buf.b_bcount; 922 923 if (cs->sc_cflags & CCDF_MIRROR) { 924 /* 925 * Mirroring. Writes go to both disks, reads are 926 * taken from whichever disk seems most appropriate. 927 * 928 * We attempt to localize reads to the disk whos arm 929 * is nearest the read request. We ignore seeks due 930 * to writes when making this determination and we 931 * also try to avoid hogging. 932 */ 933 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 934 vn_strategy(cbp[0]->cb_vp, 935 &cbp[0]->cb_buf.b_bio1); 936 vn_strategy(cbp[1]->cb_vp, 937 &cbp[1]->cb_buf.b_bio1); 938 } else { 939 int pick = cs->sc_pick; 940 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 941 if (doffset < cs->sc_blk[pick] - range || 942 doffset > cs->sc_blk[pick] + range 943 ) { 944 cs->sc_pick = pick = 1 - pick; 945 } 946 cs->sc_blk[pick] = doffset + rcount; 947 vn_strategy(cbp[pick]->cb_vp, 948 &cbp[pick]->cb_buf.b_bio1); 949 } 950 } else { 951 /* 952 * Not mirroring 953 */ 954 vn_strategy(cbp[0]->cb_vp, 955 &cbp[0]->cb_buf.b_bio1); 956 } 957 doffset += rcount; 958 addr += rcount; 959 } 960 } 961 962 /* 963 * Build a component buffer header. 
964 */ 965 static void 966 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 967 off_t doffset, caddr_t addr, long bcount) 968 { 969 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 970 struct ccdbuf *cbp; 971 u_int64_t bn; 972 u_int64_t cbn; 973 u_int64_t cboff; 974 off_t cbc; 975 976 #ifdef DEBUG 977 if (ccddebug & CCDB_IO) 978 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 979 cs, bp, bn, addr, bcount); 980 #endif 981 /* 982 * Determine which component bn falls in. 983 */ 984 bn = doffset / cs->sc_geom.ccg_secsize; 985 cbn = bn; 986 cboff = 0; 987 988 if (cs->sc_ileave == 0) { 989 /* 990 * Serially concatenated and neither a mirror nor a parity 991 * config. This is a special case. 992 */ 993 daddr_t sblk; 994 995 sblk = 0; 996 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 997 sblk += ci->ci_size; 998 cbn -= sblk; 999 } else { 1000 struct ccdiinfo *ii; 1001 int ccdisk, off; 1002 1003 /* 1004 * Calculate cbn, the logical superblock (sc_ileave chunks), 1005 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1006 * to cbn. 1007 */ 1008 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1009 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1010 1011 /* 1012 * Figure out which interleave table to use. 1013 */ 1014 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1015 if (ii->ii_startblk > cbn) 1016 break; 1017 } 1018 ii--; 1019 1020 /* 1021 * off is the logical superblock relative to the beginning 1022 * of this interleave block. 1023 */ 1024 off = cbn - ii->ii_startblk; 1025 1026 /* 1027 * We must calculate which disk component to use (ccdisk), 1028 * and recalculate cbn to be the superblock relative to 1029 * the beginning of the component. This is typically done by 1030 * adding 'off' and ii->ii_startoff together. 
However, 'off' 1031 * must typically be divided by the number of components in 1032 * this interleave array to be properly convert it from a 1033 * CCD-relative logical superblock number to a 1034 * component-relative superblock number. 1035 */ 1036 if (ii->ii_ndisk == 1) { 1037 /* 1038 * When we have just one disk, it can't be a mirror 1039 * or a parity config. 1040 */ 1041 ccdisk = ii->ii_index[0]; 1042 cbn = ii->ii_startoff + off; 1043 } else { 1044 if (cs->sc_cflags & CCDF_MIRROR) { 1045 /* 1046 * We have forced a uniform mapping, resulting 1047 * in a single interleave array. We double 1048 * up on the first half of the available 1049 * components and our mirror is in the second 1050 * half. This only works with a single 1051 * interleave array because doubling up 1052 * doubles the number of sectors, so there 1053 * cannot be another interleave array because 1054 * the next interleave array's calculations 1055 * would be off. 1056 */ 1057 int ndisk2 = ii->ii_ndisk / 2; 1058 ccdisk = ii->ii_index[off % ndisk2]; 1059 cbn = ii->ii_startoff + off / ndisk2; 1060 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1061 } else if (cs->sc_cflags & CCDF_PARITY) { 1062 /* 1063 * XXX not implemented yet 1064 */ 1065 int ndisk2 = ii->ii_ndisk - 1; 1066 ccdisk = ii->ii_index[off % ndisk2]; 1067 cbn = ii->ii_startoff + off / ndisk2; 1068 if (cbn % ii->ii_ndisk <= ccdisk) 1069 ccdisk++; 1070 } else { 1071 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1072 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1073 } 1074 } 1075 1076 ci = &cs->sc_cinfo[ccdisk]; 1077 1078 /* 1079 * Convert cbn from a superblock to a normal block so it 1080 * can be used to calculate (along with cboff) the normal 1081 * block index into this particular disk. 1082 */ 1083 cbn *= cs->sc_ileave; 1084 } 1085 1086 /* 1087 * Fill in the component buf structure. 1088 * 1089 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1090 * will be truncated on device EOF so we use b_bufsize to detect 1091 * the case. 
1092 */ 1093 cbp = getccdbuf(); 1094 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1095 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1096 cbp->cb_buf.b_data = addr; 1097 cbp->cb_vp = ci->ci_vp; 1098 if (cs->sc_ileave == 0) 1099 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1100 else 1101 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1102 if (cbc > cs->sc_maxiosize) 1103 cbc = cs->sc_maxiosize; 1104 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1105 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1106 1107 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1108 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1109 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1110 1111 /* 1112 * context for ccdiodone 1113 */ 1114 cbp->cb_obio = bio; 1115 cbp->cb_unit = cs - ccd_softc; 1116 cbp->cb_comp = ci - cs->sc_cinfo; 1117 1118 #ifdef DEBUG 1119 if (ccddebug & CCDB_IO) 1120 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1121 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1122 cbp->cb_buf.b_bio1.bio_offset, 1123 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1124 #endif 1125 cb[0] = cbp; 1126 1127 /* 1128 * Note: both I/O's setup when reading from mirror, but only one 1129 * will be executed. 1130 */ 1131 if (cs->sc_cflags & CCDF_MIRROR) { 1132 /* mirror, setup second I/O */ 1133 cbp = getccdbuf(); 1134 1135 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1136 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1137 cbp->cb_buf.b_data = addr; 1138 cbp->cb_vp = ci2->ci_vp; 1139 if (cs->sc_ileave == 0) 1140 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1141 else 1142 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1143 if (cbc > cs->sc_maxiosize) 1144 cbc = cs->sc_maxiosize; 1145 cbp->cb_buf.b_bcount = (cbc < bcount) ? 
cbc : bcount; 1146 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1147 1148 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1149 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1150 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1151 1152 /* 1153 * context for ccdiodone 1154 */ 1155 cbp->cb_obio = bio; 1156 cbp->cb_unit = cs - ccd_softc; 1157 cbp->cb_comp = ci2 - cs->sc_cinfo; 1158 cb[1] = cbp; 1159 /* link together the ccdbuf's and clear "mirror done" flag */ 1160 cb[0]->cb_mirror = cb[1]; 1161 cb[1]->cb_mirror = cb[0]; 1162 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1163 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1164 } 1165 } 1166 1167 static void 1168 ccdintr(struct ccd_softc *cs, struct bio *bio) 1169 { 1170 struct buf *bp = bio->bio_buf; 1171 1172 #ifdef DEBUG 1173 if (ccddebug & CCDB_FOLLOW) 1174 kprintf("ccdintr(%x, %x)\n", cs, bp); 1175 #endif 1176 /* 1177 * Request is done for better or worse, wakeup the top half. 1178 */ 1179 if (bp->b_flags & B_ERROR) 1180 bp->b_resid = bp->b_bcount; 1181 devstat_end_transaction_buf(&cs->device_stats, bp); 1182 biodone(bio); 1183 } 1184 1185 /* 1186 * Called at interrupt time. 1187 * 1188 * Mark the component as done and if all components are done, 1189 * take a ccd interrupt. 1190 */ 1191 static void 1192 ccdiodone(struct bio *bio) 1193 { 1194 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1195 struct bio *obio = cbp->cb_obio; 1196 struct buf *obp = obio->bio_buf; 1197 int unit = cbp->cb_unit; 1198 int count; 1199 1200 /* 1201 * Since we do not have exclusive access to underlying devices, 1202 * we can't keep cache translations around. 
1203 */ 1204 clearbiocache(bio->bio_next); 1205 1206 get_mplock(); 1207 crit_enter(); 1208 #ifdef DEBUG 1209 if (ccddebug & CCDB_FOLLOW) 1210 kprintf("ccdiodone(%x)\n", cbp); 1211 if (ccddebug & CCDB_IO) { 1212 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1213 obp, obp->b_bcount, obp->b_resid); 1214 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1215 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1216 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1217 cbp->cb_buf.b_bcount); 1218 } 1219 #endif 1220 1221 /* 1222 * If an error occured, report it. If this is a mirrored 1223 * configuration and the first of two possible reads, do not 1224 * set the error in the bp yet because the second read may 1225 * succeed. 1226 */ 1227 if (cbp->cb_buf.b_flags & B_ERROR) { 1228 const char *msg = ""; 1229 1230 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1231 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1232 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1233 /* 1234 * We will try our read on the other disk down 1235 * below, also reverse the default pick so if we 1236 * are doing a scan we do not keep hitting the 1237 * bad disk first. 1238 */ 1239 struct ccd_softc *cs = &ccd_softc[unit]; 1240 1241 msg = ", trying other disk"; 1242 cs->sc_pick = 1 - cs->sc_pick; 1243 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1244 } else { 1245 obp->b_flags |= B_ERROR; 1246 obp->b_error = cbp->cb_buf.b_error ? 1247 cbp->cb_buf.b_error : EIO; 1248 } 1249 kprintf("ccd%d: error %d on component %d " 1250 "offset %jd (ccd offset %jd)%s\n", 1251 unit, obp->b_error, cbp->cb_comp, 1252 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1253 (intmax_t)obio->bio_offset, 1254 msg); 1255 } 1256 1257 /* 1258 * Process mirror. If we are writing, I/O has been initiated on both 1259 * buffers and we fall through only after both are finished. 1260 * 1261 * If we are reading only one I/O is initiated at a time. 
If an 1262 * error occurs we initiate the second I/O and return, otherwise 1263 * we free the second I/O without initiating it. 1264 */ 1265 1266 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1267 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1268 /* 1269 * When writing, handshake with the second buffer 1270 * to determine when both are done. If both are not 1271 * done, return here. 1272 */ 1273 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1274 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1275 putccdbuf(cbp); 1276 crit_exit(); 1277 rel_mplock(); 1278 return; 1279 } 1280 } else { 1281 /* 1282 * When reading, either dispose of the second buffer 1283 * or initiate I/O on the second buffer if an error 1284 * occured with this one. 1285 */ 1286 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1287 if (cbp->cb_buf.b_flags & B_ERROR) { 1288 cbp->cb_mirror->cb_pflags |= 1289 CCDPF_MIRROR_DONE; 1290 vn_strategy( 1291 cbp->cb_mirror->cb_vp, 1292 &cbp->cb_mirror->cb_buf.b_bio1 1293 ); 1294 putccdbuf(cbp); 1295 crit_exit(); 1296 rel_mplock(); 1297 return; 1298 } else { 1299 putccdbuf(cbp->cb_mirror); 1300 /* fall through */ 1301 } 1302 } 1303 } 1304 } 1305 1306 /* 1307 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1308 */ 1309 count = cbp->cb_buf.b_bufsize; 1310 putccdbuf(cbp); 1311 1312 /* 1313 * If all done, "interrupt". 
1314 */ 1315 obp->b_resid -= count; 1316 if (obp->b_resid < 0) 1317 panic("ccdiodone: count"); 1318 if (obp->b_resid == 0) 1319 ccdintr(&ccd_softc[unit], obio); 1320 crit_exit(); 1321 rel_mplock(); 1322 } 1323 1324 static int 1325 ccdioctl(struct dev_ioctl_args *ap) 1326 { 1327 cdev_t dev = ap->a_head.a_dev; 1328 int unit = ccdunit(dev); 1329 int i, j, lookedup = 0, error = 0; 1330 struct ccd_softc *cs; 1331 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1332 struct ccddevice ccd; 1333 struct disk_info info; 1334 char **cpp; 1335 struct vnode **vpp; 1336 1337 if (unit >= numccd) 1338 return (ENXIO); 1339 cs = &ccd_softc[unit]; 1340 1341 bzero(&ccd, sizeof(ccd)); 1342 1343 switch (ap->a_cmd) { 1344 case CCDIOCSET: 1345 if (cs->sc_flags & CCDF_INITED) 1346 return (EBUSY); 1347 1348 if ((ap->a_fflag & FWRITE) == 0) 1349 return (EBADF); 1350 1351 if ((error = ccdlock(cs)) != 0) 1352 return (error); 1353 1354 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1355 ccdunlock(cs); 1356 return (EINVAL); 1357 } 1358 1359 /* Fill in some important bits. */ 1360 ccd.ccd_unit = unit; 1361 ccd.ccd_interleave = ccio->ccio_ileave; 1362 if (ccd.ccd_interleave == 0 && 1363 ((ccio->ccio_flags & CCDF_MIRROR) || 1364 (ccio->ccio_flags & CCDF_PARITY))) { 1365 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1366 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1367 } 1368 if ((ccio->ccio_flags & CCDF_MIRROR) && 1369 (ccio->ccio_flags & CCDF_PARITY)) { 1370 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1371 ccio->ccio_flags &= ~CCDF_PARITY; 1372 } 1373 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1374 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1375 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1376 unit); 1377 ccio->ccio_flags |= CCDF_UNIFORM; 1378 } 1379 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1380 1381 /* 1382 * Allocate space for and copy in the array of 1383 * componet pathnames and device numbers. 
1384 */ 1385 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1386 M_DEVBUF, M_WAITOK); 1387 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1388 M_DEVBUF, M_WAITOK); 1389 1390 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1391 ccio->ccio_ndisks * sizeof(char **)); 1392 if (error) { 1393 kfree(vpp, M_DEVBUF); 1394 kfree(cpp, M_DEVBUF); 1395 ccdunlock(cs); 1396 return (error); 1397 } 1398 1399 #ifdef DEBUG 1400 if (ccddebug & CCDB_INIT) { 1401 for (i = 0; i < ccio->ccio_ndisks; ++i) 1402 kprintf("ccdioctl: component %d: 0x%x\n", 1403 i, cpp[i]); 1404 } 1405 #endif 1406 1407 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1408 #ifdef DEBUG 1409 if (ccddebug & CCDB_INIT) 1410 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1411 #endif 1412 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1413 for (j = 0; j < lookedup; ++j) 1414 (void)vn_close(vpp[j], FREAD|FWRITE); 1415 kfree(vpp, M_DEVBUF); 1416 kfree(cpp, M_DEVBUF); 1417 ccdunlock(cs); 1418 return (error); 1419 } 1420 ++lookedup; 1421 } 1422 ccd.ccd_cpp = cpp; 1423 ccd.ccd_vpp = vpp; 1424 ccd.ccd_ndev = ccio->ccio_ndisks; 1425 1426 /* 1427 * Initialize the ccd. Fills in the softc for us. 1428 */ 1429 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1430 for (j = 0; j < lookedup; ++j) 1431 (void)vn_close(vpp[j], FREAD|FWRITE); 1432 kfree(vpp, M_DEVBUF); 1433 kfree(cpp, M_DEVBUF); 1434 ccdunlock(cs); 1435 return (error); 1436 } 1437 1438 /* 1439 * The ccd has been successfully initialized, so 1440 * we can place it into the array and read the disklabel. 
1441 */ 1442 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1443 ccio->ccio_unit = unit; 1444 ccio->ccio_size = cs->sc_size; 1445 1446 bzero(&info, sizeof(info)); 1447 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1448 info.d_media_blocks = cs->sc_size; 1449 info.d_nheads = cs->sc_geom.ccg_ntracks; 1450 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1451 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1452 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1453 1454 /* 1455 * For cases where a label is directly applied to the ccd, 1456 * without slices, DSO_COMPATMBR forces one sector be 1457 * reserved for backwards compatibility. 1458 */ 1459 info.d_dsflags = DSO_COMPATMBR; 1460 disk_setdiskinfo(&cs->sc_disk, &info); 1461 1462 ccdunlock(cs); 1463 1464 break; 1465 1466 case CCDIOCCLR: 1467 if ((cs->sc_flags & CCDF_INITED) == 0) 1468 return (ENXIO); 1469 1470 if ((ap->a_fflag & FWRITE) == 0) 1471 return (EBADF); 1472 1473 if ((error = ccdlock(cs)) != 0) 1474 return (error); 1475 1476 if (dev_drefs(cs->sc_dev) > 1) { 1477 ccdunlock(cs); 1478 return (EBUSY); 1479 } 1480 1481 /* 1482 * Free ccd_softc information and clear entry. 1483 */ 1484 1485 /* Close the components and free their pathnames. */ 1486 for (i = 0; i < cs->sc_nccdisks; ++i) { 1487 /* 1488 * XXX: this close could potentially fail and 1489 * cause Bad Things. Maybe we need to force 1490 * the close to happen? 1491 */ 1492 #ifdef DEBUG 1493 if (ccddebug & CCDB_VNODE) 1494 vprint("CCDIOCCLR: vnode info", 1495 cs->sc_cinfo[i].ci_vp); 1496 #endif 1497 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1498 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1499 } 1500 1501 /* Free interleave index. */ 1502 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1503 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1504 1505 /* Free component info and interleave table. 
*/ 1506 kfree(cs->sc_cinfo, M_DEVBUF); 1507 kfree(cs->sc_itable, M_DEVBUF); 1508 cs->sc_cinfo = NULL; 1509 cs->sc_itable = NULL; 1510 cs->sc_flags &= ~CCDF_INITED; 1511 1512 /* 1513 * Free ccddevice information and clear entry. 1514 */ 1515 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1516 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1517 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1518 1519 /* 1520 * And remove the devstat entry. 1521 */ 1522 devstat_remove_entry(&cs->device_stats); 1523 1524 /* This must be atomic. */ 1525 crit_enter(); 1526 ccdunlock(cs); 1527 crit_exit(); 1528 1529 break; 1530 1531 default: 1532 return (ENOTTY); 1533 } 1534 1535 return (0); 1536 } 1537 1538 static int 1539 ccddump(struct dev_dump_args *ap) 1540 { 1541 /* Not implemented. */ 1542 return ENXIO; 1543 } 1544 1545 /* 1546 * Lookup the provided name in the filesystem. If the file exists, 1547 * is a valid block device, and isn't being used by anyone else, 1548 * set *vpp to the file's vnode. 1549 */ 1550 static int 1551 ccdlookup(char *path, struct vnode **vpp) 1552 { 1553 struct nlookupdata nd; 1554 struct vnode *vp; 1555 int error; 1556 1557 *vpp = NULL; 1558 1559 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1560 if (error) 1561 return (error); 1562 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1563 #ifdef DEBUG 1564 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1565 kprintf("ccdlookup: vn_open error = %d\n", error); 1566 #endif 1567 goto done; 1568 } 1569 vp = nd.nl_open_vp; 1570 1571 if (vp->v_opencount > 1) { 1572 error = EBUSY; 1573 goto done; 1574 } 1575 1576 if (!vn_isdisk(vp, &error)) 1577 goto done; 1578 1579 #ifdef DEBUG 1580 if (ccddebug & CCDB_VNODE) 1581 vprint("ccdlookup: vnode info", vp); 1582 #endif 1583 1584 vn_unlock(vp); 1585 nd.nl_open_vp = NULL; 1586 nlookup_done(&nd); 1587 *vpp = vp; /* leave ref intact */ 1588 return (0); 1589 done: 1590 nlookup_done(&nd); 1591 return (error); 1592 } 1593 1594 /* 1595 * Wait interruptibly for an exclusive 
lock. 1596 * 1597 * XXX 1598 * Several drivers do this; it should be abstracted and made MP-safe. 1599 */ 1600 static int 1601 ccdlock(struct ccd_softc *cs) 1602 { 1603 int error; 1604 1605 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1606 cs->sc_flags |= CCDF_WANTED; 1607 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1608 return (error); 1609 } 1610 cs->sc_flags |= CCDF_LOCKED; 1611 return (0); 1612 } 1613 1614 /* 1615 * Unlock and wake up any waiters. 1616 */ 1617 static void 1618 ccdunlock(struct ccd_softc *cs) 1619 { 1620 1621 cs->sc_flags &= ~CCDF_LOCKED; 1622 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1623 cs->sc_flags &= ~CCDF_WANTED; 1624 wakeup(cs); 1625 } 1626 } 1627 1628 #ifdef DEBUG 1629 static void 1630 printiinfo(struct ccdiinfo *ii) 1631 { 1632 int ix, i; 1633 1634 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1635 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1636 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1637 for (i = 0; i < ii->ii_ndisk; i++) 1638 kprintf(" %d", ii->ii_index[i]); 1639 kprintf("\n"); 1640 } 1641 } 1642 #endif 1643 1644 1645 /* Local Variables: */ 1646 /* c-argdecl-indent: 8 */ 1647 /* c-continued-statement-offset: 8 */ 1648 /* c-indent-level: 8 */ 1649 /* End: */ 1650