1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.25 2006/04/03 02:02:32 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 #include <sys/buf2.h> 109 #include <sys/ccdvar.h> 110 111 #include <vm/vm_zone.h> 112 113 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 114 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 115 116 #include <sys/thread2.h> 117 118 #if defined(CCDDEBUG) && !defined(DEBUG) 119 #define DEBUG 120 #endif 121 122 #ifdef DEBUG 123 #define CCDB_FOLLOW 0x01 124 #define CCDB_INIT 0x02 125 #define CCDB_IO 0x04 126 #define CCDB_LABEL 0x08 127 #define CCDB_VNODE 0x10 128 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 129 CCDB_VNODE; 130 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 131 #undef DEBUG 132 #endif 133 134 #define ccdunit(x) dkunit(x) 135 #define ccdpart(x) dkpart(x) 136 137 /* 138 This is how mirroring works (only writes are special): 139 140 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 141 linked together by the cb_mirror field. "cb_pflags & 142 CCDPF_MIRROR_DONE" is set to 0 on both of them. 143 144 When a component returns to ccdiodone(), it checks if "cb_pflags & 145 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 146 flag and returns. If it is, it means its partner has already 147 returned, so it will go to the regular cleanup. 148 149 */ 150 151 struct ccdbuf { 152 struct buf cb_buf; /* new I/O buf */ 153 struct bio *cb_obio; /* ptr. to original I/O buf */ 154 struct ccdbuf *cb_freenext; /* free list link */ 155 int cb_unit; /* target unit */ 156 int cb_comp; /* target component */ 157 int cb_pflags; /* mirror/parity status flag */ 158 struct ccdbuf *cb_mirror; /* mirror counterpart */ 159 }; 160 161 /* bits in cb_pflags */ 162 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 163 164 #define CCDLABELDEV(dev) \ 165 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 166 167 static d_open_t ccdopen; 168 static d_close_t ccdclose; 169 static d_strategy_t ccdstrategy; 170 static d_ioctl_t ccdioctl; 171 static d_dump_t ccddump; 172 static d_psize_t ccdsize; 173 174 #define NCCDFREEHIWAT 16 175 176 #define CDEV_MAJOR 74 177 178 static struct cdevsw ccd_cdevsw = { 179 /* name */ "ccd", 180 /* maj */ CDEV_MAJOR, 181 /* flags */ D_DISK, 182 /* port */ NULL, 183 /* clone */ NULL, 184 185 /* open */ ccdopen, 186 /* close */ ccdclose, 187 /* read */ physread, 188 /* write */ physwrite, 189 /* ioctl */ ccdioctl, 190 /* poll */ nopoll, 191 /* mmap */ nommap, 192 /* strategy */ ccdstrategy, 193 /* dump */ ccddump, 194 /* psize */ ccdsize 195 }; 196 197 /* called during module initialization */ 198 static void ccdattach (void); 199 static int ccd_modevent (module_t, int, void *); 200 201 /* called by biodone() at interrupt time */ 202 static void ccdiodone (struct bio *bio); 203 204 static void ccdstart (struct ccd_softc *, struct bio *); 205 static void ccdinterleave (struct ccd_softc *, int); 206 static void ccdintr (struct ccd_softc *, struct bio *); 207 static int ccdinit (struct ccddevice *, char **, struct thread *); 208 static int ccdlookup (char *, struct thread *td, struct vnode **); 209 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 210 struct bio *, off_t, caddr_t, long); 211 static void ccdgetdisklabel (dev_t); 212 static void ccdmakedisklabel (struct ccd_softc *); 213 static int ccdlock (struct ccd_softc *); 214 static void ccdunlock (struct ccd_softc *); 215 216 #ifdef DEBUG 217 static void printiinfo (struct ccdiinfo *); 218 #endif 219 220 /* Non-private for the benefit of libkvm. */ 221 struct ccd_softc *ccd_softc; 222 struct ccddevice *ccddevs; 223 struct ccdbuf *ccdfreebufs; 224 static int numccdfreebufs; 225 static int numccd = 0; 226 227 /* 228 * getccdbuf() - Allocate and zero a ccd buffer. 229 * 230 * This routine is called at splbio(). 231 */ 232 233 static __inline 234 struct ccdbuf * 235 getccdbuf(void) 236 { 237 struct ccdbuf *cbp; 238 239 /* 240 * Allocate from freelist or malloc as necessary 241 */ 242 if ((cbp = ccdfreebufs) != NULL) { 243 ccdfreebufs = cbp->cb_freenext; 244 --numccdfreebufs; 245 reinitbufbio(&cbp->cb_buf); 246 } else { 247 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 248 initbufbio(&cbp->cb_buf); 249 } 250 251 /* 252 * independant struct buf initialization 253 */ 254 LIST_INIT(&cbp->cb_buf.b_dep); 255 BUF_LOCKINIT(&cbp->cb_buf); 256 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 257 BUF_KERNPROC(&cbp->cb_buf); 258 259 return(cbp); 260 } 261 262 /* 263 * putccdbuf() - Free a ccd buffer. 264 * 265 * This routine is called at splbio(). 266 */ 267 268 static __inline 269 void 270 putccdbuf(struct ccdbuf *cbp) 271 { 272 BUF_UNLOCK(&cbp->cb_buf); 273 BUF_LOCKFREE(&cbp->cb_buf); 274 275 if (numccdfreebufs < NCCDFREEHIWAT) { 276 cbp->cb_freenext = ccdfreebufs; 277 ccdfreebufs = cbp; 278 ++numccdfreebufs; 279 } else { 280 free((caddr_t)cbp, M_DEVBUF); 281 } 282 } 283 284 285 /* 286 * Number of blocks to untouched in front of a component partition. 287 * This is to avoid violating its disklabel area when it starts at the 288 * beginning of the slice. 289 */ 290 #if !defined(CCD_OFFSET) 291 #define CCD_OFFSET 16 292 #endif 293 294 /* 295 * Called by main() during pseudo-device attachment. All we need 296 * to do is allocate enough space for devices to be configured later, and 297 * add devsw entries. 298 */ 299 static void 300 ccdattach(void) 301 { 302 int i; 303 int num = NCCD; 304 305 if (num > 1) 306 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 307 else 308 printf("ccd0: Concatenated disk driver\n"); 309 310 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 311 M_WAITOK | M_ZERO); 312 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 313 M_WAITOK | M_ZERO); 314 numccd = num; 315 316 cdevsw_add(&ccd_cdevsw, 0, 0); 317 /* XXX: is this necessary? */ 318 for (i = 0; i < numccd; ++i) 319 ccddevs[i].ccd_dk = -1; 320 } 321 322 static int 323 ccd_modevent(module_t mod, int type, void *data) 324 { 325 int error = 0; 326 327 switch (type) { 328 case MOD_LOAD: 329 ccdattach(); 330 break; 331 332 case MOD_UNLOAD: 333 printf("ccd0: Unload not supported!\n"); 334 error = EOPNOTSUPP; 335 break; 336 337 default: /* MOD_SHUTDOWN etc */ 338 break; 339 } 340 return (error); 341 } 342 343 DEV_MODULE(ccd, ccd_modevent, NULL); 344 345 static int 346 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 347 { 348 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 349 struct ccdcinfo *ci = NULL; /* XXX */ 350 size_t size; 351 int ix; 352 struct vnode *vp; 353 size_t minsize; 354 int maxsecsize; 355 struct partinfo dpart; 356 struct ccdgeom *ccg = &cs->sc_geom; 357 char tmppath[MAXPATHLEN]; 358 int error = 0; 359 struct ucred *cred; 360 361 KKASSERT(td->td_proc); 362 cred = td->td_proc->p_ucred; 363 364 #ifdef DEBUG 365 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 366 printf("ccdinit: unit %d\n", ccd->ccd_unit); 367 #endif 368 369 cs->sc_size = 0; 370 cs->sc_ileave = ccd->ccd_interleave; 371 cs->sc_nccdisks = ccd->ccd_ndev; 372 373 /* Allocate space for the component info. */ 374 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 375 M_DEVBUF, M_WAITOK); 376 377 /* 378 * Verify that each component piece exists and record 379 * relevant information about it. 380 */ 381 maxsecsize = 0; 382 minsize = 0; 383 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 384 vp = ccd->ccd_vpp[ix]; 385 ci = &cs->sc_cinfo[ix]; 386 ci->ci_vp = vp; 387 388 /* 389 * Copy in the pathname of the component. 390 */ 391 bzero(tmppath, sizeof(tmppath)); /* sanity */ 392 if ((error = copyinstr(cpaths[ix], tmppath, 393 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 394 #ifdef DEBUG 395 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 396 printf("ccd%d: can't copy path, error = %d\n", 397 ccd->ccd_unit, error); 398 #endif 399 goto fail; 400 } 401 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 402 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 403 404 ci->ci_dev = vn_todev(vp); 405 406 /* 407 * Get partition information for the component. 408 */ 409 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 410 FREAD, cred, td)) != 0) { 411 #ifdef DEBUG 412 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 413 printf("ccd%d: %s: ioctl failed, error = %d\n", 414 ccd->ccd_unit, ci->ci_path, error); 415 #endif 416 goto fail; 417 } 418 if (dpart.part->p_fstype == FS_BSDFFS) { 419 maxsecsize = 420 ((dpart.disklab->d_secsize > maxsecsize) ? 421 dpart.disklab->d_secsize : maxsecsize); 422 size = dpart.part->p_size - CCD_OFFSET; 423 } else { 424 #ifdef DEBUG 425 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 426 printf("ccd%d: %s: incorrect partition type\n", 427 ccd->ccd_unit, ci->ci_path); 428 #endif 429 error = EFTYPE; 430 goto fail; 431 } 432 433 /* 434 * Calculate the size, truncating to an interleave 435 * boundary if necessary. 436 */ 437 438 if (cs->sc_ileave > 1) 439 size -= size % cs->sc_ileave; 440 441 if (size == 0) { 442 #ifdef DEBUG 443 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 444 printf("ccd%d: %s: size == 0\n", 445 ccd->ccd_unit, ci->ci_path); 446 #endif 447 error = ENODEV; 448 goto fail; 449 } 450 451 if (minsize == 0 || size < minsize) 452 minsize = size; 453 ci->ci_size = size; 454 cs->sc_size += size; 455 } 456 457 /* 458 * Don't allow the interleave to be smaller than 459 * the biggest component sector. 460 */ 461 if ((cs->sc_ileave > 0) && 462 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 463 #ifdef DEBUG 464 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 465 printf("ccd%d: interleave must be at least %d\n", 466 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 467 #endif 468 error = EINVAL; 469 goto fail; 470 } 471 472 /* 473 * If uniform interleave is desired set all sizes to that of 474 * the smallest component. This will guarentee that a single 475 * interleave table is generated. 476 * 477 * Lost space must be taken into account when calculating the 478 * overall size. Half the space is lost when CCDF_MIRROR is 479 * specified. One disk is lost when CCDF_PARITY is specified. 480 */ 481 if (ccd->ccd_flags & CCDF_UNIFORM) { 482 for (ci = cs->sc_cinfo; 483 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 484 ci->ci_size = minsize; 485 } 486 if (ccd->ccd_flags & CCDF_MIRROR) { 487 /* 488 * Check to see if an even number of components 489 * have been specified. The interleave must also 490 * be non-zero in order for us to be able to 491 * guarentee the topology. 492 */ 493 if (cs->sc_nccdisks % 2) { 494 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 495 error = EINVAL; 496 goto fail; 497 } 498 if (cs->sc_ileave == 0) { 499 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 500 error = EINVAL; 501 goto fail; 502 } 503 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 504 } else if (ccd->ccd_flags & CCDF_PARITY) { 505 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 506 } else { 507 if (cs->sc_ileave == 0) { 508 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 509 error = EINVAL; 510 goto fail; 511 } 512 cs->sc_size = cs->sc_nccdisks * minsize; 513 } 514 } 515 516 /* 517 * Construct the interleave table. 518 */ 519 ccdinterleave(cs, ccd->ccd_unit); 520 521 /* 522 * Create pseudo-geometry based on 1MB cylinders. It's 523 * pretty close. 524 */ 525 ccg->ccg_secsize = maxsecsize; 526 ccg->ccg_ntracks = 1; 527 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 528 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 529 530 /* 531 * Add an devstat entry for this device. 532 */ 533 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 534 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 535 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 536 DEVSTAT_PRIORITY_ARRAY); 537 538 cs->sc_flags |= CCDF_INITED; 539 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 540 cs->sc_unit = ccd->ccd_unit; 541 return (0); 542 fail: 543 while (ci > cs->sc_cinfo) { 544 ci--; 545 free(ci->ci_path, M_DEVBUF); 546 } 547 free(cs->sc_cinfo, M_DEVBUF); 548 return (error); 549 } 550 551 static void 552 ccdinterleave(struct ccd_softc *cs, int unit) 553 { 554 struct ccdcinfo *ci, *smallci; 555 struct ccdiinfo *ii; 556 daddr_t bn, lbn; 557 int ix; 558 u_long size; 559 560 #ifdef DEBUG 561 if (ccddebug & CCDB_INIT) 562 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 563 #endif 564 565 /* 566 * Allocate an interleave table. The worst case occurs when each 567 * of N disks is of a different size, resulting in N interleave 568 * tables. 569 * 570 * Chances are this is too big, but we don't care. 571 */ 572 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 573 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 574 bzero((caddr_t)cs->sc_itable, size); 575 576 /* 577 * Trivial case: no interleave (actually interleave of disk size). 578 * Each table entry represents a single component in its entirety. 579 * 580 * An interleave of 0 may not be used with a mirror or parity setup. 581 */ 582 if (cs->sc_ileave == 0) { 583 bn = 0; 584 ii = cs->sc_itable; 585 586 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 587 /* Allocate space for ii_index. */ 588 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 589 ii->ii_ndisk = 1; 590 ii->ii_startblk = bn; 591 ii->ii_startoff = 0; 592 ii->ii_index[0] = ix; 593 bn += cs->sc_cinfo[ix].ci_size; 594 ii++; 595 } 596 ii->ii_ndisk = 0; 597 #ifdef DEBUG 598 if (ccddebug & CCDB_INIT) 599 printiinfo(cs->sc_itable); 600 #endif 601 return; 602 } 603 604 /* 605 * The following isn't fast or pretty; it doesn't have to be. 606 */ 607 size = 0; 608 bn = lbn = 0; 609 for (ii = cs->sc_itable; ; ii++) { 610 /* 611 * Allocate space for ii_index. We might allocate more then 612 * we use. 613 */ 614 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 615 M_DEVBUF, M_WAITOK); 616 617 /* 618 * Locate the smallest of the remaining components 619 */ 620 smallci = NULL; 621 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 622 ci++) { 623 if (ci->ci_size > size && 624 (smallci == NULL || 625 ci->ci_size < smallci->ci_size)) { 626 smallci = ci; 627 } 628 } 629 630 /* 631 * Nobody left, all done 632 */ 633 if (smallci == NULL) { 634 ii->ii_ndisk = 0; 635 break; 636 } 637 638 /* 639 * Record starting logical block using an sc_ileave blocksize. 640 */ 641 ii->ii_startblk = bn / cs->sc_ileave; 642 643 /* 644 * Record starting comopnent block using an sc_ileave 645 * blocksize. This value is relative to the beginning of 646 * a component disk. 647 */ 648 ii->ii_startoff = lbn; 649 650 /* 651 * Determine how many disks take part in this interleave 652 * and record their indices. 653 */ 654 ix = 0; 655 for (ci = cs->sc_cinfo; 656 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 657 if (ci->ci_size >= smallci->ci_size) { 658 ii->ii_index[ix++] = ci - cs->sc_cinfo; 659 } 660 } 661 ii->ii_ndisk = ix; 662 bn += ix * (smallci->ci_size - size); 663 lbn = smallci->ci_size / cs->sc_ileave; 664 size = smallci->ci_size; 665 } 666 #ifdef DEBUG 667 if (ccddebug & CCDB_INIT) 668 printiinfo(cs->sc_itable); 669 #endif 670 } 671 672 /* ARGSUSED */ 673 static int 674 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 675 { 676 int unit = ccdunit(dev); 677 struct ccd_softc *cs; 678 struct disklabel *lp; 679 int error = 0, part, pmask; 680 681 #ifdef DEBUG 682 if (ccddebug & CCDB_FOLLOW) 683 printf("ccdopen(%x, %x)\n", dev, flags); 684 #endif 685 if (unit >= numccd) 686 return (ENXIO); 687 cs = &ccd_softc[unit]; 688 689 if ((error = ccdlock(cs)) != 0) 690 return (error); 691 692 lp = &cs->sc_label; 693 694 part = ccdpart(dev); 695 pmask = (1 << part); 696 697 /* 698 * If we're initialized, check to see if there are any other 699 * open partitions. If not, then it's safe to update 700 * the in-core disklabel. 701 */ 702 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 703 ccdgetdisklabel(dev); 704 705 /* Check that the partition exists. */ 706 if (part != RAW_PART && ((part >= lp->d_npartitions) || 707 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 708 error = ENXIO; 709 goto done; 710 } 711 712 cs->sc_openmask |= pmask; 713 done: 714 ccdunlock(cs); 715 return (0); 716 } 717 718 /* ARGSUSED */ 719 static int 720 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 721 { 722 int unit = ccdunit(dev); 723 struct ccd_softc *cs; 724 int error = 0, part; 725 726 #ifdef DEBUG 727 if (ccddebug & CCDB_FOLLOW) 728 printf("ccdclose(%x, %x)\n", dev, flags); 729 #endif 730 731 if (unit >= numccd) 732 return (ENXIO); 733 cs = &ccd_softc[unit]; 734 735 if ((error = ccdlock(cs)) != 0) 736 return (error); 737 738 part = ccdpart(dev); 739 740 /* ...that much closer to allowing unconfiguration... */ 741 cs->sc_openmask &= ~(1 << part); 742 ccdunlock(cs); 743 return (0); 744 } 745 746 static void 747 ccdstrategy(dev_t dev, struct bio *bio) 748 { 749 int unit = ccdunit(dev); 750 struct bio *nbio; 751 struct buf *bp = bio->bio_buf; 752 struct ccd_softc *cs = &ccd_softc[unit]; 753 int wlabel; 754 struct disklabel *lp; 755 756 #ifdef DEBUG 757 if (ccddebug & CCDB_FOLLOW) 758 printf("ccdstrategy(%x): unit %d\n", bp, unit); 759 #endif 760 if ((cs->sc_flags & CCDF_INITED) == 0) { 761 bp->b_error = ENXIO; 762 bp->b_flags |= B_ERROR; 763 goto done; 764 } 765 766 /* If it's a nil transfer, wake up the top half now. */ 767 if (bp->b_bcount == 0) 768 goto done; 769 770 lp = &cs->sc_label; 771 772 /* 773 * Do bounds checking and adjust transfer. If there's an 774 * error, the bounds check will flag that for us. 775 */ 776 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 777 if (ccdpart(dev) != RAW_PART) { 778 nbio = bounds_check_with_label(dev, bio, lp, wlabel); 779 if (nbio == NULL) 780 goto done; 781 } else { 782 int pbn; /* in sc_secsize chunks */ 783 long sz; /* in sc_secsize chunks */ 784 785 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize); 786 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 787 788 /* 789 * If out of bounds return an error. If at the EOF point, 790 * simply read or write less. 791 */ 792 793 if (pbn < 0 || pbn >= cs->sc_size) { 794 bp->b_resid = bp->b_bcount; 795 if (pbn != cs->sc_size) { 796 bp->b_error = EINVAL; 797 bp->b_flags |= B_ERROR | B_INVAL; 798 } 799 goto done; 800 } 801 802 /* 803 * If the request crosses EOF, truncate the request. 804 */ 805 if (pbn + sz > cs->sc_size) { 806 bp->b_bcount = (cs->sc_size - pbn) * 807 cs->sc_geom.ccg_secsize; 808 } 809 nbio = bio; 810 } 811 812 bp->b_resid = bp->b_bcount; 813 nbio->bio_driver_info = dev; 814 815 /* 816 * "Start" the unit. 817 */ 818 crit_enter(); 819 ccdstart(cs, nbio); 820 crit_exit(); 821 return; 822 823 /* 824 * note: bio, not nbio, is valid at the done label. 825 */ 826 done: 827 biodone(bio); 828 } 829 830 static void 831 ccdstart(struct ccd_softc *cs, struct bio *bio) 832 { 833 long bcount, rcount; 834 struct ccdbuf *cbp[4]; 835 struct buf *bp = bio->bio_buf; 836 dev_t dev = bio->bio_driver_info; 837 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 838 caddr_t addr; 839 off_t doffset; 840 struct partition *pp; 841 842 #ifdef DEBUG 843 if (ccddebug & CCDB_FOLLOW) 844 printf("ccdstart(%x, %x)\n", cs, bp); 845 #endif 846 847 /* Record the transaction start */ 848 devstat_start_transaction(&cs->device_stats); 849 850 /* 851 * Translate the partition-relative block number to an absolute. 852 */ 853 doffset = bio->bio_offset; 854 if (ccdpart(dev) != RAW_PART) { 855 pp = &cs->sc_label.d_partitions[ccdpart(dev)]; 856 doffset += pp->p_offset * cs->sc_label.d_secsize; 857 } 858 859 /* 860 * Allocate component buffers and fire off the requests 861 */ 862 addr = bp->b_data; 863 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 864 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 865 rcount = cbp[0]->cb_buf.b_bcount; 866 867 if (cs->sc_cflags & CCDF_MIRROR) { 868 /* 869 * Mirroring. Writes go to both disks, reads are 870 * taken from whichever disk seems most appropriate. 871 * 872 * We attempt to localize reads to the disk whos arm 873 * is nearest the read request. We ignore seeks due 874 * to writes when making this determination and we 875 * also try to avoid hogging. 876 */ 877 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 878 vn_strategy(cbp[0]->cb_buf.b_vp, 879 &cbp[0]->cb_buf.b_bio1); 880 vn_strategy(cbp[1]->cb_buf.b_vp, 881 &cbp[1]->cb_buf.b_bio1); 882 } else { 883 int pick = cs->sc_pick; 884 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize; 885 886 if (doffset < cs->sc_blk[pick] - range || 887 doffset > cs->sc_blk[pick] + range 888 ) { 889 cs->sc_pick = pick = 1 - pick; 890 } 891 cs->sc_blk[pick] = doffset + rcount; 892 vn_strategy(cbp[pick]->cb_buf.b_vp, 893 &cbp[pick]->cb_buf.b_bio1); 894 } 895 } else { 896 /* 897 * Not mirroring 898 */ 899 vn_strategy(cbp[0]->cb_buf.b_vp, 900 &cbp[0]->cb_buf.b_bio1); 901 } 902 doffset += rcount; 903 addr += rcount; 904 } 905 } 906 907 /* 908 * Build a component buffer header. 909 */ 910 static void 911 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 912 off_t doffset, caddr_t addr, long bcount) 913 { 914 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 915 struct ccdbuf *cbp; 916 daddr_t bn, cbn, cboff; 917 off_t cbc; 918 919 #ifdef DEBUG 920 if (ccddebug & CCDB_IO) 921 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 922 cs, bp, bn, addr, bcount); 923 #endif 924 /* 925 * Determine which component bn falls in. 926 */ 927 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize); 928 cbn = bn; 929 cboff = 0; 930 931 if (cs->sc_ileave == 0) { 932 /* 933 * Serially concatenated and neither a mirror nor a parity 934 * config. This is a special case. 935 */ 936 daddr_t sblk; 937 938 sblk = 0; 939 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 940 sblk += ci->ci_size; 941 cbn -= sblk; 942 } else { 943 struct ccdiinfo *ii; 944 int ccdisk, off; 945 946 /* 947 * Calculate cbn, the logical superblock (sc_ileave chunks), 948 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 949 * to cbn. 950 */ 951 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 952 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 953 954 /* 955 * Figure out which interleave table to use. 956 */ 957 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 958 if (ii->ii_startblk > cbn) 959 break; 960 } 961 ii--; 962 963 /* 964 * off is the logical superblock relative to the beginning 965 * of this interleave block. 966 */ 967 off = cbn - ii->ii_startblk; 968 969 /* 970 * We must calculate which disk component to use (ccdisk), 971 * and recalculate cbn to be the superblock relative to 972 * the beginning of the component. This is typically done by 973 * adding 'off' and ii->ii_startoff together. However, 'off' 974 * must typically be divided by the number of components in 975 * this interleave array to be properly convert it from a 976 * CCD-relative logical superblock number to a 977 * component-relative superblock number. 978 */ 979 if (ii->ii_ndisk == 1) { 980 /* 981 * When we have just one disk, it can't be a mirror 982 * or a parity config. 983 */ 984 ccdisk = ii->ii_index[0]; 985 cbn = ii->ii_startoff + off; 986 } else { 987 if (cs->sc_cflags & CCDF_MIRROR) { 988 /* 989 * We have forced a uniform mapping, resulting 990 * in a single interleave array. We double 991 * up on the first half of the available 992 * components and our mirror is in the second 993 * half. This only works with a single 994 * interleave array because doubling up 995 * doubles the number of sectors, so there 996 * cannot be another interleave array because 997 * the next interleave array's calculations 998 * would be off. 999 */ 1000 int ndisk2 = ii->ii_ndisk / 2; 1001 ccdisk = ii->ii_index[off % ndisk2]; 1002 cbn = ii->ii_startoff + off / ndisk2; 1003 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1004 } else if (cs->sc_cflags & CCDF_PARITY) { 1005 /* 1006 * XXX not implemented yet 1007 */ 1008 int ndisk2 = ii->ii_ndisk - 1; 1009 ccdisk = ii->ii_index[off % ndisk2]; 1010 cbn = ii->ii_startoff + off / ndisk2; 1011 if (cbn % ii->ii_ndisk <= ccdisk) 1012 ccdisk++; 1013 } else { 1014 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1015 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1016 } 1017 } 1018 1019 ci = &cs->sc_cinfo[ccdisk]; 1020 1021 /* 1022 * Convert cbn from a superblock to a normal block so it 1023 * can be used to calculate (along with cboff) the normal 1024 * block index into this particular disk. 1025 */ 1026 cbn *= cs->sc_ileave; 1027 } 1028 1029 /* 1030 * Fill in the component buf structure. 1031 */ 1032 cbp = getccdbuf(); 1033 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1034 cbp->cb_buf.b_data = addr; 1035 cbp->cb_buf.b_vp = ci->ci_vp; 1036 if (cs->sc_ileave == 0) 1037 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1038 else 1039 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1040 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1041 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1042 1043 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1044 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1045 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1046 1047 /* 1048 * context for ccdiodone 1049 */ 1050 cbp->cb_obio = bio; 1051 cbp->cb_unit = cs - ccd_softc; 1052 cbp->cb_comp = ci - cs->sc_cinfo; 1053 1054 #ifdef DEBUG 1055 if (ccddebug & CCDB_IO) 1056 printf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1057 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1058 cbp->cb_buf.b_bio1.bio_offset, 1059 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1060 #endif 1061 cb[0] = cbp; 1062 1063 /* 1064 * Note: both I/O's setup when reading from mirror, but only one 1065 * will be executed. 1066 */ 1067 if (cs->sc_cflags & CCDF_MIRROR) { 1068 /* mirror, setup second I/O */ 1069 cbp = getccdbuf(); 1070 1071 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1072 cbp->cb_buf.b_data = addr; 1073 cbp->cb_buf.b_vp = ci2->ci_vp; 1074 if (cs->sc_ileave == 0) 1075 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1076 else 1077 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1078 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1079 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1080 1081 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1082 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1083 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1084 1085 /* 1086 * context for ccdiodone 1087 */ 1088 cbp->cb_obio = bio; 1089 cbp->cb_unit = cs - ccd_softc; 1090 cbp->cb_comp = ci2 - cs->sc_cinfo; 1091 cb[1] = cbp; 1092 /* link together the ccdbuf's and clear "mirror done" flag */ 1093 cb[0]->cb_mirror = cb[1]; 1094 cb[1]->cb_mirror = cb[0]; 1095 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1097 } 1098 } 1099 1100 static void 1101 ccdintr(struct ccd_softc *cs, struct bio *bio) 1102 { 1103 struct buf *bp = bio->bio_buf; 1104 1105 #ifdef DEBUG 1106 if (ccddebug & CCDB_FOLLOW) 1107 printf("ccdintr(%x, %x)\n", cs, bp); 1108 #endif 1109 /* 1110 * Request is done for better or worse, wakeup the top half. 1111 */ 1112 if (bp->b_flags & B_ERROR) 1113 bp->b_resid = bp->b_bcount; 1114 devstat_end_transaction_buf(&cs->device_stats, bp); 1115 biodone(bio); 1116 } 1117 1118 /* 1119 * Called at interrupt time. 1120 * Mark the component as done and if all components are done, 1121 * take a ccd interrupt. 1122 */ 1123 static void 1124 ccdiodone(struct bio *bio) 1125 { 1126 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1127 struct bio *obio = cbp->cb_obio; 1128 struct buf *obp = obio->bio_buf; 1129 int unit = cbp->cb_unit; 1130 int count; 1131 1132 /* 1133 * Since we do not have exclusive access to underlying devices, 1134 * we can't keep cache translations around. 1135 */ 1136 clearbiocache(bio->bio_next); 1137 1138 crit_enter(); 1139 #ifdef DEBUG 1140 if (ccddebug & CCDB_FOLLOW) 1141 printf("ccdiodone(%x)\n", cbp); 1142 if (ccddebug & CCDB_IO) { 1143 printf("ccdiodone: bp %x bcount %d resid %d\n", 1144 obp, obp->b_bcount, obp->b_resid); 1145 printf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1146 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1147 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1148 cbp->cb_buf.b_bcount); 1149 } 1150 #endif 1151 /* 1152 * If an error occured, report it. If this is a mirrored 1153 * configuration and the first of two possible reads, do not 1154 * set the error in the bp yet because the second read may 1155 * succeed. 1156 */ 1157 if (cbp->cb_buf.b_flags & B_ERROR) { 1158 const char *msg = ""; 1159 1160 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1161 (cbp->cb_buf.b_flags & B_READ) && 1162 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1163 /* 1164 * We will try our read on the other disk down 1165 * below, also reverse the default pick so if we 1166 * are doing a scan we do not keep hitting the 1167 * bad disk first. 1168 */ 1169 struct ccd_softc *cs = &ccd_softc[unit]; 1170 1171 msg = ", trying other disk"; 1172 cs->sc_pick = 1 - cs->sc_pick; 1173 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1174 } else { 1175 obp->b_flags |= B_ERROR; 1176 obp->b_error = cbp->cb_buf.b_error ? 1177 cbp->cb_buf.b_error : EIO; 1178 } 1179 printf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1180 unit, obp->b_error, cbp->cb_comp, 1181 cbp->cb_buf.b_bio2.bio_offset, 1182 obio->bio_offset, msg); 1183 } 1184 1185 /* 1186 * Process mirror. If we are writing, I/O has been initiated on both 1187 * buffers and we fall through only after both are finished. 1188 * 1189 * If we are reading only one I/O is initiated at a time. If an 1190 * error occurs we initiate the second I/O and return, otherwise 1191 * we free the second I/O without initiating it. 1192 */ 1193 1194 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1195 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1196 /* 1197 * When writing, handshake with the second buffer 1198 * to determine when both are done. If both are not 1199 * done, return here. 1200 */ 1201 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1203 putccdbuf(cbp); 1204 crit_exit(); 1205 return; 1206 } 1207 } else { 1208 /* 1209 * When reading, either dispose of the second buffer 1210 * or initiate I/O on the second buffer if an error 1211 * occured with this one. 1212 */ 1213 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1214 if (cbp->cb_buf.b_flags & B_ERROR) { 1215 cbp->cb_mirror->cb_pflags |= 1216 CCDPF_MIRROR_DONE; 1217 vn_strategy( 1218 cbp->cb_mirror->cb_buf.b_vp, 1219 &cbp->cb_mirror->cb_buf.b_bio1 1220 ); 1221 putccdbuf(cbp); 1222 crit_exit(); 1223 return; 1224 } else { 1225 putccdbuf(cbp->cb_mirror); 1226 /* fall through */ 1227 } 1228 } 1229 } 1230 } 1231 1232 /* 1233 * use b_bufsize to determine how big the original request was rather 1234 * then b_bcount, because b_bcount may have been truncated for EOF. 1235 * 1236 * XXX We check for an error, but we do not test the resid for an 1237 * aligned EOF condition. This may result in character & block 1238 * device access not recognizing EOF properly when read or written 1239 * sequentially, but will not effect filesystems. 1240 */ 1241 count = cbp->cb_buf.b_bufsize; 1242 putccdbuf(cbp); 1243 1244 /* 1245 * If all done, "interrupt". 1246 */ 1247 obp->b_resid -= count; 1248 if (obp->b_resid < 0) 1249 panic("ccdiodone: count"); 1250 if (obp->b_resid == 0) 1251 ccdintr(&ccd_softc[unit], obio); 1252 crit_exit(); 1253 } 1254 1255 static int 1256 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1257 { 1258 int unit = ccdunit(dev); 1259 int i, j, lookedup = 0, error = 0; 1260 int part, pmask; 1261 struct ccd_softc *cs; 1262 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1263 struct ccddevice ccd; 1264 char **cpp; 1265 struct vnode **vpp; 1266 struct ucred *cred; 1267 1268 KKASSERT(td->td_proc != NULL); 1269 cred = td->td_proc->p_ucred; 1270 1271 if (unit >= numccd) 1272 return (ENXIO); 1273 cs = &ccd_softc[unit]; 1274 1275 bzero(&ccd, sizeof(ccd)); 1276 1277 switch (cmd) { 1278 case CCDIOCSET: 1279 if (cs->sc_flags & CCDF_INITED) 1280 return (EBUSY); 1281 1282 if ((flag & FWRITE) == 0) 1283 return (EBADF); 1284 1285 if ((error = ccdlock(cs)) != 0) 1286 return (error); 1287 1288 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1289 return (EINVAL); 1290 1291 /* Fill in some important bits. */ 1292 ccd.ccd_unit = unit; 1293 ccd.ccd_interleave = ccio->ccio_ileave; 1294 if (ccd.ccd_interleave == 0 && 1295 ((ccio->ccio_flags & CCDF_MIRROR) || 1296 (ccio->ccio_flags & CCDF_PARITY))) { 1297 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1298 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1299 } 1300 if ((ccio->ccio_flags & CCDF_MIRROR) && 1301 (ccio->ccio_flags & CCDF_PARITY)) { 1302 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1303 ccio->ccio_flags &= ~CCDF_PARITY; 1304 } 1305 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1306 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1307 printf("ccd%d: mirror/parity forces uniform flag\n", 1308 unit); 1309 ccio->ccio_flags |= CCDF_UNIFORM; 1310 } 1311 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1312 1313 /* 1314 * Allocate space for and copy in the array of 1315 * componet pathnames and device numbers. 1316 */ 1317 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1318 M_DEVBUF, M_WAITOK); 1319 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1320 M_DEVBUF, M_WAITOK); 1321 1322 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1323 ccio->ccio_ndisks * sizeof(char **)); 1324 if (error) { 1325 free(vpp, M_DEVBUF); 1326 free(cpp, M_DEVBUF); 1327 ccdunlock(cs); 1328 return (error); 1329 } 1330 1331 #ifdef DEBUG 1332 if (ccddebug & CCDB_INIT) 1333 for (i = 0; i < ccio->ccio_ndisks; ++i) 1334 printf("ccdioctl: component %d: 0x%x\n", 1335 i, cpp[i]); 1336 #endif 1337 1338 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1339 #ifdef DEBUG 1340 if (ccddebug & CCDB_INIT) 1341 printf("ccdioctl: lookedup = %d\n", lookedup); 1342 #endif 1343 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1344 for (j = 0; j < lookedup; ++j) 1345 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1346 free(vpp, M_DEVBUF); 1347 free(cpp, M_DEVBUF); 1348 ccdunlock(cs); 1349 return (error); 1350 } 1351 ++lookedup; 1352 } 1353 ccd.ccd_cpp = cpp; 1354 ccd.ccd_vpp = vpp; 1355 ccd.ccd_ndev = ccio->ccio_ndisks; 1356 1357 /* 1358 * Initialize the ccd. Fills in the softc for us. 1359 */ 1360 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1361 for (j = 0; j < lookedup; ++j) 1362 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1363 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1364 free(vpp, M_DEVBUF); 1365 free(cpp, M_DEVBUF); 1366 ccdunlock(cs); 1367 return (error); 1368 } 1369 1370 /* 1371 * The ccd has been successfully initialized, so 1372 * we can place it into the array and read the disklabel. 1373 */ 1374 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1375 ccio->ccio_unit = unit; 1376 ccio->ccio_size = cs->sc_size; 1377 ccdgetdisklabel(dev); 1378 1379 ccdunlock(cs); 1380 1381 break; 1382 1383 case CCDIOCCLR: 1384 if ((cs->sc_flags & CCDF_INITED) == 0) 1385 return (ENXIO); 1386 1387 if ((flag & FWRITE) == 0) 1388 return (EBADF); 1389 1390 if ((error = ccdlock(cs)) != 0) 1391 return (error); 1392 1393 /* Don't unconfigure if any other partitions are open */ 1394 part = ccdpart(dev); 1395 pmask = (1 << part); 1396 if ((cs->sc_openmask & ~pmask)) { 1397 ccdunlock(cs); 1398 return (EBUSY); 1399 } 1400 1401 /* 1402 * Free ccd_softc information and clear entry. 1403 */ 1404 1405 /* Close the components and free their pathnames. */ 1406 for (i = 0; i < cs->sc_nccdisks; ++i) { 1407 /* 1408 * XXX: this close could potentially fail and 1409 * cause Bad Things. Maybe we need to force 1410 * the close to happen? 1411 */ 1412 #ifdef DEBUG 1413 if (ccddebug & CCDB_VNODE) 1414 vprint("CCDIOCCLR: vnode info", 1415 cs->sc_cinfo[i].ci_vp); 1416 #endif 1417 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1418 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1419 } 1420 1421 /* Free interleave index. */ 1422 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1423 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1424 1425 /* Free component info and interleave table. */ 1426 free(cs->sc_cinfo, M_DEVBUF); 1427 free(cs->sc_itable, M_DEVBUF); 1428 cs->sc_flags &= ~CCDF_INITED; 1429 1430 /* 1431 * Free ccddevice information and clear entry. 1432 */ 1433 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1434 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1435 ccd.ccd_dk = -1; 1436 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1437 1438 /* 1439 * And remove the devstat entry. 1440 */ 1441 devstat_remove_entry(&cs->device_stats); 1442 1443 /* This must be atomic. */ 1444 crit_enter(); 1445 ccdunlock(cs); 1446 bzero(cs, sizeof(struct ccd_softc)); 1447 crit_exit(); 1448 1449 break; 1450 1451 case DIOCGDINFO: 1452 if ((cs->sc_flags & CCDF_INITED) == 0) 1453 return (ENXIO); 1454 1455 *(struct disklabel *)data = cs->sc_label; 1456 break; 1457 1458 case DIOCGPART: 1459 if ((cs->sc_flags & CCDF_INITED) == 0) 1460 return (ENXIO); 1461 1462 ((struct partinfo *)data)->disklab = &cs->sc_label; 1463 ((struct partinfo *)data)->part = 1464 &cs->sc_label.d_partitions[ccdpart(dev)]; 1465 break; 1466 1467 case DIOCWDINFO: 1468 case DIOCSDINFO: 1469 if ((cs->sc_flags & CCDF_INITED) == 0) 1470 return (ENXIO); 1471 1472 if ((flag & FWRITE) == 0) 1473 return (EBADF); 1474 1475 if ((error = ccdlock(cs)) != 0) 1476 return (error); 1477 1478 cs->sc_flags |= CCDF_LABELLING; 1479 1480 error = setdisklabel(&cs->sc_label, 1481 (struct disklabel *)data, 0); 1482 if (error == 0) { 1483 if (cmd == DIOCWDINFO) { 1484 dev_t cdev = CCDLABELDEV(dev); 1485 error = writedisklabel(cdev, &cs->sc_label); 1486 } 1487 } 1488 1489 cs->sc_flags &= ~CCDF_LABELLING; 1490 1491 ccdunlock(cs); 1492 1493 if (error) 1494 return (error); 1495 break; 1496 1497 case DIOCWLABEL: 1498 if ((cs->sc_flags & CCDF_INITED) == 0) 1499 return (ENXIO); 1500 1501 if ((flag & FWRITE) == 0) 1502 return (EBADF); 1503 if (*(int *)data != 0) 1504 cs->sc_flags |= CCDF_WLABEL; 1505 else 1506 cs->sc_flags &= ~CCDF_WLABEL; 1507 break; 1508 1509 default: 1510 return (ENOTTY); 1511 } 1512 1513 return (0); 1514 } 1515 1516 static int 1517 ccdsize(dev_t dev) 1518 { 1519 struct ccd_softc *cs; 1520 int part, size; 1521 1522 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1523 return (-1); 1524 1525 cs = &ccd_softc[ccdunit(dev)]; 1526 part = ccdpart(dev); 1527 1528 if ((cs->sc_flags & CCDF_INITED) == 0) 1529 return (-1); 1530 1531 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1532 size = -1; 1533 else 1534 size = cs->sc_label.d_partitions[part].p_size; 1535 1536 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1537 return (-1); 1538 1539 return (size); 1540 } 1541 1542 static int 1543 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1544 { 1545 /* Not implemented. */ 1546 return ENXIO; 1547 } 1548 1549 /* 1550 * Lookup the provided name in the filesystem. If the file exists, 1551 * is a valid block device, and isn't being used by anyone else, 1552 * set *vpp to the file's vnode. 1553 */ 1554 static int 1555 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1556 { 1557 struct nlookupdata nd; 1558 struct ucred *cred; 1559 struct vnode *vp; 1560 int error; 1561 1562 KKASSERT(td->td_proc); 1563 cred = td->td_proc->p_ucred; 1564 *vpp = NULL; 1565 1566 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1567 if (error) 1568 return (error); 1569 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1570 #ifdef DEBUG 1571 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1572 printf("ccdlookup: vn_open error = %d\n", error); 1573 #endif 1574 goto done; 1575 } 1576 vp = nd.nl_open_vp; 1577 1578 if (vp->v_usecount > 1) { 1579 error = EBUSY; 1580 goto done; 1581 } 1582 1583 if (!vn_isdisk(vp, &error)) 1584 goto done; 1585 1586 #ifdef DEBUG 1587 if (ccddebug & CCDB_VNODE) 1588 vprint("ccdlookup: vnode info", vp); 1589 #endif 1590 1591 VOP_UNLOCK(vp, 0, td); 1592 nd.nl_open_vp = NULL; 1593 nlookup_done(&nd); 1594 *vpp = vp; /* leave ref intact */ 1595 return (0); 1596 done: 1597 nlookup_done(&nd); 1598 return (error); 1599 } 1600 1601 /* 1602 * Read the disklabel from the ccd. If one is not present, fake one 1603 * up. 1604 */ 1605 static void 1606 ccdgetdisklabel(dev_t dev) 1607 { 1608 int unit = ccdunit(dev); 1609 struct ccd_softc *cs = &ccd_softc[unit]; 1610 char *errstring; 1611 struct disklabel *lp = &cs->sc_label; 1612 struct ccdgeom *ccg = &cs->sc_geom; 1613 dev_t cdev; 1614 1615 bzero(lp, sizeof(*lp)); 1616 1617 lp->d_secperunit = cs->sc_size; 1618 lp->d_secsize = ccg->ccg_secsize; 1619 lp->d_nsectors = ccg->ccg_nsectors; 1620 lp->d_ntracks = ccg->ccg_ntracks; 1621 lp->d_ncylinders = ccg->ccg_ncylinders; 1622 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1623 1624 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1625 lp->d_type = DTYPE_CCD; 1626 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1627 lp->d_rpm = 3600; 1628 lp->d_interleave = 1; 1629 lp->d_flags = 0; 1630 1631 lp->d_partitions[RAW_PART].p_offset = 0; 1632 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1633 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1634 lp->d_npartitions = RAW_PART + 1; 1635 1636 lp->d_bbsize = BBSIZE; /* XXX */ 1637 lp->d_sbsize = SBSIZE; /* XXX */ 1638 1639 lp->d_magic = DISKMAGIC; 1640 lp->d_magic2 = DISKMAGIC; 1641 lp->d_checksum = dkcksum(&cs->sc_label); 1642 1643 /* 1644 * Call the generic disklabel extraction routine. 1645 */ 1646 cdev = CCDLABELDEV(dev); 1647 errstring = readdisklabel(cdev, &cs->sc_label); 1648 if (errstring != NULL) 1649 ccdmakedisklabel(cs); 1650 1651 #ifdef DEBUG 1652 /* It's actually extremely common to have unlabeled ccds. */ 1653 if (ccddebug & CCDB_LABEL) 1654 if (errstring != NULL) 1655 printf("ccd%d: %s\n", unit, errstring); 1656 #endif 1657 } 1658 1659 /* 1660 * Take care of things one might want to take care of in the event 1661 * that a disklabel isn't present. 1662 */ 1663 static void 1664 ccdmakedisklabel(struct ccd_softc *cs) 1665 { 1666 struct disklabel *lp = &cs->sc_label; 1667 1668 /* 1669 * For historical reasons, if there's no disklabel present 1670 * the raw partition must be marked FS_BSDFFS. 1671 */ 1672 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1673 1674 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1675 } 1676 1677 /* 1678 * Wait interruptibly for an exclusive lock. 1679 * 1680 * XXX 1681 * Several drivers do this; it should be abstracted and made MP-safe. 1682 */ 1683 static int 1684 ccdlock(struct ccd_softc *cs) 1685 { 1686 int error; 1687 1688 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1689 cs->sc_flags |= CCDF_WANTED; 1690 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1691 return (error); 1692 } 1693 cs->sc_flags |= CCDF_LOCKED; 1694 return (0); 1695 } 1696 1697 /* 1698 * Unlock and wake up any waiters. 1699 */ 1700 static void 1701 ccdunlock(struct ccd_softc *cs) 1702 { 1703 1704 cs->sc_flags &= ~CCDF_LOCKED; 1705 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1706 cs->sc_flags &= ~CCDF_WANTED; 1707 wakeup(cs); 1708 } 1709 } 1710 1711 #ifdef DEBUG 1712 static void 1713 printiinfo(struct ccdiinfo *ii) 1714 { 1715 int ix, i; 1716 1717 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1718 printf(" itab[%d]: #dk %d sblk %d soff %d", 1719 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1720 for (i = 0; i < ii->ii_ndisk; i++) 1721 printf(" %d", ii->ii_index[i]); 1722 printf("\n"); 1723 } 1724 } 1725 #endif 1726 1727 1728 /* Local Variables: */ 1729 /* c-argdecl-indent: 8 */ 1730 /* c-continued-statement-offset: 8 */ 1731 /* c-indent-level: 8 */ 1732 /* End: */ 1733