1 /* $NetBSD: ccd.c,v 1.145 2013/09/12 12:22:40 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.145 2013/09/12 12:22:40 martin Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 119 #include <uvm/uvm_extern.h> 120 121 #include <dev/ccdvar.h> 122 #include <dev/dkvar.h> 123 124 #if defined(CCDDEBUG) && !defined(DEBUG) 125 #define DEBUG 126 #endif 127 128 #ifdef DEBUG 129 #define CCDB_FOLLOW 0x01 130 #define CCDB_INIT 0x02 131 #define CCDB_IO 0x04 132 #define CCDB_LABEL 0x08 133 #define CCDB_VNODE 0x10 134 int ccddebug = 0x00; 135 #endif 136 137 #define ccdunit(x) DISKUNIT(x) 138 139 struct ccdbuf { 140 struct buf cb_buf; /* new I/O buf */ 141 struct buf *cb_obp; /* ptr. to original I/O buf */ 142 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 143 int cb_comp; /* target component */ 144 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 145 }; 146 147 /* component buffer pool */ 148 static pool_cache_t ccd_cache; 149 150 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 151 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 152 153 #define CCDLABELDEV(dev) \ 154 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 155 156 /* called by main() at boot time */ 157 void ccdattach(int); 158 159 /* called by biodone() at interrupt time */ 160 static void ccdiodone(struct buf *); 161 162 static void ccdinterleave(struct ccd_softc *); 163 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 164 struct lwp *); 165 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 166 daddr_t, void *, long); 167 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 168 static void ccdgetdisklabel(dev_t); 169 static void ccdmakedisklabel(struct ccd_softc *); 170 static void ccdstart(struct ccd_softc *); 171 static void ccdthread(void *); 172 173 static dev_type_open(ccdopen); 174 static dev_type_close(ccdclose); 175 static dev_type_read(ccdread); 176 static dev_type_write(ccdwrite); 177 static dev_type_ioctl(ccdioctl); 178 static dev_type_strategy(ccdstrategy); 179 static dev_type_size(ccdsize); 180 181 const struct bdevsw ccd_bdevsw = { 182 .d_open = ccdopen, 183 .d_close = ccdclose, 184 .d_strategy = ccdstrategy, 185 .d_ioctl = ccdioctl, 186 .d_dump = nodump, 187 .d_psize = ccdsize, 188 .d_flag = D_DISK | D_MPSAFE 189 }; 190 191 const struct cdevsw ccd_cdevsw = { 192 .d_open = ccdopen, 193 .d_close = ccdclose, 194 .d_read = ccdread, 195 .d_write = ccdwrite, 196 .d_ioctl = ccdioctl, 197 .d_stop = nostop, 198 .d_tty = notty, 199 .d_poll = nopoll, 200 .d_mmap = nommap, 201 .d_kqfilter = nokqfilter, 202 .d_flag = D_DISK | D_MPSAFE 203 }; 204 205 #ifdef DEBUG 206 static void printiinfo(struct ccdiinfo *); 207 #endif 208 209 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 210 static kmutex_t ccd_lock; 211 212 static struct ccd_softc * 213 ccdcreate(int unit) { 214 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 215 if (sc == NULL) { 216 #ifdef DIAGNOSTIC 217 printf("%s: out of memory\n", __func__); 218 #endif 219 return NULL; 220 } 221 /* Initialize per-softc structures. */ 222 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 223 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 224 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 225 cv_init(&sc->sc_stop, "ccdstop"); 226 cv_init(&sc->sc_push, "ccdthr"); 227 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 228 return sc; 229 } 230 231 static void 232 ccddestroy(struct ccd_softc *sc) { 233 mutex_obj_free(sc->sc_iolock); 234 mutex_destroy(&sc->sc_dvlock); 235 cv_destroy(&sc->sc_stop); 236 cv_destroy(&sc->sc_push); 237 disk_destroy(&sc->sc_dkdev); 238 kmem_free(sc, sizeof(*sc)); 239 } 240 241 static struct ccd_softc * 242 ccdget(int unit) { 243 struct ccd_softc *sc; 244 if (unit < 0) { 245 #ifdef DIAGNOSTIC 246 panic("%s: unit %d!", __func__, unit); 247 #endif 248 return NULL; 249 } 250 mutex_enter(&ccd_lock); 251 LIST_FOREACH(sc, &ccds, sc_link) { 252 if (sc->sc_unit == unit) { 253 mutex_exit(&ccd_lock); 254 return sc; 255 } 256 } 257 mutex_exit(&ccd_lock); 258 if ((sc = ccdcreate(unit)) == NULL) 259 return NULL; 260 mutex_enter(&ccd_lock); 261 LIST_INSERT_HEAD(&ccds, sc, sc_link); 262 mutex_exit(&ccd_lock); 263 return sc; 264 } 265 266 static void 267 ccdput(struct ccd_softc *sc) { 268 mutex_enter(&ccd_lock); 269 LIST_REMOVE(sc, sc_link); 270 mutex_exit(&ccd_lock); 271 ccddestroy(sc); 272 } 273 274 /* 275 * Called by main() during pseudo-device attachment. All we need 276 * to do is allocate enough space for devices to be configured later. 277 */ 278 void 279 ccdattach(int num) 280 { 281 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 282 283 /* Initialize the component buffer pool. */ 284 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 285 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 286 } 287 288 static int 289 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 290 struct lwp *l) 291 { 292 struct ccdcinfo *ci = NULL; 293 int ix; 294 struct vattr va; 295 struct ccdgeom *ccg = &cs->sc_geom; 296 char *tmppath; 297 int error, path_alloced; 298 uint64_t psize, minsize; 299 unsigned secsize, maxsecsize; 300 301 #ifdef DEBUG 302 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 303 printf("%s: ccdinit\n", cs->sc_xname); 304 #endif 305 306 /* Allocate space for the component info. */ 307 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 308 KM_SLEEP); 309 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 310 311 cs->sc_size = 0; 312 313 /* 314 * Verify that each component piece exists and record 315 * relevant information about it. 316 */ 317 maxsecsize = 0; 318 minsize = 0; 319 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 320 ci = &cs->sc_cinfo[ix]; 321 ci->ci_vp = vpp[ix]; 322 323 /* 324 * Copy in the pathname of the component. 325 */ 326 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 327 error = copyinstr(cpaths[ix], tmppath, 328 MAXPATHLEN, &ci->ci_pathlen); 329 if (ci->ci_pathlen == 0) 330 error = EINVAL; 331 if (error) { 332 #ifdef DEBUG 333 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 334 printf("%s: can't copy path, error = %d\n", 335 cs->sc_xname, error); 336 #endif 337 goto out; 338 } 339 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 340 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 341 path_alloced++; 342 343 /* 344 * XXX: Cache the component's dev_t. 345 */ 346 vn_lock(vpp[ix], LK_SHARED | LK_RETRY); 347 error = VOP_GETATTR(vpp[ix], &va, l->l_cred); 348 VOP_UNLOCK(vpp[ix]); 349 if (error != 0) { 350 #ifdef DEBUG 351 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 352 printf("%s: %s: getattr failed %s = %d\n", 353 cs->sc_xname, ci->ci_path, 354 "error", error); 355 #endif 356 goto out; 357 } 358 ci->ci_dev = va.va_rdev; 359 360 /* 361 * Get partition information for the component. 362 */ 363 error = getdisksize(vpp[ix], &psize, &secsize); 364 if (error) { 365 #ifdef DEBUG 366 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 367 printf("%s: %s: disksize failed, error = %d\n", 368 cs->sc_xname, ci->ci_path, error); 369 #endif 370 goto out; 371 } 372 373 /* 374 * Calculate the size, truncating to an interleave 375 * boundary if necessary. 376 */ 377 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 378 if (cs->sc_ileave > 1) 379 psize -= psize % cs->sc_ileave; 380 381 if (psize == 0) { 382 #ifdef DEBUG 383 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 384 printf("%s: %s: size == 0\n", 385 cs->sc_xname, ci->ci_path); 386 #endif 387 error = ENODEV; 388 goto out; 389 } 390 391 if (minsize == 0 || psize < minsize) 392 minsize = psize; 393 ci->ci_size = psize; 394 cs->sc_size += psize; 395 } 396 397 /* 398 * Don't allow the interleave to be smaller than 399 * the biggest component sector. 400 */ 401 if ((cs->sc_ileave > 0) && 402 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 403 #ifdef DEBUG 404 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 405 printf("%s: interleave must be at least %d\n", 406 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 407 #endif 408 error = EINVAL; 409 goto out; 410 } 411 412 /* 413 * If uniform interleave is desired set all sizes to that of 414 * the smallest component. 415 */ 416 if (cs->sc_flags & CCDF_UNIFORM) { 417 for (ci = cs->sc_cinfo; 418 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 419 ci->ci_size = minsize; 420 421 cs->sc_size = cs->sc_nccdisks * minsize; 422 } 423 424 /* 425 * Construct the interleave table. 426 */ 427 ccdinterleave(cs); 428 429 /* 430 * Create pseudo-geometry based on 1MB cylinders. It's 431 * pretty close. 432 */ 433 ccg->ccg_secsize = DEV_BSIZE; 434 ccg->ccg_ntracks = 1; 435 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 436 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 437 438 /* 439 * Create thread to handle deferred I/O. 440 */ 441 cs->sc_zap = false; 442 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 443 cs, &cs->sc_thread, "%s", cs->sc_xname); 444 if (error) { 445 printf("ccdinit: can't create thread: %d\n", error); 446 goto out; 447 } 448 449 /* 450 * Only now that everything is set up can we enable the device. 451 */ 452 mutex_enter(cs->sc_iolock); 453 cs->sc_flags |= CCDF_INITED; 454 mutex_exit(cs->sc_iolock); 455 kmem_free(tmppath, MAXPATHLEN); 456 return (0); 457 458 out: 459 for (ix = 0; ix < path_alloced; ix++) { 460 kmem_free(cs->sc_cinfo[ix].ci_path, 461 cs->sc_cinfo[ix].ci_pathlen); 462 } 463 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 464 kmem_free(tmppath, MAXPATHLEN); 465 return (error); 466 } 467 468 static void 469 ccdinterleave(struct ccd_softc *cs) 470 { 471 struct ccdcinfo *ci, *smallci; 472 struct ccdiinfo *ii; 473 daddr_t bn, lbn; 474 int ix; 475 u_long size; 476 477 #ifdef DEBUG 478 if (ccddebug & CCDB_INIT) 479 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 480 #endif 481 /* 482 * Allocate an interleave table. 483 * Chances are this is too big, but we don't care. 484 */ 485 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 486 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 487 488 /* 489 * Trivial case: no interleave (actually interleave of disk size). 490 * Each table entry represents a single component in its entirety. 491 */ 492 if (cs->sc_ileave == 0) { 493 bn = 0; 494 ii = cs->sc_itable; 495 496 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 497 /* Allocate space for ii_index. */ 498 ii->ii_indexsz = sizeof(int); 499 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 500 ii->ii_ndisk = 1; 501 ii->ii_startblk = bn; 502 ii->ii_startoff = 0; 503 ii->ii_index[0] = ix; 504 bn += cs->sc_cinfo[ix].ci_size; 505 ii++; 506 } 507 ii->ii_ndisk = 0; 508 #ifdef DEBUG 509 if (ccddebug & CCDB_INIT) 510 printiinfo(cs->sc_itable); 511 #endif 512 return; 513 } 514 515 /* 516 * The following isn't fast or pretty; it doesn't have to be. 517 */ 518 size = 0; 519 bn = lbn = 0; 520 for (ii = cs->sc_itable; ; ii++) { 521 /* Allocate space for ii_index. */ 522 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 523 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 524 525 /* 526 * Locate the smallest of the remaining components 527 */ 528 smallci = NULL; 529 for (ci = cs->sc_cinfo; 530 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 531 if (ci->ci_size > size && 532 (smallci == NULL || 533 ci->ci_size < smallci->ci_size)) 534 smallci = ci; 535 536 /* 537 * Nobody left, all done 538 */ 539 if (smallci == NULL) { 540 ii->ii_ndisk = 0; 541 break; 542 } 543 544 /* 545 * Record starting logical block and component offset 546 */ 547 ii->ii_startblk = bn / cs->sc_ileave; 548 ii->ii_startoff = lbn; 549 550 /* 551 * Determine how many disks take part in this interleave 552 * and record their indices. 553 */ 554 ix = 0; 555 for (ci = cs->sc_cinfo; 556 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 557 if (ci->ci_size >= smallci->ci_size) 558 ii->ii_index[ix++] = ci - cs->sc_cinfo; 559 ii->ii_ndisk = ix; 560 bn += ix * (smallci->ci_size - size); 561 lbn = smallci->ci_size / cs->sc_ileave; 562 size = smallci->ci_size; 563 } 564 #ifdef DEBUG 565 if (ccddebug & CCDB_INIT) 566 printiinfo(cs->sc_itable); 567 #endif 568 } 569 570 /* ARGSUSED */ 571 static int 572 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 573 { 574 int unit = ccdunit(dev); 575 struct ccd_softc *cs; 576 struct disklabel *lp; 577 int error = 0, part, pmask; 578 579 #ifdef DEBUG 580 if (ccddebug & CCDB_FOLLOW) 581 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 582 #endif 583 if ((cs = ccdget(unit)) == NULL) 584 return ENXIO; 585 586 mutex_enter(&cs->sc_dvlock); 587 588 lp = cs->sc_dkdev.dk_label; 589 590 part = DISKPART(dev); 591 pmask = (1 << part); 592 593 /* 594 * If we're initialized, check to see if there are any other 595 * open partitions. If not, then it's safe to update 596 * the in-core disklabel. Only read the disklabel if it is 597 * not already valid. 598 */ 599 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 600 cs->sc_dkdev.dk_openmask == 0) 601 ccdgetdisklabel(dev); 602 603 /* Check that the partition exists. */ 604 if (part != RAW_PART) { 605 if (((cs->sc_flags & CCDF_INITED) == 0) || 606 ((part >= lp->d_npartitions) || 607 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 608 error = ENXIO; 609 goto done; 610 } 611 } 612 613 /* Prevent our unit from being unconfigured while open. */ 614 switch (fmt) { 615 case S_IFCHR: 616 cs->sc_dkdev.dk_copenmask |= pmask; 617 break; 618 619 case S_IFBLK: 620 cs->sc_dkdev.dk_bopenmask |= pmask; 621 break; 622 } 623 cs->sc_dkdev.dk_openmask = 624 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 625 626 done: 627 mutex_exit(&cs->sc_dvlock); 628 return (error); 629 } 630 631 /* ARGSUSED */ 632 static int 633 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 634 { 635 int unit = ccdunit(dev); 636 struct ccd_softc *cs; 637 int part; 638 639 #ifdef DEBUG 640 if (ccddebug & CCDB_FOLLOW) 641 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 642 #endif 643 644 if ((cs = ccdget(unit)) == NULL) 645 return ENXIO; 646 647 mutex_enter(&cs->sc_dvlock); 648 649 part = DISKPART(dev); 650 651 /* ...that much closer to allowing unconfiguration... */ 652 switch (fmt) { 653 case S_IFCHR: 654 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 655 break; 656 657 case S_IFBLK: 658 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 659 break; 660 } 661 cs->sc_dkdev.dk_openmask = 662 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 663 664 if (cs->sc_dkdev.dk_openmask == 0) { 665 if ((cs->sc_flags & CCDF_KLABEL) == 0) 666 cs->sc_flags &= ~CCDF_VLABEL; 667 } 668 669 mutex_exit(&cs->sc_dvlock); 670 return (0); 671 } 672 673 static bool 674 ccdbackoff(struct ccd_softc *cs) 675 { 676 677 /* XXX Arbitrary, should be a uvm call. */ 678 return uvmexp.free < (uvmexp.freemin >> 1) && 679 disk_isbusy(&cs->sc_dkdev); 680 } 681 682 static void 683 ccdthread(void *cookie) 684 { 685 struct ccd_softc *cs; 686 687 cs = cookie; 688 689 #ifdef DEBUG 690 if (ccddebug & CCDB_FOLLOW) 691 printf("ccdthread: hello\n"); 692 #endif 693 694 mutex_enter(cs->sc_iolock); 695 while (__predict_true(!cs->sc_zap)) { 696 if (bufq_peek(cs->sc_bufq) == NULL) { 697 /* Nothing to do. */ 698 cv_wait(&cs->sc_push, cs->sc_iolock); 699 continue; 700 } 701 if (ccdbackoff(cs)) { 702 /* Wait for memory to become available. */ 703 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 704 continue; 705 } 706 #ifdef DEBUG 707 if (ccddebug & CCDB_FOLLOW) 708 printf("ccdthread: dispatching I/O\n"); 709 #endif 710 ccdstart(cs); 711 mutex_enter(cs->sc_iolock); 712 } 713 cs->sc_thread = NULL; 714 mutex_exit(cs->sc_iolock); 715 #ifdef DEBUG 716 if (ccddebug & CCDB_FOLLOW) 717 printf("ccdthread: goodbye\n"); 718 #endif 719 kthread_exit(0); 720 } 721 722 static void 723 ccdstrategy(struct buf *bp) 724 { 725 int unit = ccdunit(bp->b_dev); 726 struct ccd_softc *cs; 727 if ((cs = ccdget(unit)) == NULL) 728 return; 729 730 /* Must be open or reading label. */ 731 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 732 (cs->sc_flags & CCDF_RLABEL) != 0); 733 734 mutex_enter(cs->sc_iolock); 735 /* Synchronize with device init/uninit. */ 736 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 737 mutex_exit(cs->sc_iolock); 738 #ifdef DEBUG 739 if (ccddebug & CCDB_FOLLOW) 740 printf("ccdstrategy: unit %d: not inited\n", unit); 741 #endif 742 bp->b_error = ENXIO; 743 bp->b_resid = bp->b_bcount; 744 biodone(bp); 745 return; 746 } 747 748 /* Defer to thread if system is low on memory. */ 749 bufq_put(cs->sc_bufq, bp); 750 if (__predict_false(ccdbackoff(cs))) { 751 mutex_exit(cs->sc_iolock); 752 #ifdef DEBUG 753 if (ccddebug & CCDB_FOLLOW) 754 printf("ccdstrategy: holding off on I/O\n"); 755 #endif 756 return; 757 } 758 ccdstart(cs); 759 } 760 761 static void 762 ccdstart(struct ccd_softc *cs) 763 { 764 daddr_t blkno; 765 int wlabel; 766 struct disklabel *lp; 767 long bcount, rcount; 768 struct ccdbuf *cbp; 769 char *addr; 770 daddr_t bn; 771 vnode_t *vp; 772 buf_t *bp; 773 774 KASSERT(mutex_owned(cs->sc_iolock)); 775 776 disk_busy(&cs->sc_dkdev); 777 bp = bufq_get(cs->sc_bufq); 778 KASSERT(bp != NULL); 779 780 #ifdef DEBUG 781 if (ccddebug & CCDB_FOLLOW) 782 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 783 #endif 784 785 /* If it's a nil transfer, wake up the top half now. */ 786 if (bp->b_bcount == 0) 787 goto done; 788 789 lp = cs->sc_dkdev.dk_label; 790 791 /* 792 * Do bounds checking and adjust transfer. If there's an 793 * error, the bounds check will flag that for us. Convert 794 * the partition relative block number to an absolute. 795 */ 796 blkno = bp->b_blkno; 797 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 798 if (DISKPART(bp->b_dev) != RAW_PART) { 799 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 800 goto done; 801 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 802 } 803 mutex_exit(cs->sc_iolock); 804 bp->b_rawblkno = blkno; 805 806 /* Allocate the component buffers and start I/O! */ 807 bp->b_resid = bp->b_bcount; 808 bn = bp->b_rawblkno; 809 addr = bp->b_data; 810 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 811 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 812 rcount = cbp->cb_buf.b_bcount; 813 bn += btodb(rcount); 814 addr += rcount; 815 vp = cbp->cb_buf.b_vp; 816 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 817 mutex_enter(vp->v_interlock); 818 vp->v_numoutput++; 819 mutex_exit(vp->v_interlock); 820 } 821 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 822 } 823 return; 824 825 done: 826 disk_unbusy(&cs->sc_dkdev, 0, 0); 827 cv_broadcast(&cs->sc_stop); 828 cv_broadcast(&cs->sc_push); 829 mutex_exit(cs->sc_iolock); 830 bp->b_resid = bp->b_bcount; 831 biodone(bp); 832 } 833 834 /* 835 * Build a component buffer header. 836 */ 837 static struct ccdbuf * 838 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 839 long bcount) 840 { 841 struct ccdcinfo *ci; 842 struct ccdbuf *cbp; 843 daddr_t cbn, cboff; 844 u_int64_t cbc; 845 int ccdisk; 846 847 #ifdef DEBUG 848 if (ccddebug & CCDB_IO) 849 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 850 cs, bp, bn, addr, bcount); 851 #endif 852 /* 853 * Determine which component bn falls in. 854 */ 855 cbn = bn; 856 cboff = 0; 857 858 /* 859 * Serially concatenated 860 */ 861 if (cs->sc_ileave == 0) { 862 daddr_t sblk; 863 864 sblk = 0; 865 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 866 cbn >= sblk + ci->ci_size; 867 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 868 sblk += ci->ci_size; 869 cbn -= sblk; 870 } 871 /* 872 * Interleaved 873 */ 874 else { 875 struct ccdiinfo *ii; 876 int off; 877 878 cboff = cbn % cs->sc_ileave; 879 cbn /= cs->sc_ileave; 880 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 881 if (ii->ii_startblk > cbn) 882 break; 883 ii--; 884 off = cbn - ii->ii_startblk; 885 if (ii->ii_ndisk == 1) { 886 ccdisk = ii->ii_index[0]; 887 cbn = ii->ii_startoff + off; 888 } else { 889 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 890 cbn = ii->ii_startoff + off / ii->ii_ndisk; 891 } 892 cbn *= cs->sc_ileave; 893 ci = &cs->sc_cinfo[ccdisk]; 894 } 895 896 /* 897 * Fill in the component buf structure. 898 */ 899 cbp = CCD_GETBUF(); 900 KASSERT(cbp != NULL); 901 buf_init(&cbp->cb_buf); 902 cbp->cb_buf.b_flags = bp->b_flags; 903 cbp->cb_buf.b_oflags = bp->b_oflags; 904 cbp->cb_buf.b_cflags = bp->b_cflags; 905 cbp->cb_buf.b_iodone = ccdiodone; 906 cbp->cb_buf.b_proc = bp->b_proc; 907 cbp->cb_buf.b_dev = ci->ci_dev; 908 cbp->cb_buf.b_blkno = cbn + cboff; 909 cbp->cb_buf.b_data = addr; 910 cbp->cb_buf.b_vp = ci->ci_vp; 911 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 912 if (cs->sc_ileave == 0) 913 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 914 else 915 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 916 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 917 918 /* 919 * context for ccdiodone 920 */ 921 cbp->cb_obp = bp; 922 cbp->cb_sc = cs; 923 cbp->cb_comp = ccdisk; 924 925 BIO_COPYPRIO(&cbp->cb_buf, bp); 926 927 #ifdef DEBUG 928 if (ccddebug & CCDB_IO) 929 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 930 " bcnt %d\n", 931 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 932 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 933 cbp->cb_buf.b_bcount); 934 #endif 935 936 return (cbp); 937 } 938 939 /* 940 * Called at interrupt time. 941 * Mark the component as done and if all components are done, 942 * take a ccd interrupt. 943 */ 944 static void 945 ccdiodone(struct buf *vbp) 946 { 947 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 948 struct buf *bp = cbp->cb_obp; 949 struct ccd_softc *cs = cbp->cb_sc; 950 int count; 951 952 #ifdef DEBUG 953 if (ccddebug & CCDB_FOLLOW) 954 printf("ccdiodone(%p)\n", cbp); 955 if (ccddebug & CCDB_IO) { 956 printf("ccdiodone: bp %p bcount %d resid %d\n", 957 bp, bp->b_bcount, bp->b_resid); 958 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 959 " bcnt %d\n", 960 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 961 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 962 cbp->cb_buf.b_bcount); 963 } 964 #endif 965 966 if (cbp->cb_buf.b_error != 0) { 967 bp->b_error = cbp->cb_buf.b_error; 968 printf("%s: error %d on component %d\n", 969 cs->sc_xname, bp->b_error, cbp->cb_comp); 970 } 971 count = cbp->cb_buf.b_bcount; 972 buf_destroy(&cbp->cb_buf); 973 CCD_PUTBUF(cbp); 974 975 /* 976 * If all done, "interrupt". 977 */ 978 mutex_enter(cs->sc_iolock); 979 bp->b_resid -= count; 980 if (bp->b_resid < 0) 981 panic("ccdiodone: count"); 982 if (bp->b_resid == 0) { 983 /* 984 * Request is done for better or worse, wakeup the top half. 985 */ 986 if (bp->b_error != 0) 987 bp->b_resid = bp->b_bcount; 988 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 989 (bp->b_flags & B_READ)); 990 if (!disk_isbusy(&cs->sc_dkdev)) { 991 if (bufq_peek(cs->sc_bufq) != NULL) { 992 cv_broadcast(&cs->sc_push); 993 } 994 cv_broadcast(&cs->sc_stop); 995 } 996 mutex_exit(cs->sc_iolock); 997 biodone(bp); 998 } else 999 mutex_exit(cs->sc_iolock); 1000 } 1001 1002 /* ARGSUSED */ 1003 static int 1004 ccdread(dev_t dev, struct uio *uio, int flags) 1005 { 1006 int unit = ccdunit(dev); 1007 struct ccd_softc *cs; 1008 1009 #ifdef DEBUG 1010 if (ccddebug & CCDB_FOLLOW) 1011 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1012 #endif 1013 if ((cs = ccdget(unit)) == NULL) 1014 return 0; 1015 1016 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1017 if ((cs->sc_flags & CCDF_INITED) == 0) 1018 return (ENXIO); 1019 1020 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1021 } 1022 1023 /* ARGSUSED */ 1024 static int 1025 ccdwrite(dev_t dev, struct uio *uio, int flags) 1026 { 1027 int unit = ccdunit(dev); 1028 struct ccd_softc *cs; 1029 1030 #ifdef DEBUG 1031 if (ccddebug & CCDB_FOLLOW) 1032 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1033 #endif 1034 if ((cs = ccdget(unit)) == NULL) 1035 return ENOENT; 1036 1037 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1038 if ((cs->sc_flags & CCDF_INITED) == 0) 1039 return (ENXIO); 1040 1041 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1042 } 1043 1044 static int 1045 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1046 { 1047 int unit = ccdunit(dev); 1048 int i, j, lookedup = 0, error = 0; 1049 int part, pmask; 1050 struct ccd_softc *cs; 1051 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1052 kauth_cred_t uc; 1053 char **cpp; 1054 struct pathbuf *pb; 1055 struct vnode **vpp; 1056 #ifdef __HAVE_OLD_DISKLABEL 1057 struct disklabel newlabel; 1058 #endif 1059 1060 if ((cs = ccdget(unit)) == NULL) 1061 return ENOENT; 1062 uc = kauth_cred_get(); 1063 1064 /* Must be open for writes for these commands... */ 1065 switch (cmd) { 1066 case CCDIOCSET: 1067 case CCDIOCCLR: 1068 case DIOCSDINFO: 1069 case DIOCWDINFO: 1070 #ifdef __HAVE_OLD_DISKLABEL 1071 case ODIOCSDINFO: 1072 case ODIOCWDINFO: 1073 #endif 1074 case DIOCKLABEL: 1075 case DIOCWLABEL: 1076 if ((flag & FWRITE) == 0) 1077 return (EBADF); 1078 } 1079 1080 mutex_enter(&cs->sc_dvlock); 1081 1082 /* Must be initialized for these... */ 1083 switch (cmd) { 1084 case CCDIOCCLR: 1085 case DIOCGDINFO: 1086 case DIOCCACHESYNC: 1087 case DIOCSDINFO: 1088 case DIOCWDINFO: 1089 case DIOCGPART: 1090 case DIOCWLABEL: 1091 case DIOCKLABEL: 1092 case DIOCGDEFLABEL: 1093 #ifdef __HAVE_OLD_DISKLABEL 1094 case ODIOCGDINFO: 1095 case ODIOCSDINFO: 1096 case ODIOCWDINFO: 1097 case ODIOCGDEFLABEL: 1098 #endif 1099 if ((cs->sc_flags & CCDF_INITED) == 0) { 1100 error = ENXIO; 1101 goto out; 1102 } 1103 } 1104 1105 switch (cmd) { 1106 case CCDIOCSET: 1107 if (cs->sc_flags & CCDF_INITED) { 1108 error = EBUSY; 1109 goto out; 1110 } 1111 1112 /* Validate the flags. */ 1113 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1114 error = EINVAL; 1115 goto out; 1116 } 1117 1118 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1119 ccio->ccio_ndisks == 0) { 1120 error = EINVAL; 1121 goto out; 1122 } 1123 1124 /* Fill in some important bits. */ 1125 cs->sc_ileave = ccio->ccio_ileave; 1126 cs->sc_nccdisks = ccio->ccio_ndisks; 1127 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1128 1129 /* 1130 * Allocate space for and copy in the array of 1131 * componet pathnames and device numbers. 1132 */ 1133 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1134 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1135 error = copyin(ccio->ccio_disks, cpp, 1136 ccio->ccio_ndisks * sizeof(*cpp)); 1137 if (error) { 1138 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1139 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1140 goto out; 1141 } 1142 1143 #ifdef DEBUG 1144 if (ccddebug & CCDB_INIT) 1145 for (i = 0; i < ccio->ccio_ndisks; ++i) 1146 printf("ccdioctl: component %d: %p\n", 1147 i, cpp[i]); 1148 #endif 1149 1150 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1151 #ifdef DEBUG 1152 if (ccddebug & CCDB_INIT) 1153 printf("ccdioctl: lookedup = %d\n", lookedup); 1154 #endif 1155 error = pathbuf_copyin(cpp[i], &pb); 1156 if (error == 0) { 1157 error = dk_lookup(pb, l, &vpp[i]); 1158 } 1159 pathbuf_destroy(pb); 1160 if (error != 0) { 1161 for (j = 0; j < lookedup; ++j) 1162 (void)vn_close(vpp[j], FREAD|FWRITE, 1163 uc); 1164 kmem_free(vpp, ccio->ccio_ndisks * 1165 sizeof(*vpp)); 1166 kmem_free(cpp, ccio->ccio_ndisks * 1167 sizeof(*cpp)); 1168 goto out; 1169 } 1170 ++lookedup; 1171 } 1172 1173 /* Attach the disk. */ 1174 disk_attach(&cs->sc_dkdev); 1175 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1176 1177 /* 1178 * Initialize the ccd. Fills in the softc for us. 1179 */ 1180 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1181 for (j = 0; j < lookedup; ++j) 1182 (void)vn_close(vpp[j], FREAD|FWRITE, 1183 uc); 1184 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1185 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1186 disk_detach(&cs->sc_dkdev); 1187 bufq_free(cs->sc_bufq); 1188 goto out; 1189 } 1190 1191 /* We can free the temporary variables now. */ 1192 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1193 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1194 1195 /* 1196 * The ccd has been successfully initialized, so 1197 * we can place it into the array. Don't try to 1198 * read the disklabel until the disk has been attached, 1199 * because space for the disklabel is allocated 1200 * in disk_attach(); 1201 */ 1202 ccio->ccio_unit = unit; 1203 ccio->ccio_size = cs->sc_size; 1204 1205 /* Try and read the disklabel. */ 1206 ccdgetdisklabel(dev); 1207 break; 1208 1209 case CCDIOCCLR: 1210 /* 1211 * Don't unconfigure if any other partitions are open 1212 * or if both the character and block flavors of this 1213 * partition are open. 1214 */ 1215 part = DISKPART(dev); 1216 pmask = (1 << part); 1217 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1218 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1219 (cs->sc_dkdev.dk_copenmask & pmask))) { 1220 error = EBUSY; 1221 goto out; 1222 } 1223 1224 /* Stop new I/O, wait for in-flight I/O to complete. */ 1225 mutex_enter(cs->sc_iolock); 1226 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1227 cs->sc_zap = true; 1228 while (disk_isbusy(&cs->sc_dkdev) || 1229 bufq_peek(cs->sc_bufq) != NULL || 1230 cs->sc_thread != NULL) { 1231 cv_broadcast(&cs->sc_push); 1232 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1233 } 1234 mutex_exit(cs->sc_iolock); 1235 1236 /* 1237 * Free ccd_softc information and clear entry. 1238 */ 1239 1240 /* Close the components and free their pathnames. */ 1241 for (i = 0; i < cs->sc_nccdisks; ++i) { 1242 /* 1243 * XXX: this close could potentially fail and 1244 * cause Bad Things. Maybe we need to force 1245 * the close to happen? 1246 */ 1247 #ifdef DEBUG 1248 if (ccddebug & CCDB_VNODE) 1249 vprint("CCDIOCCLR: vnode info", 1250 cs->sc_cinfo[i].ci_vp); 1251 #endif 1252 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1253 uc); 1254 kmem_free(cs->sc_cinfo[i].ci_path, 1255 cs->sc_cinfo[i].ci_pathlen); 1256 } 1257 1258 /* Free interleave index. */ 1259 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1260 kmem_free(cs->sc_itable[i].ii_index, 1261 cs->sc_itable[i].ii_indexsz); 1262 } 1263 1264 /* Free component info and interleave table. */ 1265 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1266 sizeof(struct ccdcinfo)); 1267 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1268 sizeof(struct ccdiinfo)); 1269 1270 /* Detatch the disk. */ 1271 disk_detach(&cs->sc_dkdev); 1272 bufq_free(cs->sc_bufq); 1273 ccdput(cs); 1274 break; 1275 1276 case DIOCGDINFO: 1277 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label); 1278 break; 1279 1280 #ifdef __HAVE_OLD_DISKLABEL 1281 case ODIOCGDINFO: 1282 newlabel = *(cs->sc_dkdev.dk_label); 1283 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1284 return ENOTTY; 1285 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1286 break; 1287 #endif 1288 1289 case DIOCGPART: 1290 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label; 1291 ((struct partinfo *)data)->part = 1292 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1293 break; 1294 1295 case DIOCCACHESYNC: 1296 /* 1297 * XXX Do we really need to care about having a writable 1298 * file descriptor here? 1299 */ 1300 if ((flag & FWRITE) == 0) 1301 return (EBADF); 1302 1303 /* 1304 * We pass this call down to all components and report 1305 * the first error we encounter. 1306 */ 1307 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1308 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1309 flag, uc); 1310 if (j != 0 && error == 0) 1311 error = j; 1312 } 1313 break; 1314 1315 case DIOCWDINFO: 1316 case DIOCSDINFO: 1317 #ifdef __HAVE_OLD_DISKLABEL 1318 case ODIOCWDINFO: 1319 case ODIOCSDINFO: 1320 #endif 1321 { 1322 struct disklabel *lp; 1323 #ifdef __HAVE_OLD_DISKLABEL 1324 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1325 memset(&newlabel, 0, sizeof newlabel); 1326 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1327 lp = &newlabel; 1328 } else 1329 #endif 1330 lp = (struct disklabel *)data; 1331 1332 cs->sc_flags |= CCDF_LABELLING; 1333 1334 error = setdisklabel(cs->sc_dkdev.dk_label, 1335 lp, 0, cs->sc_dkdev.dk_cpulabel); 1336 if (error == 0) { 1337 if (cmd == DIOCWDINFO 1338 #ifdef __HAVE_OLD_DISKLABEL 1339 || cmd == ODIOCWDINFO 1340 #endif 1341 ) 1342 error = writedisklabel(CCDLABELDEV(dev), 1343 ccdstrategy, cs->sc_dkdev.dk_label, 1344 cs->sc_dkdev.dk_cpulabel); 1345 } 1346 1347 cs->sc_flags &= ~CCDF_LABELLING; 1348 break; 1349 } 1350 1351 case DIOCKLABEL: 1352 if (*(int *)data != 0) 1353 cs->sc_flags |= CCDF_KLABEL; 1354 else 1355 cs->sc_flags &= ~CCDF_KLABEL; 1356 break; 1357 1358 case DIOCWLABEL: 1359 if (*(int *)data != 0) 1360 cs->sc_flags |= CCDF_WLABEL; 1361 else 1362 cs->sc_flags &= ~CCDF_WLABEL; 1363 break; 1364 1365 case DIOCGDEFLABEL: 1366 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1367 break; 1368 1369 #ifdef __HAVE_OLD_DISKLABEL 1370 case ODIOCGDEFLABEL: 1371 ccdgetdefaultlabel(cs, &newlabel); 1372 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1373 return ENOTTY; 1374 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1375 break; 1376 #endif 1377 1378 default: 1379 error = ENOTTY; 1380 } 1381 1382 out: 1383 mutex_exit(&cs->sc_dvlock); 1384 return (error); 1385 } 1386 1387 static int 1388 ccdsize(dev_t dev) 1389 { 1390 struct ccd_softc *cs; 1391 struct disklabel *lp; 1392 int part, unit, omask, size; 1393 1394 unit = ccdunit(dev); 1395 if ((cs = ccdget(unit)) == NULL) 1396 return -1; 1397 1398 if ((cs->sc_flags & CCDF_INITED) == 0) 1399 return (-1); 1400 1401 part = DISKPART(dev); 1402 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1403 lp = cs->sc_dkdev.dk_label; 1404 1405 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1406 return (-1); 1407 1408 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1409 size = -1; 1410 else 1411 size = lp->d_partitions[part].p_size * 1412 (lp->d_secsize / DEV_BSIZE); 1413 1414 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1415 return (-1); 1416 1417 return (size); 1418 } 1419 1420 static void 1421 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1422 { 1423 struct ccdgeom *ccg = &cs->sc_geom; 1424 1425 memset(lp, 0, sizeof(*lp)); 1426 1427 lp->d_secperunit = cs->sc_size; 1428 lp->d_secsize = ccg->ccg_secsize; 1429 lp->d_nsectors = ccg->ccg_nsectors; 1430 lp->d_ntracks = ccg->ccg_ntracks; 1431 lp->d_ncylinders = ccg->ccg_ncylinders; 1432 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1433 1434 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1435 lp->d_type = DTYPE_CCD; 1436 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1437 lp->d_rpm = 3600; 1438 lp->d_interleave = 1; 1439 lp->d_flags = 0; 1440 1441 lp->d_partitions[RAW_PART].p_offset = 0; 1442 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1443 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1444 lp->d_npartitions = RAW_PART + 1; 1445 1446 lp->d_magic = DISKMAGIC; 1447 lp->d_magic2 = DISKMAGIC; 1448 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1449 } 1450 1451 /* 1452 * Read the disklabel from the ccd. If one is not present, fake one 1453 * up. 1454 */ 1455 static void 1456 ccdgetdisklabel(dev_t dev) 1457 { 1458 int unit = ccdunit(dev); 1459 struct ccd_softc *cs; 1460 const char *errstring; 1461 struct disklabel *lp; 1462 struct cpu_disklabel *clp; 1463 1464 if ((cs = ccdget(unit)) == NULL) 1465 return; 1466 lp = cs->sc_dkdev.dk_label; 1467 clp = cs->sc_dkdev.dk_cpulabel; 1468 KASSERT(mutex_owned(&cs->sc_dvlock)); 1469 1470 memset(clp, 0, sizeof(*clp)); 1471 1472 ccdgetdefaultlabel(cs, lp); 1473 1474 /* 1475 * Call the generic disklabel extraction routine. 1476 */ 1477 cs->sc_flags |= CCDF_RLABEL; 1478 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1479 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1480 else 1481 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1482 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1483 if (errstring) 1484 ccdmakedisklabel(cs); 1485 else { 1486 int i; 1487 struct partition *pp; 1488 1489 /* 1490 * Sanity check whether the found disklabel is valid. 1491 * 1492 * This is necessary since total size of ccd may vary 1493 * when an interleave is changed even though exactly 1494 * same componets are used, and old disklabel may used 1495 * if that is found. 1496 */ 1497 if (lp->d_secperunit != cs->sc_size) 1498 printf("WARNING: %s: " 1499 "total sector size in disklabel (%d) != " 1500 "the size of ccd (%lu)\n", cs->sc_xname, 1501 lp->d_secperunit, (u_long)cs->sc_size); 1502 for (i = 0; i < lp->d_npartitions; i++) { 1503 pp = &lp->d_partitions[i]; 1504 if (pp->p_offset + pp->p_size > cs->sc_size) 1505 printf("WARNING: %s: end of partition `%c' " 1506 "exceeds the size of ccd (%lu)\n", 1507 cs->sc_xname, 'a' + i, (u_long)cs->sc_size); 1508 } 1509 } 1510 1511 #ifdef DEBUG 1512 /* It's actually extremely common to have unlabeled ccds. */ 1513 if (ccddebug & CCDB_LABEL) 1514 if (errstring != NULL) 1515 printf("%s: %s\n", cs->sc_xname, errstring); 1516 #endif 1517 1518 /* In-core label now valid. */ 1519 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1520 } 1521 1522 /* 1523 * Take care of things one might want to take care of in the event 1524 * that a disklabel isn't present. 1525 */ 1526 static void 1527 ccdmakedisklabel(struct ccd_softc *cs) 1528 { 1529 struct disklabel *lp = cs->sc_dkdev.dk_label; 1530 1531 /* 1532 * For historical reasons, if there's no disklabel present 1533 * the raw partition must be marked FS_BSDFFS. 1534 */ 1535 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1536 1537 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1538 1539 lp->d_checksum = dkcksum(lp); 1540 } 1541 1542 #ifdef DEBUG 1543 static void 1544 printiinfo(struct ccdiinfo *ii) 1545 { 1546 int ix, i; 1547 1548 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1549 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1550 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1551 for (i = 0; i < ii->ii_ndisk; i++) 1552 printf(" %d", ii->ii_index[i]); 1553 printf("\n"); 1554 } 1555 } 1556 #endif 1557 1558 MODULE(MODULE_CLASS_DRIVER, ccd, NULL); 1559 1560 static int 1561 ccd_modcmd(modcmd_t cmd, void *arg) 1562 { 1563 int error = 0; 1564 #ifdef _MODULE 1565 int bmajor = -1, cmajor = -1; 1566 #endif 1567 1568 1569 switch (cmd) { 1570 case MODULE_CMD_INIT: 1571 #ifdef _MODULE 1572 ccdattach(4); 1573 1574 return devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1575 &ccd_cdevsw, &cmajor); 1576 #endif 1577 break; 1578 1579 case MODULE_CMD_FINI: 1580 #ifdef _MODULE 1581 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1582 #endif 1583 break; 1584 1585 case MODULE_CMD_STAT: 1586 return ENOTTY; 1587 1588 default: 1589 return ENOTTY; 1590 } 1591 1592 return error; 1593 } 1594 1595 static int 1596 ccd_units_sysctl(SYSCTLFN_ARGS) 1597 { 1598 struct sysctlnode node; 1599 struct ccd_softc *sc; 1600 int error, i, nccd, *units; 1601 size_t size; 1602 1603 nccd = 0; 1604 mutex_enter(&ccd_lock); 1605 LIST_FOREACH(sc, &ccds, sc_link) 1606 nccd++; 1607 mutex_exit(&ccd_lock); 1608 1609 if (nccd != 0) { 1610 size = nccd * sizeof(*units); 1611 units = kmem_zalloc(size, KM_SLEEP); 1612 if (units == NULL) 1613 return ENOMEM; 1614 1615 i = 0; 1616 mutex_enter(&ccd_lock); 1617 LIST_FOREACH(sc, &ccds, sc_link) { 1618 if (i >= nccd) 1619 break; 1620 units[i] = sc->sc_unit; 1621 } 1622 mutex_exit(&ccd_lock); 1623 } else { 1624 units = NULL; 1625 size = 0; 1626 } 1627 1628 node = *rnode; 1629 node.sysctl_data = units; 1630 node.sysctl_size = size; 1631 1632 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1633 if (units) 1634 kmem_free(units, size); 1635 return error; 1636 } 1637 1638 static int 1639 ccd_info_sysctl(SYSCTLFN_ARGS) 1640 { 1641 struct sysctlnode node; 1642 struct ccddiskinfo ccd; 1643 struct ccd_softc *sc; 1644 int unit; 1645 1646 if (newp == NULL || newlen != sizeof(int)) 1647 return EINVAL; 1648 1649 unit = *(const int *)newp; 1650 newp = NULL; 1651 newlen = 0; 1652 ccd.ccd_ndisks = ~0; 1653 mutex_enter(&ccd_lock); 1654 LIST_FOREACH(sc, &ccds, sc_link) { 1655 if (sc->sc_unit == unit) { 1656 ccd.ccd_ileave = sc->sc_ileave; 1657 ccd.ccd_size = sc->sc_size; 1658 ccd.ccd_ndisks = sc->sc_nccdisks; 1659 ccd.ccd_flags = sc->sc_flags; 1660 break; 1661 } 1662 } 1663 mutex_exit(&ccd_lock); 1664 1665 if (ccd.ccd_ndisks == ~0) 1666 return ENOENT; 1667 1668 node = *rnode; 1669 node.sysctl_data = &ccd; 1670 node.sysctl_size = sizeof(ccd); 1671 1672 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1673 } 1674 1675 static int 1676 ccd_components_sysctl(SYSCTLFN_ARGS) 1677 { 1678 struct sysctlnode node; 1679 int error, unit; 1680 size_t size; 1681 char *names, *p, *ep; 1682 struct ccd_softc *sc; 1683 1684 if (newp == NULL || newlen != sizeof(int)) 1685 return EINVAL; 1686 1687 size = 0; 1688 unit = *(const int *)newp; 1689 newp = NULL; 1690 newlen = 0; 1691 mutex_enter(&ccd_lock); 1692 LIST_FOREACH(sc, &ccds, sc_link) 1693 if (sc->sc_unit == unit) { 1694 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1695 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1696 break; 1697 } 1698 mutex_exit(&ccd_lock); 1699 1700 if (size == 0) 1701 return ENOENT; 1702 names = kmem_zalloc(size, KM_SLEEP); 1703 if (names == NULL) 1704 return ENOMEM; 1705 1706 p = names; 1707 ep = names + size; 1708 mutex_enter(&ccd_lock); 1709 LIST_FOREACH(sc, &ccds, sc_link) 1710 if (sc->sc_unit == unit) { 1711 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1712 char *d = sc->sc_cinfo[i].ci_path; 1713 while (p < ep && (*p++ = *d++) != '\0') 1714 continue; 1715 } 1716 break; 1717 } 1718 mutex_exit(&ccd_lock); 1719 1720 node = *rnode; 1721 node.sysctl_data = names; 1722 node.sysctl_size = ep - names; 1723 1724 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1725 kmem_free(names, size); 1726 return error; 1727 } 1728 1729 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1730 { 1731 const struct sysctlnode *node = NULL; 1732 1733 /* Make sure net.key exists before we register nodes underneath it. */ 1734 sysctl_createv(clog, 0, NULL, NULL, 1735 CTLFLAG_PERMANENT, 1736 CTLTYPE_NODE, "kern", NULL, 1737 NULL, 0, NULL, 0, 1738 CTL_KERN, CTL_EOL); 1739 sysctl_createv(clog, 0, NULL, &node, 1740 CTLFLAG_PERMANENT, 1741 CTLTYPE_NODE, "ccd", 1742 SYSCTL_DESCR("ConCatenated Disk state"), 1743 NULL, 0, NULL, 0, 1744 CTL_KERN, CTL_CREATE, CTL_EOL); 1745 1746 if (node == NULL) 1747 return; 1748 1749 sysctl_createv(clog, 0, &node, NULL, 1750 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1751 CTLTYPE_STRUCT, "units", 1752 SYSCTL_DESCR("List of ccd unit numbers"), 1753 ccd_units_sysctl, 0, NULL, 0, 1754 CTL_CREATE, CTL_EOL); 1755 sysctl_createv(clog, 0, &node, NULL, 1756 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1757 CTLTYPE_STRUCT, "info", 1758 SYSCTL_DESCR("Information about a CCD unit"), 1759 ccd_info_sysctl, 0, NULL, 0, 1760 CTL_CREATE, CTL_EOL); 1761 sysctl_createv(clog, 0, &node, NULL, 1762 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1763 CTLTYPE_STRUCT, "components", 1764 SYSCTL_DESCR("Information about CCD components"), 1765 ccd_components_sysctl, 0, NULL, 0, 1766 CTL_CREATE, CTL_EOL); 1767 } 1768