1 /* $NetBSD: ccd.c,v 1.152 2014/08/16 19:27:27 sborrill Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.152 2014/08/16 19:27:27 sborrill Exp $"); 92 93 #if defined(_KERNEL_OPT) 94 #include "opt_compat_netbsd.h" 95 #endif 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/kernel.h> 100 #include <sys/proc.h> 101 #include <sys/errno.h> 102 #include <sys/buf.h> 103 #include <sys/kmem.h> 104 #include <sys/pool.h> 105 #include <sys/module.h> 106 #include <sys/namei.h> 107 #include <sys/stat.h> 108 #include <sys/ioctl.h> 109 #include <sys/disklabel.h> 110 #include <sys/device.h> 111 #include <sys/disk.h> 112 #include <sys/syslog.h> 113 #include <sys/fcntl.h> 114 #include <sys/vnode.h> 115 #include <sys/conf.h> 116 #include <sys/mutex.h> 117 #include <sys/queue.h> 118 #include <sys/kauth.h> 119 #include <sys/kthread.h> 120 #include <sys/bufq.h> 121 #include <sys/sysctl.h> 122 123 #include <uvm/uvm_extern.h> 124 125 #include <dev/ccdvar.h> 126 #include <dev/dkvar.h> 127 128 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 129 130 #if defined(CCDDEBUG) && !defined(DEBUG) 131 #define DEBUG 132 #endif 133 134 #ifdef DEBUG 135 #define CCDB_FOLLOW 0x01 136 #define CCDB_INIT 0x02 137 #define CCDB_IO 0x04 138 #define CCDB_LABEL 0x08 139 #define CCDB_VNODE 0x10 140 int ccddebug = 0x00; 141 #endif 142 143 #define ccdunit(x) DISKUNIT(x) 144 145 struct ccdbuf { 146 struct buf cb_buf; /* new I/O buf */ 147 struct buf *cb_obp; /* ptr. to original I/O buf */ 148 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 149 int cb_comp; /* target component */ 150 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 151 }; 152 153 /* component buffer pool */ 154 static pool_cache_t ccd_cache; 155 156 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 157 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 158 159 #define CCDLABELDEV(dev) \ 160 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 161 162 /* called by main() at boot time */ 163 void ccdattach(int); 164 165 /* called by biodone() at interrupt time */ 166 static void ccdiodone(struct buf *); 167 168 static void ccdinterleave(struct ccd_softc *); 169 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 170 struct lwp *); 171 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 172 daddr_t, void *, long); 173 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 174 static void ccdgetdisklabel(dev_t); 175 static void ccdmakedisklabel(struct ccd_softc *); 176 static void ccdstart(struct ccd_softc *); 177 static void ccdthread(void *); 178 179 static dev_type_open(ccdopen); 180 static dev_type_close(ccdclose); 181 static dev_type_read(ccdread); 182 static dev_type_write(ccdwrite); 183 static dev_type_ioctl(ccdioctl); 184 static dev_type_strategy(ccdstrategy); 185 static dev_type_size(ccdsize); 186 187 const struct bdevsw ccd_bdevsw = { 188 .d_open = ccdopen, 189 .d_close = ccdclose, 190 .d_strategy = ccdstrategy, 191 .d_ioctl = ccdioctl, 192 .d_dump = nodump, 193 .d_psize = ccdsize, 194 .d_discard = nodiscard, 195 .d_flag = D_DISK | D_MPSAFE 196 }; 197 198 const struct cdevsw ccd_cdevsw = { 199 .d_open = ccdopen, 200 .d_close = ccdclose, 201 .d_read = ccdread, 202 .d_write = ccdwrite, 203 .d_ioctl = ccdioctl, 204 .d_stop = nostop, 205 .d_tty = notty, 206 .d_poll = nopoll, 207 .d_mmap = nommap, 208 .d_kqfilter = nokqfilter, 209 .d_discard = nodiscard, 210 .d_flag = D_DISK | D_MPSAFE 211 }; 212 213 #ifdef DEBUG 214 static void printiinfo(struct ccdiinfo *); 215 #endif 216 217 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 218 static kmutex_t ccd_lock; 219 220 static struct ccd_softc * 221 ccdcreate(int unit) { 222 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 223 if (sc == NULL) { 224 #ifdef DIAGNOSTIC 225 printf("%s: out of memory\n", __func__); 226 #endif 227 return NULL; 228 } 229 /* Initialize per-softc structures. */ 230 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 231 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 232 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 233 cv_init(&sc->sc_stop, "ccdstop"); 234 cv_init(&sc->sc_push, "ccdthr"); 235 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 236 return sc; 237 } 238 239 static void 240 ccddestroy(struct ccd_softc *sc) { 241 mutex_obj_free(sc->sc_iolock); 242 mutex_exit(&sc->sc_dvlock); 243 mutex_destroy(&sc->sc_dvlock); 244 cv_destroy(&sc->sc_stop); 245 cv_destroy(&sc->sc_push); 246 disk_destroy(&sc->sc_dkdev); 247 kmem_free(sc, sizeof(*sc)); 248 } 249 250 static struct ccd_softc * 251 ccdget(int unit) { 252 struct ccd_softc *sc; 253 if (unit < 0) { 254 #ifdef DIAGNOSTIC 255 panic("%s: unit %d!", __func__, unit); 256 #endif 257 return NULL; 258 } 259 mutex_enter(&ccd_lock); 260 LIST_FOREACH(sc, &ccds, sc_link) { 261 if (sc->sc_unit == unit) { 262 mutex_exit(&ccd_lock); 263 return sc; 264 } 265 } 266 mutex_exit(&ccd_lock); 267 if ((sc = ccdcreate(unit)) == NULL) 268 return NULL; 269 mutex_enter(&ccd_lock); 270 LIST_INSERT_HEAD(&ccds, sc, sc_link); 271 mutex_exit(&ccd_lock); 272 return sc; 273 } 274 275 static void 276 ccdput(struct ccd_softc *sc) { 277 mutex_enter(&ccd_lock); 278 LIST_REMOVE(sc, sc_link); 279 mutex_exit(&ccd_lock); 280 ccddestroy(sc); 281 } 282 283 /* 284 * Called by main() during pseudo-device attachment. All we need 285 * to do is allocate enough space for devices to be configured later. 286 */ 287 void 288 ccdattach(int num) 289 { 290 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 291 292 /* Initialize the component buffer pool. */ 293 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 294 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 295 } 296 297 static int 298 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 299 struct lwp *l) 300 { 301 struct ccdcinfo *ci = NULL; 302 int ix; 303 struct ccdgeom *ccg = &cs->sc_geom; 304 char *tmppath; 305 int error, path_alloced; 306 uint64_t psize, minsize; 307 unsigned secsize, maxsecsize; 308 309 #ifdef DEBUG 310 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 311 printf("%s: ccdinit\n", cs->sc_xname); 312 #endif 313 314 /* Allocate space for the component info. */ 315 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 316 KM_SLEEP); 317 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 318 319 cs->sc_size = 0; 320 321 /* 322 * Verify that each component piece exists and record 323 * relevant information about it. 324 */ 325 maxsecsize = 0; 326 minsize = 0; 327 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 328 ci = &cs->sc_cinfo[ix]; 329 ci->ci_vp = vpp[ix]; 330 331 /* 332 * Copy in the pathname of the component. 333 */ 334 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 335 error = copyinstr(cpaths[ix], tmppath, 336 MAXPATHLEN, &ci->ci_pathlen); 337 if (ci->ci_pathlen == 0) 338 error = EINVAL; 339 if (error) { 340 #ifdef DEBUG 341 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 342 printf("%s: can't copy path, error = %d\n", 343 cs->sc_xname, error); 344 #endif 345 goto out; 346 } 347 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 348 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 349 path_alloced++; 350 351 /* 352 * XXX: Cache the component's dev_t. 353 */ 354 ci->ci_dev = vpp[ix]->v_rdev; 355 356 /* 357 * Get partition information for the component. 358 */ 359 error = getdisksize(vpp[ix], &psize, &secsize); 360 if (error) { 361 #ifdef DEBUG 362 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 363 printf("%s: %s: disksize failed, error = %d\n", 364 cs->sc_xname, ci->ci_path, error); 365 #endif 366 goto out; 367 } 368 369 /* 370 * Calculate the size, truncating to an interleave 371 * boundary if necessary. 372 */ 373 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 374 if (cs->sc_ileave > 1) 375 psize -= psize % cs->sc_ileave; 376 377 if (psize == 0) { 378 #ifdef DEBUG 379 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 380 printf("%s: %s: size == 0\n", 381 cs->sc_xname, ci->ci_path); 382 #endif 383 error = ENODEV; 384 goto out; 385 } 386 387 if (minsize == 0 || psize < minsize) 388 minsize = psize; 389 ci->ci_size = psize; 390 cs->sc_size += psize; 391 } 392 393 /* 394 * Don't allow the interleave to be smaller than 395 * the biggest component sector. 396 */ 397 if ((cs->sc_ileave > 0) && 398 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 399 #ifdef DEBUG 400 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 401 printf("%s: interleave must be at least %d\n", 402 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 403 #endif 404 error = EINVAL; 405 goto out; 406 } 407 408 /* 409 * If uniform interleave is desired set all sizes to that of 410 * the smallest component. 411 */ 412 if (cs->sc_flags & CCDF_UNIFORM) { 413 for (ci = cs->sc_cinfo; 414 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 415 ci->ci_size = minsize; 416 417 cs->sc_size = cs->sc_nccdisks * minsize; 418 } 419 420 /* 421 * Construct the interleave table. 422 */ 423 ccdinterleave(cs); 424 425 /* 426 * Create pseudo-geometry based on 1MB cylinders. It's 427 * pretty close. 428 */ 429 ccg->ccg_secsize = DEV_BSIZE; 430 ccg->ccg_ntracks = 1; 431 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 432 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 433 434 if (cs->sc_ileave > 0) 435 aprint_normal("%s: Interleaving %d component%s " 436 "(%d block interleave)\n", cs->sc_xname, 437 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 438 cs->sc_ileave); 439 else 440 aprint_normal("%s: Concatenating %d component%s\n", 441 cs->sc_xname, 442 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 443 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 444 ci = &cs->sc_cinfo[ix]; 445 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 446 ci->ci_path, (uintmax_t)ci->ci_size); 447 } 448 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 449 450 /* 451 * Create thread to handle deferred I/O. 452 */ 453 cs->sc_zap = false; 454 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 455 cs, &cs->sc_thread, "%s", cs->sc_xname); 456 if (error) { 457 printf("ccdinit: can't create thread: %d\n", error); 458 goto out; 459 } 460 461 /* 462 * Only now that everything is set up can we enable the device. 463 */ 464 mutex_enter(cs->sc_iolock); 465 cs->sc_flags |= CCDF_INITED; 466 mutex_exit(cs->sc_iolock); 467 kmem_free(tmppath, MAXPATHLEN); 468 return (0); 469 470 out: 471 for (ix = 0; ix < path_alloced; ix++) { 472 kmem_free(cs->sc_cinfo[ix].ci_path, 473 cs->sc_cinfo[ix].ci_pathlen); 474 } 475 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 476 kmem_free(tmppath, MAXPATHLEN); 477 return (error); 478 } 479 480 static void 481 ccdinterleave(struct ccd_softc *cs) 482 { 483 struct ccdcinfo *ci, *smallci; 484 struct ccdiinfo *ii; 485 daddr_t bn, lbn; 486 int ix; 487 u_long size; 488 489 #ifdef DEBUG 490 if (ccddebug & CCDB_INIT) 491 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 492 #endif 493 /* 494 * Allocate an interleave table. 495 * Chances are this is too big, but we don't care. 496 */ 497 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 498 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 499 500 /* 501 * Trivial case: no interleave (actually interleave of disk size). 502 * Each table entry represents a single component in its entirety. 503 */ 504 if (cs->sc_ileave == 0) { 505 bn = 0; 506 ii = cs->sc_itable; 507 508 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 509 /* Allocate space for ii_index. */ 510 ii->ii_indexsz = sizeof(int); 511 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 512 ii->ii_ndisk = 1; 513 ii->ii_startblk = bn; 514 ii->ii_startoff = 0; 515 ii->ii_index[0] = ix; 516 bn += cs->sc_cinfo[ix].ci_size; 517 ii++; 518 } 519 ii->ii_ndisk = 0; 520 #ifdef DEBUG 521 if (ccddebug & CCDB_INIT) 522 printiinfo(cs->sc_itable); 523 #endif 524 return; 525 } 526 527 /* 528 * The following isn't fast or pretty; it doesn't have to be. 529 */ 530 size = 0; 531 bn = lbn = 0; 532 for (ii = cs->sc_itable; ; ii++) { 533 /* Allocate space for ii_index. */ 534 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 535 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 536 537 /* 538 * Locate the smallest of the remaining components 539 */ 540 smallci = NULL; 541 for (ci = cs->sc_cinfo; 542 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 543 if (ci->ci_size > size && 544 (smallci == NULL || 545 ci->ci_size < smallci->ci_size)) 546 smallci = ci; 547 548 /* 549 * Nobody left, all done 550 */ 551 if (smallci == NULL) { 552 ii->ii_ndisk = 0; 553 break; 554 } 555 556 /* 557 * Record starting logical block and component offset 558 */ 559 ii->ii_startblk = bn / cs->sc_ileave; 560 ii->ii_startoff = lbn; 561 562 /* 563 * Determine how many disks take part in this interleave 564 * and record their indices. 565 */ 566 ix = 0; 567 for (ci = cs->sc_cinfo; 568 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 569 if (ci->ci_size >= smallci->ci_size) 570 ii->ii_index[ix++] = ci - cs->sc_cinfo; 571 ii->ii_ndisk = ix; 572 bn += ix * (smallci->ci_size - size); 573 lbn = smallci->ci_size / cs->sc_ileave; 574 size = smallci->ci_size; 575 } 576 #ifdef DEBUG 577 if (ccddebug & CCDB_INIT) 578 printiinfo(cs->sc_itable); 579 #endif 580 } 581 582 /* ARGSUSED */ 583 static int 584 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 585 { 586 int unit = ccdunit(dev); 587 struct ccd_softc *cs; 588 struct disklabel *lp; 589 int error = 0, part, pmask; 590 591 #ifdef DEBUG 592 if (ccddebug & CCDB_FOLLOW) 593 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 594 #endif 595 if ((cs = ccdget(unit)) == NULL) 596 return ENXIO; 597 598 mutex_enter(&cs->sc_dvlock); 599 600 lp = cs->sc_dkdev.dk_label; 601 602 part = DISKPART(dev); 603 pmask = (1 << part); 604 605 /* 606 * If we're initialized, check to see if there are any other 607 * open partitions. If not, then it's safe to update 608 * the in-core disklabel. Only read the disklabel if it is 609 * not already valid. 610 */ 611 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 612 cs->sc_dkdev.dk_openmask == 0) 613 ccdgetdisklabel(dev); 614 615 /* Check that the partition exists. */ 616 if (part != RAW_PART) { 617 if (((cs->sc_flags & CCDF_INITED) == 0) || 618 ((part >= lp->d_npartitions) || 619 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 620 error = ENXIO; 621 goto done; 622 } 623 } 624 625 /* Prevent our unit from being unconfigured while open. */ 626 switch (fmt) { 627 case S_IFCHR: 628 cs->sc_dkdev.dk_copenmask |= pmask; 629 break; 630 631 case S_IFBLK: 632 cs->sc_dkdev.dk_bopenmask |= pmask; 633 break; 634 } 635 cs->sc_dkdev.dk_openmask = 636 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 637 638 done: 639 mutex_exit(&cs->sc_dvlock); 640 return (error); 641 } 642 643 /* ARGSUSED */ 644 static int 645 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 646 { 647 int unit = ccdunit(dev); 648 struct ccd_softc *cs; 649 int part; 650 651 #ifdef DEBUG 652 if (ccddebug & CCDB_FOLLOW) 653 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 654 #endif 655 656 if ((cs = ccdget(unit)) == NULL) 657 return ENXIO; 658 659 mutex_enter(&cs->sc_dvlock); 660 661 part = DISKPART(dev); 662 663 /* ...that much closer to allowing unconfiguration... */ 664 switch (fmt) { 665 case S_IFCHR: 666 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 667 break; 668 669 case S_IFBLK: 670 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 671 break; 672 } 673 cs->sc_dkdev.dk_openmask = 674 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 675 676 if (cs->sc_dkdev.dk_openmask == 0) { 677 if ((cs->sc_flags & CCDF_KLABEL) == 0) 678 cs->sc_flags &= ~CCDF_VLABEL; 679 } 680 681 mutex_exit(&cs->sc_dvlock); 682 return (0); 683 } 684 685 static bool 686 ccdbackoff(struct ccd_softc *cs) 687 { 688 689 /* XXX Arbitrary, should be a uvm call. */ 690 return uvmexp.free < (uvmexp.freemin >> 1) && 691 disk_isbusy(&cs->sc_dkdev); 692 } 693 694 static void 695 ccdthread(void *cookie) 696 { 697 struct ccd_softc *cs; 698 699 cs = cookie; 700 701 #ifdef DEBUG 702 if (ccddebug & CCDB_FOLLOW) 703 printf("ccdthread: hello\n"); 704 #endif 705 706 mutex_enter(cs->sc_iolock); 707 while (__predict_true(!cs->sc_zap)) { 708 if (bufq_peek(cs->sc_bufq) == NULL) { 709 /* Nothing to do. */ 710 cv_wait(&cs->sc_push, cs->sc_iolock); 711 continue; 712 } 713 if (ccdbackoff(cs)) { 714 /* Wait for memory to become available. */ 715 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 716 continue; 717 } 718 #ifdef DEBUG 719 if (ccddebug & CCDB_FOLLOW) 720 printf("ccdthread: dispatching I/O\n"); 721 #endif 722 ccdstart(cs); 723 mutex_enter(cs->sc_iolock); 724 } 725 cs->sc_thread = NULL; 726 mutex_exit(cs->sc_iolock); 727 #ifdef DEBUG 728 if (ccddebug & CCDB_FOLLOW) 729 printf("ccdthread: goodbye\n"); 730 #endif 731 kthread_exit(0); 732 } 733 734 static void 735 ccdstrategy(struct buf *bp) 736 { 737 int unit = ccdunit(bp->b_dev); 738 struct ccd_softc *cs; 739 if ((cs = ccdget(unit)) == NULL) 740 return; 741 742 /* Must be open or reading label. */ 743 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 744 (cs->sc_flags & CCDF_RLABEL) != 0); 745 746 mutex_enter(cs->sc_iolock); 747 /* Synchronize with device init/uninit. */ 748 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 749 mutex_exit(cs->sc_iolock); 750 #ifdef DEBUG 751 if (ccddebug & CCDB_FOLLOW) 752 printf("ccdstrategy: unit %d: not inited\n", unit); 753 #endif 754 bp->b_error = ENXIO; 755 bp->b_resid = bp->b_bcount; 756 biodone(bp); 757 return; 758 } 759 760 /* Defer to thread if system is low on memory. */ 761 bufq_put(cs->sc_bufq, bp); 762 if (__predict_false(ccdbackoff(cs))) { 763 mutex_exit(cs->sc_iolock); 764 #ifdef DEBUG 765 if (ccddebug & CCDB_FOLLOW) 766 printf("ccdstrategy: holding off on I/O\n"); 767 #endif 768 return; 769 } 770 ccdstart(cs); 771 } 772 773 static void 774 ccdstart(struct ccd_softc *cs) 775 { 776 daddr_t blkno; 777 int wlabel; 778 struct disklabel *lp; 779 long bcount, rcount; 780 struct ccdbuf *cbp; 781 char *addr; 782 daddr_t bn; 783 vnode_t *vp; 784 buf_t *bp; 785 786 KASSERT(mutex_owned(cs->sc_iolock)); 787 788 disk_busy(&cs->sc_dkdev); 789 bp = bufq_get(cs->sc_bufq); 790 KASSERT(bp != NULL); 791 792 #ifdef DEBUG 793 if (ccddebug & CCDB_FOLLOW) 794 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 795 #endif 796 797 /* If it's a nil transfer, wake up the top half now. */ 798 if (bp->b_bcount == 0) 799 goto done; 800 801 lp = cs->sc_dkdev.dk_label; 802 803 /* 804 * Do bounds checking and adjust transfer. If there's an 805 * error, the bounds check will flag that for us. Convert 806 * the partition relative block number to an absolute. 807 */ 808 blkno = bp->b_blkno; 809 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 810 if (DISKPART(bp->b_dev) != RAW_PART) { 811 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 812 goto done; 813 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 814 } 815 mutex_exit(cs->sc_iolock); 816 bp->b_rawblkno = blkno; 817 818 /* Allocate the component buffers and start I/O! */ 819 bp->b_resid = bp->b_bcount; 820 bn = bp->b_rawblkno; 821 addr = bp->b_data; 822 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 823 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 824 rcount = cbp->cb_buf.b_bcount; 825 bn += btodb(rcount); 826 addr += rcount; 827 vp = cbp->cb_buf.b_vp; 828 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 829 mutex_enter(vp->v_interlock); 830 vp->v_numoutput++; 831 mutex_exit(vp->v_interlock); 832 } 833 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 834 } 835 return; 836 837 done: 838 disk_unbusy(&cs->sc_dkdev, 0, 0); 839 cv_broadcast(&cs->sc_stop); 840 cv_broadcast(&cs->sc_push); 841 mutex_exit(cs->sc_iolock); 842 bp->b_resid = bp->b_bcount; 843 biodone(bp); 844 } 845 846 /* 847 * Build a component buffer header. 848 */ 849 static struct ccdbuf * 850 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 851 long bcount) 852 { 853 struct ccdcinfo *ci; 854 struct ccdbuf *cbp; 855 daddr_t cbn, cboff; 856 u_int64_t cbc; 857 int ccdisk; 858 859 #ifdef DEBUG 860 if (ccddebug & CCDB_IO) 861 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 862 cs, bp, bn, addr, bcount); 863 #endif 864 /* 865 * Determine which component bn falls in. 866 */ 867 cbn = bn; 868 cboff = 0; 869 870 /* 871 * Serially concatenated 872 */ 873 if (cs->sc_ileave == 0) { 874 daddr_t sblk; 875 876 sblk = 0; 877 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 878 cbn >= sblk + ci->ci_size; 879 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 880 sblk += ci->ci_size; 881 cbn -= sblk; 882 } 883 /* 884 * Interleaved 885 */ 886 else { 887 struct ccdiinfo *ii; 888 int off; 889 890 cboff = cbn % cs->sc_ileave; 891 cbn /= cs->sc_ileave; 892 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 893 if (ii->ii_startblk > cbn) 894 break; 895 ii--; 896 off = cbn - ii->ii_startblk; 897 if (ii->ii_ndisk == 1) { 898 ccdisk = ii->ii_index[0]; 899 cbn = ii->ii_startoff + off; 900 } else { 901 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 902 cbn = ii->ii_startoff + off / ii->ii_ndisk; 903 } 904 cbn *= cs->sc_ileave; 905 ci = &cs->sc_cinfo[ccdisk]; 906 } 907 908 /* 909 * Fill in the component buf structure. 910 */ 911 cbp = CCD_GETBUF(); 912 KASSERT(cbp != NULL); 913 buf_init(&cbp->cb_buf); 914 cbp->cb_buf.b_flags = bp->b_flags; 915 cbp->cb_buf.b_oflags = bp->b_oflags; 916 cbp->cb_buf.b_cflags = bp->b_cflags; 917 cbp->cb_buf.b_iodone = ccdiodone; 918 cbp->cb_buf.b_proc = bp->b_proc; 919 cbp->cb_buf.b_dev = ci->ci_dev; 920 cbp->cb_buf.b_blkno = cbn + cboff; 921 cbp->cb_buf.b_data = addr; 922 cbp->cb_buf.b_vp = ci->ci_vp; 923 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 924 if (cs->sc_ileave == 0) 925 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 926 else 927 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 928 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 929 930 /* 931 * context for ccdiodone 932 */ 933 cbp->cb_obp = bp; 934 cbp->cb_sc = cs; 935 cbp->cb_comp = ccdisk; 936 937 BIO_COPYPRIO(&cbp->cb_buf, bp); 938 939 #ifdef DEBUG 940 if (ccddebug & CCDB_IO) 941 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 942 " bcnt %d\n", 943 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 944 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 945 cbp->cb_buf.b_bcount); 946 #endif 947 948 return (cbp); 949 } 950 951 /* 952 * Called at interrupt time. 953 * Mark the component as done and if all components are done, 954 * take a ccd interrupt. 955 */ 956 static void 957 ccdiodone(struct buf *vbp) 958 { 959 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 960 struct buf *bp = cbp->cb_obp; 961 struct ccd_softc *cs = cbp->cb_sc; 962 int count; 963 964 #ifdef DEBUG 965 if (ccddebug & CCDB_FOLLOW) 966 printf("ccdiodone(%p)\n", cbp); 967 if (ccddebug & CCDB_IO) { 968 printf("ccdiodone: bp %p bcount %d resid %d\n", 969 bp, bp->b_bcount, bp->b_resid); 970 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 971 " bcnt %d\n", 972 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 973 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 974 cbp->cb_buf.b_bcount); 975 } 976 #endif 977 978 if (cbp->cb_buf.b_error != 0) { 979 bp->b_error = cbp->cb_buf.b_error; 980 printf("%s: error %d on component %d\n", 981 cs->sc_xname, bp->b_error, cbp->cb_comp); 982 } 983 count = cbp->cb_buf.b_bcount; 984 buf_destroy(&cbp->cb_buf); 985 CCD_PUTBUF(cbp); 986 987 /* 988 * If all done, "interrupt". 989 */ 990 mutex_enter(cs->sc_iolock); 991 bp->b_resid -= count; 992 if (bp->b_resid < 0) 993 panic("ccdiodone: count"); 994 if (bp->b_resid == 0) { 995 /* 996 * Request is done for better or worse, wakeup the top half. 997 */ 998 if (bp->b_error != 0) 999 bp->b_resid = bp->b_bcount; 1000 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1001 (bp->b_flags & B_READ)); 1002 if (!disk_isbusy(&cs->sc_dkdev)) { 1003 if (bufq_peek(cs->sc_bufq) != NULL) { 1004 cv_broadcast(&cs->sc_push); 1005 } 1006 cv_broadcast(&cs->sc_stop); 1007 } 1008 mutex_exit(cs->sc_iolock); 1009 biodone(bp); 1010 } else 1011 mutex_exit(cs->sc_iolock); 1012 } 1013 1014 /* ARGSUSED */ 1015 static int 1016 ccdread(dev_t dev, struct uio *uio, int flags) 1017 { 1018 int unit = ccdunit(dev); 1019 struct ccd_softc *cs; 1020 1021 #ifdef DEBUG 1022 if (ccddebug & CCDB_FOLLOW) 1023 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1024 #endif 1025 if ((cs = ccdget(unit)) == NULL) 1026 return 0; 1027 1028 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1029 if ((cs->sc_flags & CCDF_INITED) == 0) 1030 return (ENXIO); 1031 1032 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1033 } 1034 1035 /* ARGSUSED */ 1036 static int 1037 ccdwrite(dev_t dev, struct uio *uio, int flags) 1038 { 1039 int unit = ccdunit(dev); 1040 struct ccd_softc *cs; 1041 1042 #ifdef DEBUG 1043 if (ccddebug & CCDB_FOLLOW) 1044 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1045 #endif 1046 if ((cs = ccdget(unit)) == NULL) 1047 return ENOENT; 1048 1049 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1050 if ((cs->sc_flags & CCDF_INITED) == 0) 1051 return (ENXIO); 1052 1053 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1054 } 1055 1056 static int 1057 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1058 { 1059 int unit = ccdunit(dev); 1060 int i, j, lookedup = 0, error = 0; 1061 int part, pmask; 1062 struct ccd_softc *cs; 1063 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1064 kauth_cred_t uc; 1065 char **cpp; 1066 struct pathbuf *pb; 1067 struct vnode **vpp; 1068 #ifdef __HAVE_OLD_DISKLABEL 1069 struct disklabel newlabel; 1070 #endif 1071 1072 if ((cs = ccdget(unit)) == NULL) 1073 return ENOENT; 1074 uc = kauth_cred_get(); 1075 1076 /* 1077 * Compat code must not be called if on a platform where 1078 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will 1079 * be the same as CCDIOCSET_60 1080 */ 1081 #if defined(COMPAT_60) && !defined(_LP64) 1082 switch (cmd) { 1083 case CCDIOCSET_60: { 1084 struct ccd_ioctl ccionew; 1085 struct ccd_ioctl_60 *ccio60 = 1086 (struct ccd_ioctl_60 *)data; 1087 ccionew.ccio_disks = ccio->ccio_disks; 1088 ccionew.ccio_ndisks = ccio->ccio_ndisks; 1089 ccionew.ccio_ileave = ccio->ccio_ileave; 1090 ccionew.ccio_flags = ccio->ccio_flags; 1091 ccionew.ccio_unit = ccio->ccio_unit; 1092 error = ccdioctl(dev, CCDIOCSET, &ccionew, flag, l); 1093 if (!error) { 1094 /* Copy data back, adjust types if necessary */ 1095 ccio60->ccio_disks = ccionew.ccio_disks; 1096 ccio60->ccio_ndisks = ccionew.ccio_ndisks; 1097 ccio60->ccio_ileave = ccionew.ccio_ileave; 1098 ccio60->ccio_flags = ccionew.ccio_flags; 1099 ccio60->ccio_unit = ccionew.ccio_unit; 1100 ccio60->ccio_size = (size_t)ccionew.ccio_size; 1101 } 1102 return error; 1103 } 1104 break; 1105 1106 case CCDIOCCLR_60: 1107 /* 1108 * ccio_size member not used, so existing struct OK 1109 * drop through to existing non-compat version 1110 */ 1111 cmd = CCDIOCCLR; 1112 break; 1113 } 1114 #endif /* COMPAT_60 && !_LP64*/ 1115 1116 /* Must be open for writes for these commands... */ 1117 switch (cmd) { 1118 case CCDIOCSET: 1119 case CCDIOCCLR: 1120 case DIOCSDINFO: 1121 case DIOCWDINFO: 1122 #ifdef __HAVE_OLD_DISKLABEL 1123 case ODIOCSDINFO: 1124 case ODIOCWDINFO: 1125 #endif 1126 case DIOCKLABEL: 1127 case DIOCWLABEL: 1128 if ((flag & FWRITE) == 0) 1129 return (EBADF); 1130 } 1131 1132 mutex_enter(&cs->sc_dvlock); 1133 1134 /* Must be initialized for these... */ 1135 switch (cmd) { 1136 case CCDIOCCLR: 1137 case DIOCGDINFO: 1138 case DIOCCACHESYNC: 1139 case DIOCSDINFO: 1140 case DIOCWDINFO: 1141 case DIOCGPART: 1142 case DIOCWLABEL: 1143 case DIOCKLABEL: 1144 case DIOCGDEFLABEL: 1145 #ifdef __HAVE_OLD_DISKLABEL 1146 case ODIOCGDINFO: 1147 case ODIOCSDINFO: 1148 case ODIOCWDINFO: 1149 case ODIOCGDEFLABEL: 1150 #endif 1151 if ((cs->sc_flags & CCDF_INITED) == 0) { 1152 error = ENXIO; 1153 goto out; 1154 } 1155 } 1156 1157 switch (cmd) { 1158 case CCDIOCSET: 1159 if (cs->sc_flags & CCDF_INITED) { 1160 error = EBUSY; 1161 goto out; 1162 } 1163 1164 /* Validate the flags. */ 1165 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1166 error = EINVAL; 1167 goto out; 1168 } 1169 1170 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1171 ccio->ccio_ndisks == 0) { 1172 error = EINVAL; 1173 goto out; 1174 } 1175 1176 /* Fill in some important bits. */ 1177 cs->sc_ileave = ccio->ccio_ileave; 1178 cs->sc_nccdisks = ccio->ccio_ndisks; 1179 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1180 1181 /* 1182 * Allocate space for and copy in the array of 1183 * component pathnames and device numbers. 1184 */ 1185 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1186 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1187 error = copyin(ccio->ccio_disks, cpp, 1188 ccio->ccio_ndisks * sizeof(*cpp)); 1189 if (error) { 1190 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1191 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1192 goto out; 1193 } 1194 1195 #ifdef DEBUG 1196 if (ccddebug & CCDB_INIT) 1197 for (i = 0; i < ccio->ccio_ndisks; ++i) 1198 printf("ccdioctl: component %d: %p\n", 1199 i, cpp[i]); 1200 #endif 1201 1202 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1203 #ifdef DEBUG 1204 if (ccddebug & CCDB_INIT) 1205 printf("ccdioctl: lookedup = %d\n", lookedup); 1206 #endif 1207 error = pathbuf_copyin(cpp[i], &pb); 1208 if (error == 0) { 1209 error = dk_lookup(pb, l, &vpp[i]); 1210 } 1211 pathbuf_destroy(pb); 1212 if (error != 0) { 1213 for (j = 0; j < lookedup; ++j) 1214 (void)vn_close(vpp[j], FREAD|FWRITE, 1215 uc); 1216 kmem_free(vpp, ccio->ccio_ndisks * 1217 sizeof(*vpp)); 1218 kmem_free(cpp, ccio->ccio_ndisks * 1219 sizeof(*cpp)); 1220 goto out; 1221 } 1222 ++lookedup; 1223 } 1224 1225 /* Attach the disk. */ 1226 disk_attach(&cs->sc_dkdev); 1227 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1228 1229 /* 1230 * Initialize the ccd. Fills in the softc for us. 1231 */ 1232 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1233 for (j = 0; j < lookedup; ++j) 1234 (void)vn_close(vpp[j], FREAD|FWRITE, 1235 uc); 1236 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1237 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1238 disk_detach(&cs->sc_dkdev); 1239 bufq_free(cs->sc_bufq); 1240 goto out; 1241 } 1242 1243 /* We can free the temporary variables now. */ 1244 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1245 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1246 1247 /* 1248 * The ccd has been successfully initialized, so 1249 * we can place it into the array. Don't try to 1250 * read the disklabel until the disk has been attached, 1251 * because space for the disklabel is allocated 1252 * in disk_attach(); 1253 */ 1254 ccio->ccio_unit = unit; 1255 ccio->ccio_size = cs->sc_size; 1256 1257 /* Try and read the disklabel. */ 1258 ccdgetdisklabel(dev); 1259 break; 1260 1261 case CCDIOCCLR: 1262 /* 1263 * Don't unconfigure if any other partitions are open 1264 * or if both the character and block flavors of this 1265 * partition are open. 1266 */ 1267 part = DISKPART(dev); 1268 pmask = (1 << part); 1269 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1270 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1271 (cs->sc_dkdev.dk_copenmask & pmask))) { 1272 error = EBUSY; 1273 goto out; 1274 } 1275 1276 /* Stop new I/O, wait for in-flight I/O to complete. */ 1277 mutex_enter(cs->sc_iolock); 1278 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1279 cs->sc_zap = true; 1280 while (disk_isbusy(&cs->sc_dkdev) || 1281 bufq_peek(cs->sc_bufq) != NULL || 1282 cs->sc_thread != NULL) { 1283 cv_broadcast(&cs->sc_push); 1284 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1285 } 1286 mutex_exit(cs->sc_iolock); 1287 1288 /* 1289 * Free ccd_softc information and clear entry. 1290 */ 1291 1292 /* Close the components and free their pathnames. */ 1293 for (i = 0; i < cs->sc_nccdisks; ++i) { 1294 /* 1295 * XXX: this close could potentially fail and 1296 * cause Bad Things. Maybe we need to force 1297 * the close to happen? 1298 */ 1299 #ifdef DEBUG 1300 if (ccddebug & CCDB_VNODE) 1301 vprint("CCDIOCCLR: vnode info", 1302 cs->sc_cinfo[i].ci_vp); 1303 #endif 1304 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1305 uc); 1306 kmem_free(cs->sc_cinfo[i].ci_path, 1307 cs->sc_cinfo[i].ci_pathlen); 1308 } 1309 1310 /* Free interleave index. */ 1311 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1312 kmem_free(cs->sc_itable[i].ii_index, 1313 cs->sc_itable[i].ii_indexsz); 1314 } 1315 1316 /* Free component info and interleave table. */ 1317 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1318 sizeof(struct ccdcinfo)); 1319 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1320 sizeof(struct ccdiinfo)); 1321 1322 aprint_normal("%s: detached\n", cs->sc_xname); 1323 1324 /* Detach the disk. */ 1325 disk_detach(&cs->sc_dkdev); 1326 bufq_free(cs->sc_bufq); 1327 ccdput(cs); 1328 /* Don't break, otherwise cs is read again. */ 1329 return 0; 1330 1331 case DIOCGDINFO: 1332 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label); 1333 break; 1334 1335 #ifdef __HAVE_OLD_DISKLABEL 1336 case ODIOCGDINFO: 1337 newlabel = *(cs->sc_dkdev.dk_label); 1338 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1339 return ENOTTY; 1340 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1341 break; 1342 #endif 1343 1344 case DIOCGPART: 1345 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label; 1346 ((struct partinfo *)data)->part = 1347 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1348 break; 1349 1350 case DIOCCACHESYNC: 1351 /* 1352 * XXX Do we really need to care about having a writable 1353 * file descriptor here? 1354 */ 1355 if ((flag & FWRITE) == 0) 1356 return (EBADF); 1357 1358 /* 1359 * We pass this call down to all components and report 1360 * the first error we encounter. 1361 */ 1362 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1363 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1364 flag, uc); 1365 if (j != 0 && error == 0) 1366 error = j; 1367 } 1368 break; 1369 1370 case DIOCWDINFO: 1371 case DIOCSDINFO: 1372 #ifdef __HAVE_OLD_DISKLABEL 1373 case ODIOCWDINFO: 1374 case ODIOCSDINFO: 1375 #endif 1376 { 1377 struct disklabel *lp; 1378 #ifdef __HAVE_OLD_DISKLABEL 1379 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1380 memset(&newlabel, 0, sizeof newlabel); 1381 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1382 lp = &newlabel; 1383 } else 1384 #endif 1385 lp = (struct disklabel *)data; 1386 1387 cs->sc_flags |= CCDF_LABELLING; 1388 1389 error = setdisklabel(cs->sc_dkdev.dk_label, 1390 lp, 0, cs->sc_dkdev.dk_cpulabel); 1391 if (error == 0) { 1392 if (cmd == DIOCWDINFO 1393 #ifdef __HAVE_OLD_DISKLABEL 1394 || cmd == ODIOCWDINFO 1395 #endif 1396 ) 1397 error = writedisklabel(CCDLABELDEV(dev), 1398 ccdstrategy, cs->sc_dkdev.dk_label, 1399 cs->sc_dkdev.dk_cpulabel); 1400 } 1401 1402 cs->sc_flags &= ~CCDF_LABELLING; 1403 break; 1404 } 1405 1406 case DIOCKLABEL: 1407 if (*(int *)data != 0) 1408 cs->sc_flags |= CCDF_KLABEL; 1409 else 1410 cs->sc_flags &= ~CCDF_KLABEL; 1411 break; 1412 1413 case DIOCWLABEL: 1414 if (*(int *)data != 0) 1415 cs->sc_flags |= CCDF_WLABEL; 1416 else 1417 cs->sc_flags &= ~CCDF_WLABEL; 1418 break; 1419 1420 case DIOCGDEFLABEL: 1421 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1422 break; 1423 1424 #ifdef __HAVE_OLD_DISKLABEL 1425 case ODIOCGDEFLABEL: 1426 ccdgetdefaultlabel(cs, &newlabel); 1427 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1428 return ENOTTY; 1429 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1430 break; 1431 #endif 1432 1433 default: 1434 error = ENOTTY; 1435 } 1436 1437 out: 1438 mutex_exit(&cs->sc_dvlock); 1439 return (error); 1440 } 1441 1442 static int 1443 ccdsize(dev_t dev) 1444 { 1445 struct ccd_softc *cs; 1446 struct disklabel *lp; 1447 int part, unit, omask, size; 1448 1449 unit = ccdunit(dev); 1450 if ((cs = ccdget(unit)) == NULL) 1451 return -1; 1452 1453 if ((cs->sc_flags & CCDF_INITED) == 0) 1454 return (-1); 1455 1456 part = DISKPART(dev); 1457 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1458 lp = cs->sc_dkdev.dk_label; 1459 1460 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1461 return (-1); 1462 1463 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1464 size = -1; 1465 else 1466 size = lp->d_partitions[part].p_size * 1467 (lp->d_secsize / DEV_BSIZE); 1468 1469 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1470 return (-1); 1471 1472 return (size); 1473 } 1474 1475 static void 1476 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1477 { 1478 struct ccdgeom *ccg = &cs->sc_geom; 1479 1480 memset(lp, 0, sizeof(*lp)); 1481 1482 lp->d_secperunit = cs->sc_size; 1483 lp->d_secsize = ccg->ccg_secsize; 1484 lp->d_nsectors = ccg->ccg_nsectors; 1485 lp->d_ntracks = ccg->ccg_ntracks; 1486 lp->d_ncylinders = ccg->ccg_ncylinders; 1487 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1488 1489 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1490 lp->d_type = DTYPE_CCD; 1491 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1492 lp->d_rpm = 3600; 1493 lp->d_interleave = 1; 1494 lp->d_flags = 0; 1495 1496 lp->d_partitions[RAW_PART].p_offset = 0; 1497 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1498 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1499 lp->d_npartitions = RAW_PART + 1; 1500 1501 lp->d_magic = DISKMAGIC; 1502 lp->d_magic2 = DISKMAGIC; 1503 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1504 } 1505 1506 /* 1507 * Read the disklabel from the ccd. If one is not present, fake one 1508 * up. 1509 */ 1510 static void 1511 ccdgetdisklabel(dev_t dev) 1512 { 1513 int unit = ccdunit(dev); 1514 struct ccd_softc *cs; 1515 const char *errstring; 1516 struct disklabel *lp; 1517 struct cpu_disklabel *clp; 1518 1519 if ((cs = ccdget(unit)) == NULL) 1520 return; 1521 lp = cs->sc_dkdev.dk_label; 1522 clp = cs->sc_dkdev.dk_cpulabel; 1523 KASSERT(mutex_owned(&cs->sc_dvlock)); 1524 1525 memset(clp, 0, sizeof(*clp)); 1526 1527 ccdgetdefaultlabel(cs, lp); 1528 1529 /* 1530 * Call the generic disklabel extraction routine. 1531 */ 1532 cs->sc_flags |= CCDF_RLABEL; 1533 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1534 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1535 else 1536 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1537 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1538 if (errstring) 1539 ccdmakedisklabel(cs); 1540 else { 1541 int i; 1542 struct partition *pp; 1543 1544 /* 1545 * Sanity check whether the found disklabel is valid. 1546 * 1547 * This is necessary since total size of ccd may vary 1548 * when an interleave is changed even though exactly 1549 * same componets are used, and old disklabel may used 1550 * if that is found. 1551 */ 1552 if (lp->d_secperunit != cs->sc_size) 1553 printf("WARNING: %s: " 1554 "total sector size in disklabel (%ju) != " 1555 "the size of ccd (%ju)\n", cs->sc_xname, 1556 (uintmax_t)lp->d_secperunit, 1557 (uintmax_t)cs->sc_size); 1558 for (i = 0; i < lp->d_npartitions; i++) { 1559 pp = &lp->d_partitions[i]; 1560 if (pp->p_offset + pp->p_size > cs->sc_size) 1561 printf("WARNING: %s: end of partition `%c' " 1562 "exceeds the size of ccd (%ju)\n", 1563 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1564 } 1565 } 1566 1567 #ifdef DEBUG 1568 /* It's actually extremely common to have unlabeled ccds. */ 1569 if (ccddebug & CCDB_LABEL) 1570 if (errstring != NULL) 1571 printf("%s: %s\n", cs->sc_xname, errstring); 1572 #endif 1573 1574 /* In-core label now valid. */ 1575 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1576 } 1577 1578 /* 1579 * Take care of things one might want to take care of in the event 1580 * that a disklabel isn't present. 1581 */ 1582 static void 1583 ccdmakedisklabel(struct ccd_softc *cs) 1584 { 1585 struct disklabel *lp = cs->sc_dkdev.dk_label; 1586 1587 /* 1588 * For historical reasons, if there's no disklabel present 1589 * the raw partition must be marked FS_BSDFFS. 1590 */ 1591 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1592 1593 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1594 1595 lp->d_checksum = dkcksum(lp); 1596 } 1597 1598 #ifdef DEBUG 1599 static void 1600 printiinfo(struct ccdiinfo *ii) 1601 { 1602 int ix, i; 1603 1604 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1605 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1606 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1607 for (i = 0; i < ii->ii_ndisk; i++) 1608 printf(" %d", ii->ii_index[i]); 1609 printf("\n"); 1610 } 1611 } 1612 #endif 1613 1614 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1615 1616 static int 1617 ccd_modcmd(modcmd_t cmd, void *arg) 1618 { 1619 int error = 0; 1620 #ifdef _MODULE 1621 int bmajor = -1, cmajor = -1; 1622 #endif 1623 1624 1625 switch (cmd) { 1626 case MODULE_CMD_INIT: 1627 #ifdef _MODULE 1628 ccdattach(4); 1629 1630 return devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1631 &ccd_cdevsw, &cmajor); 1632 #endif 1633 break; 1634 1635 case MODULE_CMD_FINI: 1636 #ifdef _MODULE 1637 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1638 #endif 1639 break; 1640 1641 case MODULE_CMD_STAT: 1642 return ENOTTY; 1643 1644 default: 1645 return ENOTTY; 1646 } 1647 1648 return error; 1649 } 1650 1651 static int 1652 ccd_units_sysctl(SYSCTLFN_ARGS) 1653 { 1654 struct sysctlnode node; 1655 struct ccd_softc *sc; 1656 int error, i, nccd, *units; 1657 size_t size; 1658 1659 nccd = 0; 1660 mutex_enter(&ccd_lock); 1661 LIST_FOREACH(sc, &ccds, sc_link) 1662 nccd++; 1663 mutex_exit(&ccd_lock); 1664 1665 if (nccd != 0) { 1666 size = nccd * sizeof(*units); 1667 units = kmem_zalloc(size, KM_SLEEP); 1668 if (units == NULL) 1669 return ENOMEM; 1670 1671 i = 0; 1672 mutex_enter(&ccd_lock); 1673 LIST_FOREACH(sc, &ccds, sc_link) { 1674 if (i >= nccd) 1675 break; 1676 units[i] = sc->sc_unit; 1677 } 1678 mutex_exit(&ccd_lock); 1679 } else { 1680 units = NULL; 1681 size = 0; 1682 } 1683 1684 node = *rnode; 1685 node.sysctl_data = units; 1686 node.sysctl_size = size; 1687 1688 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1689 if (units) 1690 kmem_free(units, size); 1691 return error; 1692 } 1693 1694 static int 1695 ccd_info_sysctl(SYSCTLFN_ARGS) 1696 { 1697 struct sysctlnode node; 1698 struct ccddiskinfo ccd; 1699 struct ccd_softc *sc; 1700 int unit; 1701 1702 if (newp == NULL || newlen != sizeof(int)) 1703 return EINVAL; 1704 1705 unit = *(const int *)newp; 1706 newp = NULL; 1707 newlen = 0; 1708 ccd.ccd_ndisks = ~0; 1709 mutex_enter(&ccd_lock); 1710 LIST_FOREACH(sc, &ccds, sc_link) { 1711 if (sc->sc_unit == unit) { 1712 ccd.ccd_ileave = sc->sc_ileave; 1713 ccd.ccd_size = sc->sc_size; 1714 ccd.ccd_ndisks = sc->sc_nccdisks; 1715 ccd.ccd_flags = sc->sc_flags; 1716 break; 1717 } 1718 } 1719 mutex_exit(&ccd_lock); 1720 1721 if (ccd.ccd_ndisks == ~0) 1722 return ENOENT; 1723 1724 node = *rnode; 1725 node.sysctl_data = &ccd; 1726 node.sysctl_size = sizeof(ccd); 1727 1728 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1729 } 1730 1731 static int 1732 ccd_components_sysctl(SYSCTLFN_ARGS) 1733 { 1734 struct sysctlnode node; 1735 int error, unit; 1736 size_t size; 1737 char *names, *p, *ep; 1738 struct ccd_softc *sc; 1739 1740 if (newp == NULL || newlen != sizeof(int)) 1741 return EINVAL; 1742 1743 size = 0; 1744 unit = *(const int *)newp; 1745 newp = NULL; 1746 newlen = 0; 1747 mutex_enter(&ccd_lock); 1748 LIST_FOREACH(sc, &ccds, sc_link) 1749 if (sc->sc_unit == unit) { 1750 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1751 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1752 break; 1753 } 1754 mutex_exit(&ccd_lock); 1755 1756 if (size == 0) 1757 return ENOENT; 1758 names = kmem_zalloc(size, KM_SLEEP); 1759 if (names == NULL) 1760 return ENOMEM; 1761 1762 p = names; 1763 ep = names + size; 1764 mutex_enter(&ccd_lock); 1765 LIST_FOREACH(sc, &ccds, sc_link) 1766 if (sc->sc_unit == unit) { 1767 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1768 char *d = sc->sc_cinfo[i].ci_path; 1769 while (p < ep && (*p++ = *d++) != '\0') 1770 continue; 1771 } 1772 break; 1773 } 1774 mutex_exit(&ccd_lock); 1775 1776 node = *rnode; 1777 node.sysctl_data = names; 1778 node.sysctl_size = ep - names; 1779 1780 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1781 kmem_free(names, size); 1782 return error; 1783 } 1784 1785 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1786 { 1787 const struct sysctlnode *node = NULL; 1788 1789 sysctl_createv(clog, 0, NULL, &node, 1790 CTLFLAG_PERMANENT, 1791 CTLTYPE_NODE, "ccd", 1792 SYSCTL_DESCR("ConCatenated Disk state"), 1793 NULL, 0, NULL, 0, 1794 CTL_KERN, CTL_CREATE, CTL_EOL); 1795 1796 if (node == NULL) 1797 return; 1798 1799 sysctl_createv(clog, 0, &node, NULL, 1800 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1801 CTLTYPE_STRUCT, "units", 1802 SYSCTL_DESCR("List of ccd unit numbers"), 1803 ccd_units_sysctl, 0, NULL, 0, 1804 CTL_CREATE, CTL_EOL); 1805 sysctl_createv(clog, 0, &node, NULL, 1806 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1807 CTLTYPE_STRUCT, "info", 1808 SYSCTL_DESCR("Information about a CCD unit"), 1809 ccd_info_sysctl, 0, NULL, 0, 1810 CTL_CREATE, CTL_EOL); 1811 sysctl_createv(clog, 0, &node, NULL, 1812 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1813 CTLTYPE_STRUCT, "components", 1814 SYSCTL_DESCR("Information about CCD components"), 1815 ccd_components_sysctl, 0, NULL, 0, 1816 CTL_CREATE, CTL_EOL); 1817 } 1818