1 /* $NetBSD: ccd.c,v 1.176 2018/03/18 20:33:52 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.176 2018/03/18 20:33:52 christos Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 119 #include <uvm/uvm_extern.h> 120 121 #include <dev/ccdvar.h> 122 #include <dev/dkvar.h> 123 124 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 125 126 #include "ioconf.h" 127 128 #if defined(CCDDEBUG) && !defined(DEBUG) 129 #define DEBUG 130 #endif 131 132 #ifdef DEBUG 133 #define CCDB_FOLLOW 0x01 134 #define CCDB_INIT 0x02 135 #define CCDB_IO 0x04 136 #define CCDB_LABEL 0x08 137 #define CCDB_VNODE 0x10 138 int ccddebug = 0x00; 139 #endif 140 141 #define ccdunit(x) DISKUNIT(x) 142 143 struct ccdbuf { 144 struct buf cb_buf; /* new I/O buf */ 145 struct buf *cb_obp; /* ptr. to original I/O buf */ 146 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 147 int cb_comp; /* target component */ 148 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 149 }; 150 151 /* component buffer pool */ 152 static pool_cache_t ccd_cache; 153 154 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 155 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 156 157 #define CCDLABELDEV(dev) \ 158 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 159 160 /* called by main() at boot time */ 161 void ccddetach(void); 162 163 /* called by biodone() at interrupt time */ 164 static void ccdiodone(struct buf *); 165 166 static void ccdinterleave(struct ccd_softc *); 167 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 168 struct lwp *); 169 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 170 daddr_t, void *, long); 171 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 172 static void ccdgetdisklabel(dev_t); 173 static void ccdmakedisklabel(struct ccd_softc *); 174 static void ccdstart(struct ccd_softc *); 175 static void ccdthread(void *); 176 177 static dev_type_open(ccdopen); 178 static dev_type_close(ccdclose); 179 static dev_type_read(ccdread); 180 static dev_type_write(ccdwrite); 181 static dev_type_ioctl(ccdioctl); 182 static dev_type_strategy(ccdstrategy); 183 static dev_type_size(ccdsize); 184 185 const struct bdevsw ccd_bdevsw = { 186 .d_open = ccdopen, 187 .d_close = ccdclose, 188 .d_strategy = ccdstrategy, 189 .d_ioctl = ccdioctl, 190 .d_dump = nodump, 191 .d_psize = ccdsize, 192 .d_discard = nodiscard, 193 .d_flag = D_DISK | D_MPSAFE 194 }; 195 196 const struct cdevsw ccd_cdevsw = { 197 .d_open = ccdopen, 198 .d_close = ccdclose, 199 .d_read = ccdread, 200 .d_write = ccdwrite, 201 .d_ioctl = ccdioctl, 202 .d_stop = nostop, 203 .d_tty = notty, 204 .d_poll = nopoll, 205 .d_mmap = nommap, 206 .d_kqfilter = nokqfilter, 207 .d_discard = nodiscard, 208 .d_flag = D_DISK | D_MPSAFE 209 }; 210 211 #ifdef DEBUG 212 static void printiinfo(struct ccdiinfo *); 213 #endif 214 215 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 216 static kmutex_t ccd_lock; 217 218 #ifdef _MODULE 219 static struct sysctllog *ccd_clog; 220 #endif 221 222 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 223 224 static struct ccd_softc * 225 ccdcreate(int unit) { 226 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 227 228 /* Initialize per-softc structures. */ 229 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 230 sc->sc_unit = unit; 231 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 232 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 233 cv_init(&sc->sc_stop, "ccdstop"); 234 cv_init(&sc->sc_push, "ccdthr"); 235 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 236 return sc; 237 } 238 239 static void 240 ccddestroy(struct ccd_softc *sc) { 241 mutex_obj_free(sc->sc_iolock); 242 mutex_exit(&sc->sc_dvlock); 243 mutex_destroy(&sc->sc_dvlock); 244 cv_destroy(&sc->sc_stop); 245 cv_destroy(&sc->sc_push); 246 disk_destroy(&sc->sc_dkdev); 247 kmem_free(sc, sizeof(*sc)); 248 } 249 250 static struct ccd_softc * 251 ccdget(int unit, int make) { 252 struct ccd_softc *sc; 253 if (unit < 0) { 254 #ifdef DIAGNOSTIC 255 panic("%s: unit %d!", __func__, unit); 256 #endif 257 return NULL; 258 } 259 mutex_enter(&ccd_lock); 260 LIST_FOREACH(sc, &ccds, sc_link) { 261 if (sc->sc_unit == unit) { 262 mutex_exit(&ccd_lock); 263 return sc; 264 } 265 } 266 mutex_exit(&ccd_lock); 267 if (!make) 268 return NULL; 269 if ((sc = ccdcreate(unit)) == NULL) 270 return NULL; 271 mutex_enter(&ccd_lock); 272 LIST_INSERT_HEAD(&ccds, sc, sc_link); 273 mutex_exit(&ccd_lock); 274 return sc; 275 } 276 277 static void 278 ccdput(struct ccd_softc *sc) { 279 mutex_enter(&ccd_lock); 280 LIST_REMOVE(sc, sc_link); 281 mutex_exit(&ccd_lock); 282 ccddestroy(sc); 283 } 284 285 /* 286 * Called by main() during pseudo-device attachment. All we need 287 * to do is allocate enough space for devices to be configured later. 288 */ 289 void 290 ccdattach(int num) 291 { 292 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 293 294 /* Initialize the component buffer pool. */ 295 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 296 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 297 } 298 299 void 300 ccddetach(void) 301 { 302 pool_cache_destroy(ccd_cache); 303 mutex_destroy(&ccd_lock); 304 } 305 306 static int 307 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 308 struct lwp *l) 309 { 310 struct ccdcinfo *ci = NULL; 311 int ix; 312 struct ccdgeom *ccg = &cs->sc_geom; 313 char *tmppath; 314 int error, path_alloced; 315 uint64_t psize, minsize; 316 unsigned secsize, maxsecsize; 317 struct disk_geom *dg; 318 319 #ifdef DEBUG 320 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 321 printf("%s: ccdinit\n", cs->sc_xname); 322 #endif 323 324 /* Allocate space for the component info. */ 325 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 326 KM_SLEEP); 327 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 328 329 cs->sc_size = 0; 330 331 /* 332 * Verify that each component piece exists and record 333 * relevant information about it. 334 */ 335 maxsecsize = 0; 336 minsize = 0; 337 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 338 ci = &cs->sc_cinfo[ix]; 339 ci->ci_vp = vpp[ix]; 340 341 /* 342 * Copy in the pathname of the component. 343 */ 344 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 345 error = copyinstr(cpaths[ix], tmppath, 346 MAXPATHLEN, &ci->ci_pathlen); 347 if (ci->ci_pathlen == 0) 348 error = EINVAL; 349 if (error) { 350 #ifdef DEBUG 351 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 352 printf("%s: can't copy path, error = %d\n", 353 cs->sc_xname, error); 354 #endif 355 goto out; 356 } 357 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 358 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 359 path_alloced++; 360 361 /* 362 * XXX: Cache the component's dev_t. 363 */ 364 ci->ci_dev = vpp[ix]->v_rdev; 365 366 /* 367 * Get partition information for the component. 368 */ 369 error = getdisksize(vpp[ix], &psize, &secsize); 370 if (error) { 371 #ifdef DEBUG 372 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 373 printf("%s: %s: disksize failed, error = %d\n", 374 cs->sc_xname, ci->ci_path, error); 375 #endif 376 goto out; 377 } 378 379 /* 380 * Calculate the size, truncating to an interleave 381 * boundary if necessary. 382 */ 383 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 384 if (cs->sc_ileave > 1) 385 psize -= psize % cs->sc_ileave; 386 387 if (psize == 0) { 388 #ifdef DEBUG 389 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 390 printf("%s: %s: size == 0\n", 391 cs->sc_xname, ci->ci_path); 392 #endif 393 error = ENODEV; 394 goto out; 395 } 396 397 if (minsize == 0 || psize < minsize) 398 minsize = psize; 399 ci->ci_size = psize; 400 cs->sc_size += psize; 401 } 402 403 /* 404 * Don't allow the interleave to be smaller than 405 * the biggest component sector. 406 */ 407 if ((cs->sc_ileave > 0) && 408 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 409 #ifdef DEBUG 410 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 411 printf("%s: interleave must be at least %d\n", 412 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 413 #endif 414 error = EINVAL; 415 goto out; 416 } 417 418 /* 419 * If uniform interleave is desired set all sizes to that of 420 * the smallest component. 421 */ 422 if (cs->sc_flags & CCDF_UNIFORM) { 423 for (ci = cs->sc_cinfo; 424 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 425 ci->ci_size = minsize; 426 427 cs->sc_size = cs->sc_nccdisks * minsize; 428 } 429 430 /* 431 * Construct the interleave table. 432 */ 433 ccdinterleave(cs); 434 435 /* 436 * Create pseudo-geometry based on 1MB cylinders. It's 437 * pretty close. 438 */ 439 ccg->ccg_secsize = DEV_BSIZE; 440 ccg->ccg_ntracks = 1; 441 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 442 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 443 444 dg = &cs->sc_dkdev.dk_geom; 445 memset(dg, 0, sizeof(*dg)); 446 dg->dg_secperunit = cs->sc_size; 447 dg->dg_secsize = ccg->ccg_secsize; 448 dg->dg_nsectors = ccg->ccg_nsectors; 449 dg->dg_ntracks = ccg->ccg_ntracks; 450 dg->dg_ncylinders = ccg->ccg_ncylinders; 451 452 if (cs->sc_ileave > 0) 453 aprint_normal("%s: Interleaving %d component%s " 454 "(%d block interleave)\n", cs->sc_xname, 455 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 456 cs->sc_ileave); 457 else 458 aprint_normal("%s: Concatenating %d component%s\n", 459 cs->sc_xname, 460 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 461 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 462 ci = &cs->sc_cinfo[ix]; 463 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 464 ci->ci_path, (uintmax_t)ci->ci_size); 465 } 466 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 467 468 /* 469 * Create thread to handle deferred I/O. 470 */ 471 cs->sc_zap = false; 472 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 473 cs, &cs->sc_thread, "%s", cs->sc_xname); 474 if (error) { 475 printf("ccdinit: can't create thread: %d\n", error); 476 goto out; 477 } 478 479 /* 480 * Only now that everything is set up can we enable the device. 481 */ 482 mutex_enter(cs->sc_iolock); 483 cs->sc_flags |= CCDF_INITED; 484 mutex_exit(cs->sc_iolock); 485 kmem_free(tmppath, MAXPATHLEN); 486 return (0); 487 488 out: 489 for (ix = 0; ix < path_alloced; ix++) { 490 kmem_free(cs->sc_cinfo[ix].ci_path, 491 cs->sc_cinfo[ix].ci_pathlen); 492 } 493 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 494 kmem_free(tmppath, MAXPATHLEN); 495 return (error); 496 } 497 498 static void 499 ccdinterleave(struct ccd_softc *cs) 500 { 501 struct ccdcinfo *ci, *smallci; 502 struct ccdiinfo *ii; 503 daddr_t bn, lbn; 504 int ix; 505 u_long size; 506 507 #ifdef DEBUG 508 if (ccddebug & CCDB_INIT) 509 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 510 #endif 511 /* 512 * Allocate an interleave table. 513 * Chances are this is too big, but we don't care. 514 */ 515 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 516 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 517 518 /* 519 * Trivial case: no interleave (actually interleave of disk size). 520 * Each table entry represents a single component in its entirety. 521 */ 522 if (cs->sc_ileave == 0) { 523 bn = 0; 524 ii = cs->sc_itable; 525 526 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 527 /* Allocate space for ii_index. */ 528 ii->ii_indexsz = sizeof(int); 529 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 530 ii->ii_ndisk = 1; 531 ii->ii_startblk = bn; 532 ii->ii_startoff = 0; 533 ii->ii_index[0] = ix; 534 bn += cs->sc_cinfo[ix].ci_size; 535 ii++; 536 } 537 ii->ii_ndisk = 0; 538 #ifdef DEBUG 539 if (ccddebug & CCDB_INIT) 540 printiinfo(cs->sc_itable); 541 #endif 542 return; 543 } 544 545 /* 546 * The following isn't fast or pretty; it doesn't have to be. 547 */ 548 size = 0; 549 bn = lbn = 0; 550 for (ii = cs->sc_itable; ; ii++) { 551 /* Allocate space for ii_index. */ 552 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 553 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 554 555 /* 556 * Locate the smallest of the remaining components 557 */ 558 smallci = NULL; 559 for (ci = cs->sc_cinfo; 560 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 561 if (ci->ci_size > size && 562 (smallci == NULL || 563 ci->ci_size < smallci->ci_size)) 564 smallci = ci; 565 566 /* 567 * Nobody left, all done 568 */ 569 if (smallci == NULL) { 570 ii->ii_ndisk = 0; 571 break; 572 } 573 574 /* 575 * Record starting logical block and component offset 576 */ 577 ii->ii_startblk = bn / cs->sc_ileave; 578 ii->ii_startoff = lbn; 579 580 /* 581 * Determine how many disks take part in this interleave 582 * and record their indices. 583 */ 584 ix = 0; 585 for (ci = cs->sc_cinfo; 586 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 587 if (ci->ci_size >= smallci->ci_size) 588 ii->ii_index[ix++] = ci - cs->sc_cinfo; 589 ii->ii_ndisk = ix; 590 bn += ix * (smallci->ci_size - size); 591 lbn = smallci->ci_size / cs->sc_ileave; 592 size = smallci->ci_size; 593 } 594 #ifdef DEBUG 595 if (ccddebug & CCDB_INIT) 596 printiinfo(cs->sc_itable); 597 #endif 598 } 599 600 /* ARGSUSED */ 601 static int 602 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 603 { 604 int unit = ccdunit(dev); 605 struct ccd_softc *cs; 606 struct disklabel *lp; 607 int error = 0, part, pmask; 608 609 #ifdef DEBUG 610 if (ccddebug & CCDB_FOLLOW) 611 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 612 #endif 613 if ((cs = ccdget(unit, 1)) == NULL) 614 return ENXIO; 615 616 mutex_enter(&cs->sc_dvlock); 617 618 lp = cs->sc_dkdev.dk_label; 619 620 part = DISKPART(dev); 621 pmask = (1 << part); 622 623 /* 624 * If we're initialized, check to see if there are any other 625 * open partitions. If not, then it's safe to update 626 * the in-core disklabel. Only read the disklabel if it is 627 * not already valid. 628 */ 629 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 630 cs->sc_dkdev.dk_openmask == 0) 631 ccdgetdisklabel(dev); 632 633 /* Check that the partition exists. */ 634 if (part != RAW_PART) { 635 if (((cs->sc_flags & CCDF_INITED) == 0) || 636 ((part >= lp->d_npartitions) || 637 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 638 error = ENXIO; 639 goto done; 640 } 641 } 642 643 /* Prevent our unit from being unconfigured while open. */ 644 switch (fmt) { 645 case S_IFCHR: 646 cs->sc_dkdev.dk_copenmask |= pmask; 647 break; 648 649 case S_IFBLK: 650 cs->sc_dkdev.dk_bopenmask |= pmask; 651 break; 652 } 653 cs->sc_dkdev.dk_openmask = 654 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 655 656 done: 657 mutex_exit(&cs->sc_dvlock); 658 return (error); 659 } 660 661 /* ARGSUSED */ 662 static int 663 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 664 { 665 int unit = ccdunit(dev); 666 struct ccd_softc *cs; 667 int part; 668 669 #ifdef DEBUG 670 if (ccddebug & CCDB_FOLLOW) 671 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 672 #endif 673 674 if ((cs = ccdget(unit, 0)) == NULL) 675 return ENXIO; 676 677 mutex_enter(&cs->sc_dvlock); 678 679 part = DISKPART(dev); 680 681 /* ...that much closer to allowing unconfiguration... */ 682 switch (fmt) { 683 case S_IFCHR: 684 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 685 break; 686 687 case S_IFBLK: 688 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 689 break; 690 } 691 cs->sc_dkdev.dk_openmask = 692 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 693 694 if (cs->sc_dkdev.dk_openmask == 0) { 695 if ((cs->sc_flags & CCDF_KLABEL) == 0) 696 cs->sc_flags &= ~CCDF_VLABEL; 697 } 698 699 mutex_exit(&cs->sc_dvlock); 700 return (0); 701 } 702 703 static bool 704 ccdbackoff(struct ccd_softc *cs) 705 { 706 707 /* XXX Arbitrary, should be a uvm call. */ 708 return uvmexp.free < (uvmexp.freemin >> 1) && 709 disk_isbusy(&cs->sc_dkdev); 710 } 711 712 static void 713 ccdthread(void *cookie) 714 { 715 struct ccd_softc *cs; 716 717 cs = cookie; 718 719 #ifdef DEBUG 720 if (ccddebug & CCDB_FOLLOW) 721 printf("ccdthread: hello\n"); 722 #endif 723 724 mutex_enter(cs->sc_iolock); 725 while (__predict_true(!cs->sc_zap)) { 726 if (bufq_peek(cs->sc_bufq) == NULL) { 727 /* Nothing to do. */ 728 cv_wait(&cs->sc_push, cs->sc_iolock); 729 continue; 730 } 731 if (ccdbackoff(cs)) { 732 /* Wait for memory to become available. */ 733 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 734 continue; 735 } 736 #ifdef DEBUG 737 if (ccddebug & CCDB_FOLLOW) 738 printf("ccdthread: dispatching I/O\n"); 739 #endif 740 ccdstart(cs); 741 mutex_enter(cs->sc_iolock); 742 } 743 cs->sc_thread = NULL; 744 mutex_exit(cs->sc_iolock); 745 #ifdef DEBUG 746 if (ccddebug & CCDB_FOLLOW) 747 printf("ccdthread: goodbye\n"); 748 #endif 749 kthread_exit(0); 750 } 751 752 static void 753 ccdstrategy(struct buf *bp) 754 { 755 int unit = ccdunit(bp->b_dev); 756 struct ccd_softc *cs; 757 if ((cs = ccdget(unit, 0)) == NULL) 758 return; 759 760 /* Must be open or reading label. */ 761 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 762 (cs->sc_flags & CCDF_RLABEL) != 0); 763 764 mutex_enter(cs->sc_iolock); 765 /* Synchronize with device init/uninit. */ 766 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 767 mutex_exit(cs->sc_iolock); 768 #ifdef DEBUG 769 if (ccddebug & CCDB_FOLLOW) 770 printf("ccdstrategy: unit %d: not inited\n", unit); 771 #endif 772 bp->b_error = ENXIO; 773 bp->b_resid = bp->b_bcount; 774 biodone(bp); 775 return; 776 } 777 778 /* Defer to thread if system is low on memory. */ 779 bufq_put(cs->sc_bufq, bp); 780 if (__predict_false(ccdbackoff(cs))) { 781 mutex_exit(cs->sc_iolock); 782 #ifdef DEBUG 783 if (ccddebug & CCDB_FOLLOW) 784 printf("ccdstrategy: holding off on I/O\n"); 785 #endif 786 return; 787 } 788 ccdstart(cs); 789 } 790 791 static void 792 ccdstart(struct ccd_softc *cs) 793 { 794 daddr_t blkno; 795 int wlabel; 796 struct disklabel *lp; 797 long bcount, rcount; 798 struct ccdbuf *cbp; 799 char *addr; 800 daddr_t bn; 801 vnode_t *vp; 802 buf_t *bp; 803 804 KASSERT(mutex_owned(cs->sc_iolock)); 805 806 bp = bufq_get(cs->sc_bufq); 807 KASSERT(bp != NULL); 808 809 disk_busy(&cs->sc_dkdev); 810 811 #ifdef DEBUG 812 if (ccddebug & CCDB_FOLLOW) 813 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 814 #endif 815 816 /* If it's a nil transfer, wake up the top half now. */ 817 if (bp->b_bcount == 0) 818 goto done; 819 820 lp = cs->sc_dkdev.dk_label; 821 822 /* 823 * Do bounds checking and adjust transfer. If there's an 824 * error, the bounds check will flag that for us. Convert 825 * the partition relative block number to an absolute. 826 */ 827 blkno = bp->b_blkno; 828 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 829 if (DISKPART(bp->b_dev) != RAW_PART) { 830 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 831 goto done; 832 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 833 } 834 mutex_exit(cs->sc_iolock); 835 bp->b_rawblkno = blkno; 836 837 /* Allocate the component buffers and start I/O! */ 838 bp->b_resid = bp->b_bcount; 839 bn = bp->b_rawblkno; 840 addr = bp->b_data; 841 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 842 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 843 rcount = cbp->cb_buf.b_bcount; 844 bn += btodb(rcount); 845 addr += rcount; 846 vp = cbp->cb_buf.b_vp; 847 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 848 mutex_enter(vp->v_interlock); 849 vp->v_numoutput++; 850 mutex_exit(vp->v_interlock); 851 } 852 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 853 } 854 return; 855 856 done: 857 disk_unbusy(&cs->sc_dkdev, 0, 0); 858 cv_broadcast(&cs->sc_stop); 859 cv_broadcast(&cs->sc_push); 860 mutex_exit(cs->sc_iolock); 861 bp->b_resid = bp->b_bcount; 862 biodone(bp); 863 } 864 865 /* 866 * Build a component buffer header. 867 */ 868 static struct ccdbuf * 869 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 870 long bcount) 871 { 872 struct ccdcinfo *ci; 873 struct ccdbuf *cbp; 874 daddr_t cbn, cboff; 875 u_int64_t cbc; 876 int ccdisk; 877 878 #ifdef DEBUG 879 if (ccddebug & CCDB_IO) 880 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 881 cs, bp, bn, addr, bcount); 882 #endif 883 /* 884 * Determine which component bn falls in. 885 */ 886 cbn = bn; 887 cboff = 0; 888 889 /* 890 * Serially concatenated 891 */ 892 if (cs->sc_ileave == 0) { 893 daddr_t sblk; 894 895 sblk = 0; 896 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 897 cbn >= sblk + ci->ci_size; 898 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 899 sblk += ci->ci_size; 900 cbn -= sblk; 901 } 902 /* 903 * Interleaved 904 */ 905 else { 906 struct ccdiinfo *ii; 907 int off; 908 909 cboff = cbn % cs->sc_ileave; 910 cbn /= cs->sc_ileave; 911 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 912 if (ii->ii_startblk > cbn) 913 break; 914 ii--; 915 off = cbn - ii->ii_startblk; 916 if (ii->ii_ndisk == 1) { 917 ccdisk = ii->ii_index[0]; 918 cbn = ii->ii_startoff + off; 919 } else { 920 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 921 cbn = ii->ii_startoff + off / ii->ii_ndisk; 922 } 923 cbn *= cs->sc_ileave; 924 ci = &cs->sc_cinfo[ccdisk]; 925 } 926 927 /* 928 * Fill in the component buf structure. 929 */ 930 cbp = CCD_GETBUF(); 931 KASSERT(cbp != NULL); 932 buf_init(&cbp->cb_buf); 933 cbp->cb_buf.b_flags = bp->b_flags; 934 cbp->cb_buf.b_oflags = bp->b_oflags; 935 cbp->cb_buf.b_cflags = bp->b_cflags; 936 cbp->cb_buf.b_iodone = ccdiodone; 937 cbp->cb_buf.b_proc = bp->b_proc; 938 cbp->cb_buf.b_dev = ci->ci_dev; 939 cbp->cb_buf.b_blkno = cbn + cboff; 940 cbp->cb_buf.b_data = addr; 941 cbp->cb_buf.b_vp = ci->ci_vp; 942 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 943 if (cs->sc_ileave == 0) 944 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 945 else 946 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 947 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 948 949 /* 950 * context for ccdiodone 951 */ 952 cbp->cb_obp = bp; 953 cbp->cb_sc = cs; 954 cbp->cb_comp = ccdisk; 955 956 BIO_COPYPRIO(&cbp->cb_buf, bp); 957 958 #ifdef DEBUG 959 if (ccddebug & CCDB_IO) 960 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 961 " bcnt %d\n", 962 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 963 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 964 cbp->cb_buf.b_bcount); 965 #endif 966 967 return (cbp); 968 } 969 970 /* 971 * Called at interrupt time. 972 * Mark the component as done and if all components are done, 973 * take a ccd interrupt. 974 */ 975 static void 976 ccdiodone(struct buf *vbp) 977 { 978 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 979 struct buf *bp = cbp->cb_obp; 980 struct ccd_softc *cs = cbp->cb_sc; 981 int count; 982 983 #ifdef DEBUG 984 if (ccddebug & CCDB_FOLLOW) 985 printf("ccdiodone(%p)\n", cbp); 986 if (ccddebug & CCDB_IO) { 987 printf("ccdiodone: bp %p bcount %d resid %d\n", 988 bp, bp->b_bcount, bp->b_resid); 989 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 990 " bcnt %d\n", 991 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 992 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 993 cbp->cb_buf.b_bcount); 994 } 995 #endif 996 997 if (cbp->cb_buf.b_error != 0) { 998 bp->b_error = cbp->cb_buf.b_error; 999 printf("%s: error %d on component %d\n", 1000 cs->sc_xname, bp->b_error, cbp->cb_comp); 1001 } 1002 count = cbp->cb_buf.b_bcount; 1003 buf_destroy(&cbp->cb_buf); 1004 CCD_PUTBUF(cbp); 1005 1006 /* 1007 * If all done, "interrupt". 1008 */ 1009 mutex_enter(cs->sc_iolock); 1010 bp->b_resid -= count; 1011 if (bp->b_resid < 0) 1012 panic("ccdiodone: count"); 1013 if (bp->b_resid == 0) { 1014 /* 1015 * Request is done for better or worse, wakeup the top half. 1016 */ 1017 if (bp->b_error != 0) 1018 bp->b_resid = bp->b_bcount; 1019 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1020 (bp->b_flags & B_READ)); 1021 if (!disk_isbusy(&cs->sc_dkdev)) { 1022 if (bufq_peek(cs->sc_bufq) != NULL) { 1023 cv_broadcast(&cs->sc_push); 1024 } 1025 cv_broadcast(&cs->sc_stop); 1026 } 1027 mutex_exit(cs->sc_iolock); 1028 biodone(bp); 1029 } else 1030 mutex_exit(cs->sc_iolock); 1031 } 1032 1033 /* ARGSUSED */ 1034 static int 1035 ccdread(dev_t dev, struct uio *uio, int flags) 1036 { 1037 int unit = ccdunit(dev); 1038 struct ccd_softc *cs; 1039 1040 #ifdef DEBUG 1041 if (ccddebug & CCDB_FOLLOW) 1042 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1043 #endif 1044 if ((cs = ccdget(unit, 0)) == NULL) 1045 return 0; 1046 1047 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1048 if ((cs->sc_flags & CCDF_INITED) == 0) 1049 return (ENXIO); 1050 1051 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1052 } 1053 1054 /* ARGSUSED */ 1055 static int 1056 ccdwrite(dev_t dev, struct uio *uio, int flags) 1057 { 1058 int unit = ccdunit(dev); 1059 struct ccd_softc *cs; 1060 1061 #ifdef DEBUG 1062 if (ccddebug & CCDB_FOLLOW) 1063 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1064 #endif 1065 if ((cs = ccdget(unit, 0)) == NULL) 1066 return ENOENT; 1067 1068 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1069 if ((cs->sc_flags & CCDF_INITED) == 0) 1070 return (ENXIO); 1071 1072 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1073 } 1074 1075 int (*compat_ccd_ioctl_60)(dev_t, u_long, void *, int, struct lwp *, 1076 int (*)(dev_t, u_long, void *, int, struct lwp *)) = (void *)enosys; 1077 1078 static int 1079 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1080 { 1081 int unit = ccdunit(dev); 1082 int i, j, lookedup = 0, error = 0; 1083 int part, pmask, make; 1084 struct ccd_softc *cs; 1085 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1086 kauth_cred_t uc; 1087 char **cpp; 1088 struct pathbuf *pb; 1089 struct vnode **vpp; 1090 #ifdef __HAVE_OLD_DISKLABEL 1091 struct disklabel newlabel; 1092 #endif 1093 1094 switch (cmd) { 1095 case CCDIOCSET: 1096 make = 1; 1097 break; 1098 default: 1099 if ((*compat_ccd_ioctl_60)(0, cmd, NULL, 0, NULL, NULL) == 0) 1100 make = 1; 1101 else 1102 make = 0; 1103 break; 1104 } 1105 1106 if ((cs = ccdget(unit, make)) == NULL) 1107 return ENOENT; 1108 uc = kauth_cred_get(); 1109 1110 error = (*compat_ccd_ioctl_60)(dev, cmd, data, flag, l, ccdioctl); 1111 if (error != ENOSYS) 1112 return error; 1113 1114 /* Must be open for writes for these commands... */ 1115 switch (cmd) { 1116 case CCDIOCSET: 1117 case CCDIOCCLR: 1118 case DIOCSDINFO: 1119 case DIOCWDINFO: 1120 case DIOCCACHESYNC: 1121 case DIOCAWEDGE: 1122 case DIOCDWEDGE: 1123 case DIOCMWEDGES: 1124 #ifdef __HAVE_OLD_DISKLABEL 1125 case ODIOCSDINFO: 1126 case ODIOCWDINFO: 1127 #endif 1128 case DIOCKLABEL: 1129 case DIOCWLABEL: 1130 if ((flag & FWRITE) == 0) 1131 return (EBADF); 1132 } 1133 1134 mutex_enter(&cs->sc_dvlock); 1135 1136 /* Must be initialized for these... */ 1137 switch (cmd) { 1138 case CCDIOCCLR: 1139 case DIOCGDINFO: 1140 case DIOCGSTRATEGY: 1141 case DIOCGCACHE: 1142 case DIOCCACHESYNC: 1143 case DIOCAWEDGE: 1144 case DIOCDWEDGE: 1145 case DIOCLWEDGES: 1146 case DIOCMWEDGES: 1147 case DIOCSDINFO: 1148 case DIOCWDINFO: 1149 case DIOCGPARTINFO: 1150 case DIOCWLABEL: 1151 case DIOCKLABEL: 1152 case DIOCGDEFLABEL: 1153 #ifdef __HAVE_OLD_DISKLABEL 1154 case ODIOCGDINFO: 1155 case ODIOCSDINFO: 1156 case ODIOCWDINFO: 1157 case ODIOCGDEFLABEL: 1158 #endif 1159 if ((cs->sc_flags & CCDF_INITED) == 0) { 1160 error = ENXIO; 1161 goto out; 1162 } 1163 } 1164 1165 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1166 if (error != EPASSTHROUGH) 1167 goto out; 1168 1169 error = 0; 1170 switch (cmd) { 1171 case CCDIOCSET: 1172 if (cs->sc_flags & CCDF_INITED) { 1173 error = EBUSY; 1174 goto out; 1175 } 1176 1177 /* Validate the flags. */ 1178 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1179 error = EINVAL; 1180 goto out; 1181 } 1182 1183 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1184 ccio->ccio_ndisks == 0) { 1185 error = EINVAL; 1186 goto out; 1187 } 1188 1189 /* Fill in some important bits. */ 1190 cs->sc_ileave = ccio->ccio_ileave; 1191 cs->sc_nccdisks = ccio->ccio_ndisks; 1192 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1193 1194 /* 1195 * Allocate space for and copy in the array of 1196 * component pathnames and device numbers. 1197 */ 1198 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1199 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1200 error = copyin(ccio->ccio_disks, cpp, 1201 ccio->ccio_ndisks * sizeof(*cpp)); 1202 if (error) { 1203 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1204 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1205 goto out; 1206 } 1207 1208 #ifdef DEBUG 1209 if (ccddebug & CCDB_INIT) 1210 for (i = 0; i < ccio->ccio_ndisks; ++i) 1211 printf("ccdioctl: component %d: %p\n", 1212 i, cpp[i]); 1213 #endif 1214 1215 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1216 #ifdef DEBUG 1217 if (ccddebug & CCDB_INIT) 1218 printf("ccdioctl: lookedup = %d\n", lookedup); 1219 #endif 1220 error = pathbuf_copyin(cpp[i], &pb); 1221 if (error == 0) { 1222 error = dk_lookup(pb, l, &vpp[i]); 1223 } 1224 pathbuf_destroy(pb); 1225 if (error != 0) { 1226 for (j = 0; j < lookedup; ++j) 1227 (void)vn_close(vpp[j], FREAD|FWRITE, 1228 uc); 1229 kmem_free(vpp, ccio->ccio_ndisks * 1230 sizeof(*vpp)); 1231 kmem_free(cpp, ccio->ccio_ndisks * 1232 sizeof(*cpp)); 1233 goto out; 1234 } 1235 ++lookedup; 1236 } 1237 1238 /* Attach the disk. */ 1239 disk_attach(&cs->sc_dkdev); 1240 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1241 1242 /* 1243 * Initialize the ccd. Fills in the softc for us. 1244 */ 1245 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1246 for (j = 0; j < lookedup; ++j) 1247 (void)vn_close(vpp[j], FREAD|FWRITE, 1248 uc); 1249 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1250 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1251 disk_detach(&cs->sc_dkdev); 1252 mutex_exit(&cs->sc_dvlock); 1253 bufq_free(cs->sc_bufq); 1254 return error; 1255 } 1256 1257 /* We can free the temporary variables now. */ 1258 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1259 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1260 1261 /* 1262 * The ccd has been successfully initialized, so 1263 * we can place it into the array. Don't try to 1264 * read the disklabel until the disk has been attached, 1265 * because space for the disklabel is allocated 1266 * in disk_attach(); 1267 */ 1268 ccio->ccio_unit = unit; 1269 ccio->ccio_size = cs->sc_size; 1270 1271 /* Try and read the disklabel. */ 1272 ccdgetdisklabel(dev); 1273 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1274 1275 /* discover wedges */ 1276 mutex_exit(&cs->sc_dvlock); 1277 dkwedge_discover(&cs->sc_dkdev); 1278 return 0; 1279 1280 case CCDIOCCLR: 1281 /* 1282 * Don't unconfigure if any other partitions are open 1283 * or if both the character and block flavors of this 1284 * partition are open. 1285 */ 1286 part = DISKPART(dev); 1287 pmask = (1 << part); 1288 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1289 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1290 (cs->sc_dkdev.dk_copenmask & pmask))) { 1291 error = EBUSY; 1292 goto out; 1293 } 1294 1295 /* Delete all of our wedges. */ 1296 dkwedge_delall(&cs->sc_dkdev); 1297 1298 /* Stop new I/O, wait for in-flight I/O to complete. */ 1299 mutex_enter(cs->sc_iolock); 1300 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1301 cs->sc_zap = true; 1302 while (disk_isbusy(&cs->sc_dkdev) || 1303 bufq_peek(cs->sc_bufq) != NULL || 1304 cs->sc_thread != NULL) { 1305 cv_broadcast(&cs->sc_push); 1306 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1307 } 1308 mutex_exit(cs->sc_iolock); 1309 1310 /* 1311 * Free ccd_softc information and clear entry. 1312 */ 1313 1314 /* Close the components and free their pathnames. */ 1315 for (i = 0; i < cs->sc_nccdisks; ++i) { 1316 /* 1317 * XXX: this close could potentially fail and 1318 * cause Bad Things. Maybe we need to force 1319 * the close to happen? 1320 */ 1321 #ifdef DEBUG 1322 if (ccddebug & CCDB_VNODE) 1323 vprint("CCDIOCCLR: vnode info", 1324 cs->sc_cinfo[i].ci_vp); 1325 #endif 1326 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1327 uc); 1328 kmem_free(cs->sc_cinfo[i].ci_path, 1329 cs->sc_cinfo[i].ci_pathlen); 1330 } 1331 1332 /* Free interleave index. */ 1333 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1334 kmem_free(cs->sc_itable[i].ii_index, 1335 cs->sc_itable[i].ii_indexsz); 1336 } 1337 1338 /* Free component info and interleave table. */ 1339 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1340 sizeof(struct ccdcinfo)); 1341 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1342 sizeof(struct ccdiinfo)); 1343 1344 aprint_normal("%s: detached\n", cs->sc_xname); 1345 1346 /* Detach the disk. */ 1347 disk_detach(&cs->sc_dkdev); 1348 bufq_free(cs->sc_bufq); 1349 ccdput(cs); 1350 /* Don't break, otherwise cs is read again. */ 1351 return 0; 1352 1353 case DIOCGSTRATEGY: 1354 { 1355 struct disk_strategy *dks = (void *)data; 1356 1357 mutex_enter(cs->sc_iolock); 1358 if (cs->sc_bufq != NULL) 1359 strlcpy(dks->dks_name, 1360 bufq_getstrategyname(cs->sc_bufq), 1361 sizeof(dks->dks_name)); 1362 else 1363 error = EINVAL; 1364 mutex_exit(cs->sc_iolock); 1365 dks->dks_paramlen = 0; 1366 break; 1367 } 1368 1369 case DIOCGCACHE: 1370 { 1371 int dkcache = 0; 1372 1373 /* 1374 * We pass this call down to all components and report 1375 * intersection of the flags returned by the components. 1376 * If any errors out, we return error. CCD components 1377 * can not change unless the device is unconfigured, so 1378 * device feature flags will remain static. RCE/WCE can change 1379 * of course, if set directly on underlying device. 1380 */ 1381 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1382 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1383 flag, uc); 1384 if (error) 1385 break; 1386 1387 if (i == 0) 1388 dkcache = j; 1389 else 1390 dkcache = DKCACHE_COMBINE(dkcache, j); 1391 } 1392 1393 *((int *)data) = dkcache; 1394 break; 1395 } 1396 1397 case DIOCCACHESYNC: 1398 /* 1399 * We pass this call down to all components and report 1400 * the first error we encounter. 1401 */ 1402 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1403 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1404 flag, uc); 1405 if (j != 0 && error == 0) 1406 error = j; 1407 } 1408 break; 1409 1410 case DIOCWDINFO: 1411 case DIOCSDINFO: 1412 #ifdef __HAVE_OLD_DISKLABEL 1413 case ODIOCWDINFO: 1414 case ODIOCSDINFO: 1415 #endif 1416 { 1417 struct disklabel *lp; 1418 #ifdef __HAVE_OLD_DISKLABEL 1419 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1420 memset(&newlabel, 0, sizeof newlabel); 1421 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1422 lp = &newlabel; 1423 } else 1424 #endif 1425 lp = (struct disklabel *)data; 1426 1427 cs->sc_flags |= CCDF_LABELLING; 1428 1429 error = setdisklabel(cs->sc_dkdev.dk_label, 1430 lp, 0, cs->sc_dkdev.dk_cpulabel); 1431 if (error == 0) { 1432 if (cmd == DIOCWDINFO 1433 #ifdef __HAVE_OLD_DISKLABEL 1434 || cmd == ODIOCWDINFO 1435 #endif 1436 ) 1437 error = writedisklabel(CCDLABELDEV(dev), 1438 ccdstrategy, cs->sc_dkdev.dk_label, 1439 cs->sc_dkdev.dk_cpulabel); 1440 } 1441 1442 cs->sc_flags &= ~CCDF_LABELLING; 1443 break; 1444 } 1445 1446 case DIOCKLABEL: 1447 if (*(int *)data != 0) 1448 cs->sc_flags |= CCDF_KLABEL; 1449 else 1450 cs->sc_flags &= ~CCDF_KLABEL; 1451 break; 1452 1453 case DIOCWLABEL: 1454 if (*(int *)data != 0) 1455 cs->sc_flags |= CCDF_WLABEL; 1456 else 1457 cs->sc_flags &= ~CCDF_WLABEL; 1458 break; 1459 1460 case DIOCGDEFLABEL: 1461 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1462 break; 1463 1464 #ifdef __HAVE_OLD_DISKLABEL 1465 case ODIOCGDEFLABEL: 1466 ccdgetdefaultlabel(cs, &newlabel); 1467 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1468 return ENOTTY; 1469 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1470 break; 1471 #endif 1472 1473 default: 1474 error = ENOTTY; 1475 } 1476 1477 out: 1478 mutex_exit(&cs->sc_dvlock); 1479 return (error); 1480 } 1481 1482 static int 1483 ccdsize(dev_t dev) 1484 { 1485 struct ccd_softc *cs; 1486 struct disklabel *lp; 1487 int part, unit, omask, size; 1488 1489 unit = ccdunit(dev); 1490 if ((cs = ccdget(unit, 0)) == NULL) 1491 return -1; 1492 1493 if ((cs->sc_flags & CCDF_INITED) == 0) 1494 return (-1); 1495 1496 part = DISKPART(dev); 1497 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1498 lp = cs->sc_dkdev.dk_label; 1499 1500 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1501 return (-1); 1502 1503 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1504 size = -1; 1505 else 1506 size = lp->d_partitions[part].p_size * 1507 (lp->d_secsize / DEV_BSIZE); 1508 1509 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1510 return (-1); 1511 1512 return (size); 1513 } 1514 1515 static void 1516 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1517 { 1518 struct ccdgeom *ccg = &cs->sc_geom; 1519 1520 memset(lp, 0, sizeof(*lp)); 1521 1522 if (cs->sc_size > UINT32_MAX) 1523 lp->d_secperunit = UINT32_MAX; 1524 else 1525 lp->d_secperunit = cs->sc_size; 1526 lp->d_secsize = ccg->ccg_secsize; 1527 lp->d_nsectors = ccg->ccg_nsectors; 1528 lp->d_ntracks = ccg->ccg_ntracks; 1529 lp->d_ncylinders = ccg->ccg_ncylinders; 1530 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1531 1532 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1533 lp->d_type = DKTYPE_CCD; 1534 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1535 lp->d_rpm = 3600; 1536 lp->d_interleave = 1; 1537 lp->d_flags = 0; 1538 1539 lp->d_partitions[RAW_PART].p_offset = 0; 1540 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1541 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1542 lp->d_npartitions = RAW_PART + 1; 1543 1544 lp->d_magic = DISKMAGIC; 1545 lp->d_magic2 = DISKMAGIC; 1546 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1547 } 1548 1549 /* 1550 * Read the disklabel from the ccd. If one is not present, fake one 1551 * up. 1552 */ 1553 static void 1554 ccdgetdisklabel(dev_t dev) 1555 { 1556 int unit = ccdunit(dev); 1557 struct ccd_softc *cs; 1558 const char *errstring; 1559 struct disklabel *lp; 1560 struct cpu_disklabel *clp; 1561 1562 if ((cs = ccdget(unit, 0)) == NULL) 1563 return; 1564 lp = cs->sc_dkdev.dk_label; 1565 clp = cs->sc_dkdev.dk_cpulabel; 1566 KASSERT(mutex_owned(&cs->sc_dvlock)); 1567 1568 memset(clp, 0, sizeof(*clp)); 1569 1570 ccdgetdefaultlabel(cs, lp); 1571 1572 /* 1573 * Call the generic disklabel extraction routine. 1574 */ 1575 cs->sc_flags |= CCDF_RLABEL; 1576 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1577 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1578 else 1579 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1580 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1581 if (errstring) 1582 ccdmakedisklabel(cs); 1583 else { 1584 int i; 1585 struct partition *pp; 1586 1587 /* 1588 * Sanity check whether the found disklabel is valid. 1589 * 1590 * This is necessary since total size of ccd may vary 1591 * when an interleave is changed even though exactly 1592 * same componets are used, and old disklabel may used 1593 * if that is found. 1594 */ 1595 if (lp->d_secperunit < UINT32_MAX ? 1596 lp->d_secperunit != cs->sc_size : 1597 lp->d_secperunit > cs->sc_size) 1598 printf("WARNING: %s: " 1599 "total sector size in disklabel (%ju) != " 1600 "the size of ccd (%ju)\n", cs->sc_xname, 1601 (uintmax_t)lp->d_secperunit, 1602 (uintmax_t)cs->sc_size); 1603 for (i = 0; i < lp->d_npartitions; i++) { 1604 pp = &lp->d_partitions[i]; 1605 if (pp->p_offset + pp->p_size > cs->sc_size) 1606 printf("WARNING: %s: end of partition `%c' " 1607 "exceeds the size of ccd (%ju)\n", 1608 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1609 } 1610 } 1611 1612 #ifdef DEBUG 1613 /* It's actually extremely common to have unlabeled ccds. */ 1614 if (ccddebug & CCDB_LABEL) 1615 if (errstring != NULL) 1616 printf("%s: %s\n", cs->sc_xname, errstring); 1617 #endif 1618 1619 /* In-core label now valid. */ 1620 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1621 } 1622 1623 /* 1624 * Take care of things one might want to take care of in the event 1625 * that a disklabel isn't present. 1626 */ 1627 static void 1628 ccdmakedisklabel(struct ccd_softc *cs) 1629 { 1630 struct disklabel *lp = cs->sc_dkdev.dk_label; 1631 1632 /* 1633 * For historical reasons, if there's no disklabel present 1634 * the raw partition must be marked FS_BSDFFS. 1635 */ 1636 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1637 1638 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1639 1640 lp->d_checksum = dkcksum(lp); 1641 } 1642 1643 #ifdef DEBUG 1644 static void 1645 printiinfo(struct ccdiinfo *ii) 1646 { 1647 int ix, i; 1648 1649 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1650 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1651 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1652 for (i = 0; i < ii->ii_ndisk; i++) 1653 printf(" %d", ii->ii_index[i]); 1654 printf("\n"); 1655 } 1656 } 1657 #endif 1658 1659 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr,bufq_fcfs"); 1660 1661 static int 1662 ccd_modcmd(modcmd_t cmd, void *arg) 1663 { 1664 int error = 0; 1665 #ifdef _MODULE 1666 int bmajor = -1, cmajor = -1; 1667 #endif 1668 1669 1670 switch (cmd) { 1671 case MODULE_CMD_INIT: 1672 #ifdef _MODULE 1673 ccdattach(0); 1674 1675 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1676 &ccd_cdevsw, &cmajor); 1677 sysctl_kern_ccd_setup(&ccd_clog); 1678 #endif 1679 break; 1680 1681 case MODULE_CMD_FINI: 1682 #ifdef _MODULE 1683 mutex_enter(&ccd_lock); 1684 if (!LIST_EMPTY(&ccds)) { 1685 mutex_exit(&ccd_lock); 1686 error = EBUSY; 1687 } else { 1688 mutex_exit(&ccd_lock); 1689 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1690 ccddetach(); 1691 } 1692 sysctl_teardown(&ccd_clog); 1693 #endif 1694 break; 1695 1696 case MODULE_CMD_STAT: 1697 return ENOTTY; 1698 1699 default: 1700 return ENOTTY; 1701 } 1702 1703 return error; 1704 } 1705 1706 static int 1707 ccd_units_sysctl(SYSCTLFN_ARGS) 1708 { 1709 struct sysctlnode node; 1710 struct ccd_softc *sc; 1711 int error, i, nccd, *units; 1712 size_t size; 1713 1714 nccd = 0; 1715 mutex_enter(&ccd_lock); 1716 LIST_FOREACH(sc, &ccds, sc_link) 1717 nccd++; 1718 mutex_exit(&ccd_lock); 1719 1720 if (nccd != 0) { 1721 size = nccd * sizeof(*units); 1722 units = kmem_zalloc(size, KM_SLEEP); 1723 i = 0; 1724 mutex_enter(&ccd_lock); 1725 LIST_FOREACH(sc, &ccds, sc_link) { 1726 if (i >= nccd) 1727 break; 1728 units[i] = sc->sc_unit; 1729 } 1730 mutex_exit(&ccd_lock); 1731 } else { 1732 units = NULL; 1733 size = 0; 1734 } 1735 1736 node = *rnode; 1737 node.sysctl_data = units; 1738 node.sysctl_size = size; 1739 1740 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1741 if (units) 1742 kmem_free(units, size); 1743 return error; 1744 } 1745 1746 static int 1747 ccd_info_sysctl(SYSCTLFN_ARGS) 1748 { 1749 struct sysctlnode node; 1750 struct ccddiskinfo ccd; 1751 struct ccd_softc *sc; 1752 int unit; 1753 1754 if (newp == NULL || newlen != sizeof(int)) 1755 return EINVAL; 1756 1757 unit = *(const int *)newp; 1758 newp = NULL; 1759 newlen = 0; 1760 ccd.ccd_ndisks = ~0; 1761 mutex_enter(&ccd_lock); 1762 LIST_FOREACH(sc, &ccds, sc_link) { 1763 if (sc->sc_unit == unit) { 1764 ccd.ccd_ileave = sc->sc_ileave; 1765 ccd.ccd_size = sc->sc_size; 1766 ccd.ccd_ndisks = sc->sc_nccdisks; 1767 ccd.ccd_flags = sc->sc_flags; 1768 break; 1769 } 1770 } 1771 mutex_exit(&ccd_lock); 1772 1773 if (ccd.ccd_ndisks == ~0) 1774 return ENOENT; 1775 1776 node = *rnode; 1777 node.sysctl_data = &ccd; 1778 node.sysctl_size = sizeof(ccd); 1779 1780 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1781 } 1782 1783 static int 1784 ccd_components_sysctl(SYSCTLFN_ARGS) 1785 { 1786 struct sysctlnode node; 1787 int error, unit; 1788 size_t size; 1789 char *names, *p, *ep; 1790 struct ccd_softc *sc; 1791 1792 if (newp == NULL || newlen != sizeof(int)) 1793 return EINVAL; 1794 1795 size = 0; 1796 unit = *(const int *)newp; 1797 newp = NULL; 1798 newlen = 0; 1799 mutex_enter(&ccd_lock); 1800 LIST_FOREACH(sc, &ccds, sc_link) 1801 if (sc->sc_unit == unit) { 1802 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1803 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1804 break; 1805 } 1806 mutex_exit(&ccd_lock); 1807 1808 if (size == 0) 1809 return ENOENT; 1810 names = kmem_zalloc(size, KM_SLEEP); 1811 p = names; 1812 ep = names + size; 1813 mutex_enter(&ccd_lock); 1814 LIST_FOREACH(sc, &ccds, sc_link) 1815 if (sc->sc_unit == unit) { 1816 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1817 char *d = sc->sc_cinfo[i].ci_path; 1818 while (p < ep && (*p++ = *d++) != '\0') 1819 continue; 1820 } 1821 break; 1822 } 1823 mutex_exit(&ccd_lock); 1824 1825 node = *rnode; 1826 node.sysctl_data = names; 1827 node.sysctl_size = ep - names; 1828 1829 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1830 kmem_free(names, size); 1831 return error; 1832 } 1833 1834 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1835 { 1836 const struct sysctlnode *node = NULL; 1837 1838 sysctl_createv(clog, 0, NULL, &node, 1839 CTLFLAG_PERMANENT, 1840 CTLTYPE_NODE, "ccd", 1841 SYSCTL_DESCR("ConCatenated Disk state"), 1842 NULL, 0, NULL, 0, 1843 CTL_KERN, CTL_CREATE, CTL_EOL); 1844 1845 if (node == NULL) 1846 return; 1847 1848 sysctl_createv(clog, 0, &node, NULL, 1849 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1850 CTLTYPE_STRUCT, "units", 1851 SYSCTL_DESCR("List of ccd unit numbers"), 1852 ccd_units_sysctl, 0, NULL, 0, 1853 CTL_CREATE, CTL_EOL); 1854 sysctl_createv(clog, 0, &node, NULL, 1855 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1856 CTLTYPE_STRUCT, "info", 1857 SYSCTL_DESCR("Information about a CCD unit"), 1858 ccd_info_sysctl, 0, NULL, 0, 1859 CTL_CREATE, CTL_EOL); 1860 sysctl_createv(clog, 0, &node, NULL, 1861 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1862 CTLTYPE_STRUCT, "components", 1863 SYSCTL_DESCR("Information about CCD components"), 1864 ccd_components_sysctl, 0, NULL, 0, 1865 CTL_CREATE, CTL_EOL); 1866 } 1867