1 /* $NetBSD: ccd.c,v 1.179 2019/03/27 19:13:34 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.179 2019/03/27 19:13:34 martin Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 #include <sys/compat_stub.h> 119 120 #include <uvm/uvm_extern.h> 121 122 #include <dev/ccdvar.h> 123 #include <dev/dkvar.h> 124 125 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 126 127 #include "ioconf.h" 128 129 #if defined(CCDDEBUG) && !defined(DEBUG) 130 #define DEBUG 131 #endif 132 133 #ifdef DEBUG 134 #define CCDB_FOLLOW 0x01 135 #define CCDB_INIT 0x02 136 #define CCDB_IO 0x04 137 #define CCDB_LABEL 0x08 138 #define CCDB_VNODE 0x10 139 int ccddebug = 0x00; 140 #endif 141 142 #define ccdunit(x) DISKUNIT(x) 143 144 struct ccdbuf { 145 struct buf cb_buf; /* new I/O buf */ 146 struct buf *cb_obp; /* ptr. to original I/O buf */ 147 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 148 int cb_comp; /* target component */ 149 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 150 }; 151 152 /* component buffer pool */ 153 static pool_cache_t ccd_cache; 154 155 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 156 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 157 158 #define CCDLABELDEV(dev) \ 159 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 160 161 /* called by main() at boot time */ 162 void ccddetach(void); 163 164 /* called by biodone() at interrupt time */ 165 static void ccdiodone(struct buf *); 166 167 static void ccdinterleave(struct ccd_softc *); 168 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 169 struct lwp *); 170 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 171 daddr_t, void *, long); 172 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 173 static void ccdgetdisklabel(dev_t); 174 static void ccdmakedisklabel(struct ccd_softc *); 175 static void ccdstart(struct ccd_softc *); 176 static void ccdthread(void *); 177 178 static dev_type_open(ccdopen); 179 static dev_type_close(ccdclose); 180 static dev_type_read(ccdread); 181 static dev_type_write(ccdwrite); 182 static dev_type_ioctl(ccdioctl); 183 static dev_type_strategy(ccdstrategy); 184 static dev_type_size(ccdsize); 185 186 const struct bdevsw ccd_bdevsw = { 187 .d_open = ccdopen, 188 .d_close = ccdclose, 189 .d_strategy = ccdstrategy, 190 .d_ioctl = ccdioctl, 191 .d_dump = nodump, 192 .d_psize = ccdsize, 193 .d_discard = nodiscard, 194 .d_flag = D_DISK | D_MPSAFE 195 }; 196 197 const struct cdevsw ccd_cdevsw = { 198 .d_open = ccdopen, 199 .d_close = ccdclose, 200 .d_read = ccdread, 201 .d_write = ccdwrite, 202 .d_ioctl = ccdioctl, 203 .d_stop = nostop, 204 .d_tty = notty, 205 .d_poll = nopoll, 206 .d_mmap = nommap, 207 .d_kqfilter = nokqfilter, 208 .d_discard = nodiscard, 209 .d_flag = D_DISK | D_MPSAFE 210 }; 211 212 #ifdef DEBUG 213 static void printiinfo(struct ccdiinfo *); 214 #endif 215 216 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 217 static kmutex_t ccd_lock; 218 219 #ifdef _MODULE 220 static struct sysctllog *ccd_clog; 221 #endif 222 223 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 224 225 static struct ccd_softc * 226 ccdcreate(int unit) { 227 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 228 229 /* Initialize per-softc structures. */ 230 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 231 sc->sc_unit = unit; 232 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 233 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 234 cv_init(&sc->sc_stop, "ccdstop"); 235 cv_init(&sc->sc_push, "ccdthr"); 236 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 237 return sc; 238 } 239 240 static void 241 ccddestroy(struct ccd_softc *sc) { 242 mutex_obj_free(sc->sc_iolock); 243 mutex_exit(&sc->sc_dvlock); 244 mutex_destroy(&sc->sc_dvlock); 245 cv_destroy(&sc->sc_stop); 246 cv_destroy(&sc->sc_push); 247 disk_destroy(&sc->sc_dkdev); 248 kmem_free(sc, sizeof(*sc)); 249 } 250 251 static struct ccd_softc * 252 ccdget(int unit, int make) { 253 struct ccd_softc *sc; 254 if (unit < 0) { 255 #ifdef DIAGNOSTIC 256 panic("%s: unit %d!", __func__, unit); 257 #endif 258 return NULL; 259 } 260 mutex_enter(&ccd_lock); 261 LIST_FOREACH(sc, &ccds, sc_link) { 262 if (sc->sc_unit == unit) { 263 mutex_exit(&ccd_lock); 264 return sc; 265 } 266 } 267 mutex_exit(&ccd_lock); 268 if (!make) 269 return NULL; 270 if ((sc = ccdcreate(unit)) == NULL) 271 return NULL; 272 mutex_enter(&ccd_lock); 273 LIST_INSERT_HEAD(&ccds, sc, sc_link); 274 mutex_exit(&ccd_lock); 275 return sc; 276 } 277 278 static void 279 ccdput(struct ccd_softc *sc) { 280 mutex_enter(&ccd_lock); 281 LIST_REMOVE(sc, sc_link); 282 mutex_exit(&ccd_lock); 283 ccddestroy(sc); 284 } 285 286 /* 287 * Called by main() during pseudo-device attachment. All we need 288 * to do is allocate enough space for devices to be configured later. 289 */ 290 void 291 ccdattach(int num) 292 { 293 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 294 295 /* Initialize the component buffer pool. */ 296 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 297 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 298 } 299 300 void 301 ccddetach(void) 302 { 303 pool_cache_destroy(ccd_cache); 304 mutex_destroy(&ccd_lock); 305 } 306 307 static int 308 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 309 struct lwp *l) 310 { 311 struct ccdcinfo *ci = NULL; 312 int ix; 313 struct ccdgeom *ccg = &cs->sc_geom; 314 char *tmppath; 315 int error, path_alloced; 316 uint64_t psize, minsize; 317 unsigned secsize, maxsecsize; 318 struct disk_geom *dg; 319 320 #ifdef DEBUG 321 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 322 printf("%s: ccdinit\n", cs->sc_xname); 323 #endif 324 325 /* Allocate space for the component info. */ 326 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 327 KM_SLEEP); 328 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 329 330 cs->sc_size = 0; 331 332 /* 333 * Verify that each component piece exists and record 334 * relevant information about it. 335 */ 336 maxsecsize = 0; 337 minsize = 0; 338 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 339 ci = &cs->sc_cinfo[ix]; 340 ci->ci_vp = vpp[ix]; 341 342 /* 343 * Copy in the pathname of the component. 344 */ 345 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 346 error = copyinstr(cpaths[ix], tmppath, 347 MAXPATHLEN, &ci->ci_pathlen); 348 if (ci->ci_pathlen == 0) 349 error = EINVAL; 350 if (error) { 351 #ifdef DEBUG 352 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 353 printf("%s: can't copy path, error = %d\n", 354 cs->sc_xname, error); 355 #endif 356 goto out; 357 } 358 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 359 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 360 path_alloced++; 361 362 /* 363 * XXX: Cache the component's dev_t. 364 */ 365 ci->ci_dev = vpp[ix]->v_rdev; 366 367 /* 368 * Get partition information for the component. 369 */ 370 error = getdisksize(vpp[ix], &psize, &secsize); 371 if (error) { 372 #ifdef DEBUG 373 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 374 printf("%s: %s: disksize failed, error = %d\n", 375 cs->sc_xname, ci->ci_path, error); 376 #endif 377 goto out; 378 } 379 380 /* 381 * Calculate the size, truncating to an interleave 382 * boundary if necessary. 383 */ 384 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 385 if (cs->sc_ileave > 1) 386 psize -= psize % cs->sc_ileave; 387 388 if (psize == 0) { 389 #ifdef DEBUG 390 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 391 printf("%s: %s: size == 0\n", 392 cs->sc_xname, ci->ci_path); 393 #endif 394 error = ENODEV; 395 goto out; 396 } 397 398 if (minsize == 0 || psize < minsize) 399 minsize = psize; 400 ci->ci_size = psize; 401 cs->sc_size += psize; 402 } 403 404 /* 405 * Don't allow the interleave to be smaller than 406 * the biggest component sector. 407 */ 408 if ((cs->sc_ileave > 0) && 409 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 410 #ifdef DEBUG 411 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 412 printf("%s: interleave must be at least %d\n", 413 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 414 #endif 415 error = EINVAL; 416 goto out; 417 } 418 419 /* 420 * If uniform interleave is desired set all sizes to that of 421 * the smallest component. 422 */ 423 if (cs->sc_flags & CCDF_UNIFORM) { 424 for (ci = cs->sc_cinfo; 425 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 426 ci->ci_size = minsize; 427 428 cs->sc_size = cs->sc_nccdisks * minsize; 429 } 430 431 /* 432 * Construct the interleave table. 433 */ 434 ccdinterleave(cs); 435 436 /* 437 * Create pseudo-geometry based on 1MB cylinders. It's 438 * pretty close. 439 */ 440 ccg->ccg_secsize = DEV_BSIZE; 441 ccg->ccg_ntracks = 1; 442 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 443 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 444 445 dg = &cs->sc_dkdev.dk_geom; 446 memset(dg, 0, sizeof(*dg)); 447 dg->dg_secperunit = cs->sc_size; 448 dg->dg_secsize = ccg->ccg_secsize; 449 dg->dg_nsectors = ccg->ccg_nsectors; 450 dg->dg_ntracks = ccg->ccg_ntracks; 451 dg->dg_ncylinders = ccg->ccg_ncylinders; 452 453 if (cs->sc_ileave > 0) 454 aprint_normal("%s: Interleaving %d component%s " 455 "(%d block interleave)\n", cs->sc_xname, 456 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 457 cs->sc_ileave); 458 else 459 aprint_normal("%s: Concatenating %d component%s\n", 460 cs->sc_xname, 461 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 462 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 463 ci = &cs->sc_cinfo[ix]; 464 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 465 ci->ci_path, (uintmax_t)ci->ci_size); 466 } 467 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 468 469 /* 470 * Create thread to handle deferred I/O. 471 */ 472 cs->sc_zap = false; 473 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 474 cs, &cs->sc_thread, "%s", cs->sc_xname); 475 if (error) { 476 printf("ccdinit: can't create thread: %d\n", error); 477 goto out; 478 } 479 480 /* 481 * Only now that everything is set up can we enable the device. 482 */ 483 mutex_enter(cs->sc_iolock); 484 cs->sc_flags |= CCDF_INITED; 485 mutex_exit(cs->sc_iolock); 486 kmem_free(tmppath, MAXPATHLEN); 487 return (0); 488 489 out: 490 for (ix = 0; ix < path_alloced; ix++) { 491 kmem_free(cs->sc_cinfo[ix].ci_path, 492 cs->sc_cinfo[ix].ci_pathlen); 493 } 494 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 495 kmem_free(tmppath, MAXPATHLEN); 496 return (error); 497 } 498 499 static void 500 ccdinterleave(struct ccd_softc *cs) 501 { 502 struct ccdcinfo *ci, *smallci; 503 struct ccdiinfo *ii; 504 daddr_t bn, lbn; 505 int ix; 506 u_long size; 507 508 #ifdef DEBUG 509 if (ccddebug & CCDB_INIT) 510 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 511 #endif 512 /* 513 * Allocate an interleave table. 514 * Chances are this is too big, but we don't care. 515 */ 516 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 517 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 518 519 /* 520 * Trivial case: no interleave (actually interleave of disk size). 521 * Each table entry represents a single component in its entirety. 522 */ 523 if (cs->sc_ileave == 0) { 524 bn = 0; 525 ii = cs->sc_itable; 526 527 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 528 /* Allocate space for ii_index. */ 529 ii->ii_indexsz = sizeof(int); 530 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 531 ii->ii_ndisk = 1; 532 ii->ii_startblk = bn; 533 ii->ii_startoff = 0; 534 ii->ii_index[0] = ix; 535 bn += cs->sc_cinfo[ix].ci_size; 536 ii++; 537 } 538 ii->ii_ndisk = 0; 539 #ifdef DEBUG 540 if (ccddebug & CCDB_INIT) 541 printiinfo(cs->sc_itable); 542 #endif 543 return; 544 } 545 546 /* 547 * The following isn't fast or pretty; it doesn't have to be. 548 */ 549 size = 0; 550 bn = lbn = 0; 551 for (ii = cs->sc_itable; ; ii++) { 552 /* Allocate space for ii_index. */ 553 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 554 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 555 556 /* 557 * Locate the smallest of the remaining components 558 */ 559 smallci = NULL; 560 for (ci = cs->sc_cinfo; 561 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 562 if (ci->ci_size > size && 563 (smallci == NULL || 564 ci->ci_size < smallci->ci_size)) 565 smallci = ci; 566 567 /* 568 * Nobody left, all done 569 */ 570 if (smallci == NULL) { 571 ii->ii_ndisk = 0; 572 break; 573 } 574 575 /* 576 * Record starting logical block and component offset 577 */ 578 ii->ii_startblk = bn / cs->sc_ileave; 579 ii->ii_startoff = lbn; 580 581 /* 582 * Determine how many disks take part in this interleave 583 * and record their indices. 584 */ 585 ix = 0; 586 for (ci = cs->sc_cinfo; 587 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 588 if (ci->ci_size >= smallci->ci_size) 589 ii->ii_index[ix++] = ci - cs->sc_cinfo; 590 ii->ii_ndisk = ix; 591 bn += ix * (smallci->ci_size - size); 592 lbn = smallci->ci_size / cs->sc_ileave; 593 size = smallci->ci_size; 594 } 595 #ifdef DEBUG 596 if (ccddebug & CCDB_INIT) 597 printiinfo(cs->sc_itable); 598 #endif 599 } 600 601 /* ARGSUSED */ 602 static int 603 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 604 { 605 int unit = ccdunit(dev); 606 struct ccd_softc *cs; 607 struct disklabel *lp; 608 int error = 0, part, pmask; 609 610 #ifdef DEBUG 611 if (ccddebug & CCDB_FOLLOW) 612 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 613 #endif 614 if ((cs = ccdget(unit, 1)) == NULL) 615 return ENXIO; 616 617 mutex_enter(&cs->sc_dvlock); 618 619 lp = cs->sc_dkdev.dk_label; 620 621 part = DISKPART(dev); 622 pmask = (1 << part); 623 624 /* 625 * If we're initialized, check to see if there are any other 626 * open partitions. If not, then it's safe to update 627 * the in-core disklabel. Only read the disklabel if it is 628 * not already valid. 629 */ 630 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 631 cs->sc_dkdev.dk_openmask == 0) 632 ccdgetdisklabel(dev); 633 634 /* Check that the partition exists. */ 635 if (part != RAW_PART) { 636 if (((cs->sc_flags & CCDF_INITED) == 0) || 637 ((part >= lp->d_npartitions) || 638 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 639 error = ENXIO; 640 goto done; 641 } 642 } 643 644 /* Prevent our unit from being unconfigured while open. */ 645 switch (fmt) { 646 case S_IFCHR: 647 cs->sc_dkdev.dk_copenmask |= pmask; 648 break; 649 650 case S_IFBLK: 651 cs->sc_dkdev.dk_bopenmask |= pmask; 652 break; 653 } 654 cs->sc_dkdev.dk_openmask = 655 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 656 657 done: 658 mutex_exit(&cs->sc_dvlock); 659 return (error); 660 } 661 662 /* ARGSUSED */ 663 static int 664 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 665 { 666 int unit = ccdunit(dev); 667 struct ccd_softc *cs; 668 int part; 669 670 #ifdef DEBUG 671 if (ccddebug & CCDB_FOLLOW) 672 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 673 #endif 674 675 if ((cs = ccdget(unit, 0)) == NULL) 676 return ENXIO; 677 678 mutex_enter(&cs->sc_dvlock); 679 680 part = DISKPART(dev); 681 682 /* ...that much closer to allowing unconfiguration... */ 683 switch (fmt) { 684 case S_IFCHR: 685 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 686 break; 687 688 case S_IFBLK: 689 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 690 break; 691 } 692 cs->sc_dkdev.dk_openmask = 693 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 694 695 if (cs->sc_dkdev.dk_openmask == 0) { 696 if ((cs->sc_flags & CCDF_KLABEL) == 0) 697 cs->sc_flags &= ~CCDF_VLABEL; 698 } 699 700 mutex_exit(&cs->sc_dvlock); 701 return (0); 702 } 703 704 static bool 705 ccdbackoff(struct ccd_softc *cs) 706 { 707 708 /* XXX Arbitrary, should be a uvm call. */ 709 return uvmexp.free < (uvmexp.freemin >> 1) && 710 disk_isbusy(&cs->sc_dkdev); 711 } 712 713 static void 714 ccdthread(void *cookie) 715 { 716 struct ccd_softc *cs; 717 718 cs = cookie; 719 720 #ifdef DEBUG 721 if (ccddebug & CCDB_FOLLOW) 722 printf("ccdthread: hello\n"); 723 #endif 724 725 mutex_enter(cs->sc_iolock); 726 while (__predict_true(!cs->sc_zap)) { 727 if (bufq_peek(cs->sc_bufq) == NULL) { 728 /* Nothing to do. */ 729 cv_wait(&cs->sc_push, cs->sc_iolock); 730 continue; 731 } 732 if (ccdbackoff(cs)) { 733 /* Wait for memory to become available. */ 734 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 735 continue; 736 } 737 #ifdef DEBUG 738 if (ccddebug & CCDB_FOLLOW) 739 printf("ccdthread: dispatching I/O\n"); 740 #endif 741 ccdstart(cs); 742 mutex_enter(cs->sc_iolock); 743 } 744 cs->sc_thread = NULL; 745 mutex_exit(cs->sc_iolock); 746 #ifdef DEBUG 747 if (ccddebug & CCDB_FOLLOW) 748 printf("ccdthread: goodbye\n"); 749 #endif 750 kthread_exit(0); 751 } 752 753 static void 754 ccdstrategy(struct buf *bp) 755 { 756 int unit = ccdunit(bp->b_dev); 757 struct ccd_softc *cs; 758 if ((cs = ccdget(unit, 0)) == NULL) 759 return; 760 761 /* Must be open or reading label. */ 762 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 763 (cs->sc_flags & CCDF_RLABEL) != 0); 764 765 mutex_enter(cs->sc_iolock); 766 /* Synchronize with device init/uninit. */ 767 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 768 mutex_exit(cs->sc_iolock); 769 #ifdef DEBUG 770 if (ccddebug & CCDB_FOLLOW) 771 printf("ccdstrategy: unit %d: not inited\n", unit); 772 #endif 773 bp->b_error = ENXIO; 774 bp->b_resid = bp->b_bcount; 775 biodone(bp); 776 return; 777 } 778 779 /* Defer to thread if system is low on memory. */ 780 bufq_put(cs->sc_bufq, bp); 781 if (__predict_false(ccdbackoff(cs))) { 782 mutex_exit(cs->sc_iolock); 783 #ifdef DEBUG 784 if (ccddebug & CCDB_FOLLOW) 785 printf("ccdstrategy: holding off on I/O\n"); 786 #endif 787 return; 788 } 789 ccdstart(cs); 790 } 791 792 static void 793 ccdstart(struct ccd_softc *cs) 794 { 795 daddr_t blkno; 796 int wlabel; 797 struct disklabel *lp; 798 long bcount, rcount; 799 struct ccdbuf *cbp; 800 char *addr; 801 daddr_t bn; 802 vnode_t *vp; 803 buf_t *bp; 804 805 KASSERT(mutex_owned(cs->sc_iolock)); 806 807 bp = bufq_get(cs->sc_bufq); 808 KASSERT(bp != NULL); 809 810 disk_busy(&cs->sc_dkdev); 811 812 #ifdef DEBUG 813 if (ccddebug & CCDB_FOLLOW) 814 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 815 #endif 816 817 /* If it's a nil transfer, wake up the top half now. */ 818 if (bp->b_bcount == 0) 819 goto done; 820 821 lp = cs->sc_dkdev.dk_label; 822 823 /* 824 * Do bounds checking and adjust transfer. If there's an 825 * error, the bounds check will flag that for us. Convert 826 * the partition relative block number to an absolute. 827 */ 828 blkno = bp->b_blkno; 829 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 830 if (DISKPART(bp->b_dev) != RAW_PART) { 831 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 832 goto done; 833 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 834 } 835 mutex_exit(cs->sc_iolock); 836 bp->b_rawblkno = blkno; 837 838 /* Allocate the component buffers and start I/O! */ 839 bp->b_resid = bp->b_bcount; 840 bn = bp->b_rawblkno; 841 addr = bp->b_data; 842 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 843 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 844 rcount = cbp->cb_buf.b_bcount; 845 bn += btodb(rcount); 846 addr += rcount; 847 vp = cbp->cb_buf.b_vp; 848 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 849 mutex_enter(vp->v_interlock); 850 vp->v_numoutput++; 851 mutex_exit(vp->v_interlock); 852 } 853 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 854 } 855 return; 856 857 done: 858 disk_unbusy(&cs->sc_dkdev, 0, 0); 859 cv_broadcast(&cs->sc_stop); 860 cv_broadcast(&cs->sc_push); 861 mutex_exit(cs->sc_iolock); 862 bp->b_resid = bp->b_bcount; 863 biodone(bp); 864 } 865 866 /* 867 * Build a component buffer header. 868 */ 869 static struct ccdbuf * 870 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 871 long bcount) 872 { 873 struct ccdcinfo *ci; 874 struct ccdbuf *cbp; 875 daddr_t cbn, cboff; 876 u_int64_t cbc; 877 int ccdisk; 878 879 #ifdef DEBUG 880 if (ccddebug & CCDB_IO) 881 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 882 cs, bp, bn, addr, bcount); 883 #endif 884 /* 885 * Determine which component bn falls in. 886 */ 887 cbn = bn; 888 cboff = 0; 889 890 /* 891 * Serially concatenated 892 */ 893 if (cs->sc_ileave == 0) { 894 daddr_t sblk; 895 896 sblk = 0; 897 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 898 cbn >= sblk + ci->ci_size; 899 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 900 sblk += ci->ci_size; 901 cbn -= sblk; 902 } 903 /* 904 * Interleaved 905 */ 906 else { 907 struct ccdiinfo *ii; 908 int off; 909 910 cboff = cbn % cs->sc_ileave; 911 cbn /= cs->sc_ileave; 912 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 913 if (ii->ii_startblk > cbn) 914 break; 915 ii--; 916 off = cbn - ii->ii_startblk; 917 if (ii->ii_ndisk == 1) { 918 ccdisk = ii->ii_index[0]; 919 cbn = ii->ii_startoff + off; 920 } else { 921 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 922 cbn = ii->ii_startoff + off / ii->ii_ndisk; 923 } 924 cbn *= cs->sc_ileave; 925 ci = &cs->sc_cinfo[ccdisk]; 926 } 927 928 /* 929 * Fill in the component buf structure. 930 */ 931 cbp = CCD_GETBUF(); 932 KASSERT(cbp != NULL); 933 buf_init(&cbp->cb_buf); 934 cbp->cb_buf.b_flags = bp->b_flags; 935 cbp->cb_buf.b_oflags = bp->b_oflags; 936 cbp->cb_buf.b_cflags = bp->b_cflags; 937 cbp->cb_buf.b_iodone = ccdiodone; 938 cbp->cb_buf.b_proc = bp->b_proc; 939 cbp->cb_buf.b_dev = ci->ci_dev; 940 cbp->cb_buf.b_blkno = cbn + cboff; 941 cbp->cb_buf.b_data = addr; 942 cbp->cb_buf.b_vp = ci->ci_vp; 943 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 944 if (cs->sc_ileave == 0) 945 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 946 else 947 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 948 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 949 950 /* 951 * context for ccdiodone 952 */ 953 cbp->cb_obp = bp; 954 cbp->cb_sc = cs; 955 cbp->cb_comp = ccdisk; 956 957 BIO_COPYPRIO(&cbp->cb_buf, bp); 958 959 #ifdef DEBUG 960 if (ccddebug & CCDB_IO) 961 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 962 " bcnt %d\n", 963 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 964 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 965 cbp->cb_buf.b_bcount); 966 #endif 967 968 return (cbp); 969 } 970 971 /* 972 * Called at interrupt time. 973 * Mark the component as done and if all components are done, 974 * take a ccd interrupt. 975 */ 976 static void 977 ccdiodone(struct buf *vbp) 978 { 979 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 980 struct buf *bp = cbp->cb_obp; 981 struct ccd_softc *cs = cbp->cb_sc; 982 int count; 983 984 #ifdef DEBUG 985 if (ccddebug & CCDB_FOLLOW) 986 printf("ccdiodone(%p)\n", cbp); 987 if (ccddebug & CCDB_IO) { 988 printf("ccdiodone: bp %p bcount %d resid %d\n", 989 bp, bp->b_bcount, bp->b_resid); 990 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 991 " bcnt %d\n", 992 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 993 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 994 cbp->cb_buf.b_bcount); 995 } 996 #endif 997 998 if (cbp->cb_buf.b_error != 0) { 999 bp->b_error = cbp->cb_buf.b_error; 1000 printf("%s: error %d on component %d\n", 1001 cs->sc_xname, bp->b_error, cbp->cb_comp); 1002 } 1003 count = cbp->cb_buf.b_bcount; 1004 buf_destroy(&cbp->cb_buf); 1005 CCD_PUTBUF(cbp); 1006 1007 /* 1008 * If all done, "interrupt". 1009 */ 1010 mutex_enter(cs->sc_iolock); 1011 bp->b_resid -= count; 1012 if (bp->b_resid < 0) 1013 panic("ccdiodone: count"); 1014 if (bp->b_resid == 0) { 1015 /* 1016 * Request is done for better or worse, wakeup the top half. 1017 */ 1018 if (bp->b_error != 0) 1019 bp->b_resid = bp->b_bcount; 1020 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1021 (bp->b_flags & B_READ)); 1022 if (!disk_isbusy(&cs->sc_dkdev)) { 1023 if (bufq_peek(cs->sc_bufq) != NULL) { 1024 cv_broadcast(&cs->sc_push); 1025 } 1026 cv_broadcast(&cs->sc_stop); 1027 } 1028 mutex_exit(cs->sc_iolock); 1029 biodone(bp); 1030 } else 1031 mutex_exit(cs->sc_iolock); 1032 } 1033 1034 /* ARGSUSED */ 1035 static int 1036 ccdread(dev_t dev, struct uio *uio, int flags) 1037 { 1038 int unit = ccdunit(dev); 1039 struct ccd_softc *cs; 1040 1041 #ifdef DEBUG 1042 if (ccddebug & CCDB_FOLLOW) 1043 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1044 #endif 1045 if ((cs = ccdget(unit, 0)) == NULL) 1046 return 0; 1047 1048 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1049 if ((cs->sc_flags & CCDF_INITED) == 0) 1050 return (ENXIO); 1051 1052 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1053 } 1054 1055 /* ARGSUSED */ 1056 static int 1057 ccdwrite(dev_t dev, struct uio *uio, int flags) 1058 { 1059 int unit = ccdunit(dev); 1060 struct ccd_softc *cs; 1061 1062 #ifdef DEBUG 1063 if (ccddebug & CCDB_FOLLOW) 1064 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1065 #endif 1066 if ((cs = ccdget(unit, 0)) == NULL) 1067 return ENOENT; 1068 1069 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1070 if ((cs->sc_flags & CCDF_INITED) == 0) 1071 return (ENXIO); 1072 1073 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1074 } 1075 1076 int (*compat_ccd_ioctl_60)(dev_t, u_long, void *, int, struct lwp *, 1077 int (*)(dev_t, u_long, void *, int, struct lwp *)) = (void *)enosys; 1078 1079 static int 1080 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1081 { 1082 int unit = ccdunit(dev); 1083 int i, j, lookedup = 0, error = 0; 1084 int part, pmask, make, hook; 1085 struct ccd_softc *cs; 1086 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1087 kauth_cred_t uc; 1088 char **cpp; 1089 struct pathbuf *pb; 1090 struct vnode **vpp; 1091 #ifdef __HAVE_OLD_DISKLABEL 1092 struct disklabel newlabel; 1093 #endif 1094 1095 switch (cmd) { 1096 case CCDIOCSET: 1097 make = 1; 1098 break; 1099 default: 1100 MODULE_HOOK_CALL(ccd_ioctl_60_hook, 1101 (0, cmd, NULL, 0, NULL, NULL), 1102 enosys(), hook); 1103 if (hook == 0) 1104 make = 1; 1105 else 1106 make = 0; 1107 break; 1108 } 1109 1110 if ((cs = ccdget(unit, make)) == NULL) 1111 return ENOENT; 1112 uc = kauth_cred_get(); 1113 1114 MODULE_HOOK_CALL(ccd_ioctl_60_hook, 1115 (dev, cmd, data, flag, l, ccdioctl), 1116 enosys(), error); 1117 if (error != ENOSYS) 1118 return error; 1119 1120 /* Must be open for writes for these commands... */ 1121 switch (cmd) { 1122 case CCDIOCSET: 1123 case CCDIOCCLR: 1124 case DIOCSDINFO: 1125 case DIOCWDINFO: 1126 case DIOCCACHESYNC: 1127 case DIOCAWEDGE: 1128 case DIOCDWEDGE: 1129 case DIOCRMWEDGES: 1130 case DIOCMWEDGES: 1131 #ifdef __HAVE_OLD_DISKLABEL 1132 case ODIOCSDINFO: 1133 case ODIOCWDINFO: 1134 #endif 1135 case DIOCKLABEL: 1136 case DIOCWLABEL: 1137 if ((flag & FWRITE) == 0) 1138 return (EBADF); 1139 } 1140 1141 mutex_enter(&cs->sc_dvlock); 1142 1143 /* Must be initialized for these... */ 1144 switch (cmd) { 1145 case CCDIOCCLR: 1146 case DIOCGDINFO: 1147 case DIOCGSTRATEGY: 1148 case DIOCGCACHE: 1149 case DIOCCACHESYNC: 1150 case DIOCAWEDGE: 1151 case DIOCDWEDGE: 1152 case DIOCLWEDGES: 1153 case DIOCMWEDGES: 1154 case DIOCSDINFO: 1155 case DIOCWDINFO: 1156 case DIOCGPARTINFO: 1157 case DIOCWLABEL: 1158 case DIOCKLABEL: 1159 case DIOCGDEFLABEL: 1160 #ifdef __HAVE_OLD_DISKLABEL 1161 case ODIOCGDINFO: 1162 case ODIOCSDINFO: 1163 case ODIOCWDINFO: 1164 case ODIOCGDEFLABEL: 1165 #endif 1166 if ((cs->sc_flags & CCDF_INITED) == 0) { 1167 error = ENXIO; 1168 goto out; 1169 } 1170 } 1171 1172 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1173 if (error != EPASSTHROUGH) 1174 goto out; 1175 1176 error = 0; 1177 switch (cmd) { 1178 case CCDIOCSET: 1179 if (cs->sc_flags & CCDF_INITED) { 1180 error = EBUSY; 1181 goto out; 1182 } 1183 1184 /* Validate the flags. */ 1185 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1186 error = EINVAL; 1187 goto out; 1188 } 1189 1190 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1191 ccio->ccio_ndisks == 0) { 1192 error = EINVAL; 1193 goto out; 1194 } 1195 1196 /* Fill in some important bits. */ 1197 cs->sc_ileave = ccio->ccio_ileave; 1198 cs->sc_nccdisks = ccio->ccio_ndisks; 1199 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1200 1201 /* 1202 * Allocate space for and copy in the array of 1203 * component pathnames and device numbers. 1204 */ 1205 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1206 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1207 error = copyin(ccio->ccio_disks, cpp, 1208 ccio->ccio_ndisks * sizeof(*cpp)); 1209 if (error) { 1210 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1211 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1212 goto out; 1213 } 1214 1215 #ifdef DEBUG 1216 if (ccddebug & CCDB_INIT) 1217 for (i = 0; i < ccio->ccio_ndisks; ++i) 1218 printf("ccdioctl: component %d: %p\n", 1219 i, cpp[i]); 1220 #endif 1221 1222 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1223 #ifdef DEBUG 1224 if (ccddebug & CCDB_INIT) 1225 printf("ccdioctl: lookedup = %d\n", lookedup); 1226 #endif 1227 error = pathbuf_copyin(cpp[i], &pb); 1228 if (error == 0) { 1229 error = dk_lookup(pb, l, &vpp[i]); 1230 } 1231 pathbuf_destroy(pb); 1232 if (error != 0) { 1233 for (j = 0; j < lookedup; ++j) 1234 (void)vn_close(vpp[j], FREAD|FWRITE, 1235 uc); 1236 kmem_free(vpp, ccio->ccio_ndisks * 1237 sizeof(*vpp)); 1238 kmem_free(cpp, ccio->ccio_ndisks * 1239 sizeof(*cpp)); 1240 goto out; 1241 } 1242 ++lookedup; 1243 } 1244 1245 /* Attach the disk. */ 1246 disk_attach(&cs->sc_dkdev); 1247 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1248 1249 /* 1250 * Initialize the ccd. Fills in the softc for us. 1251 */ 1252 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1253 for (j = 0; j < lookedup; ++j) 1254 (void)vn_close(vpp[j], FREAD|FWRITE, 1255 uc); 1256 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1257 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1258 disk_detach(&cs->sc_dkdev); 1259 mutex_exit(&cs->sc_dvlock); 1260 bufq_free(cs->sc_bufq); 1261 return error; 1262 } 1263 1264 /* We can free the temporary variables now. */ 1265 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1266 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1267 1268 /* 1269 * The ccd has been successfully initialized, so 1270 * we can place it into the array. Don't try to 1271 * read the disklabel until the disk has been attached, 1272 * because space for the disklabel is allocated 1273 * in disk_attach(); 1274 */ 1275 ccio->ccio_unit = unit; 1276 ccio->ccio_size = cs->sc_size; 1277 1278 /* Try and read the disklabel. */ 1279 ccdgetdisklabel(dev); 1280 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1281 1282 /* discover wedges */ 1283 mutex_exit(&cs->sc_dvlock); 1284 dkwedge_discover(&cs->sc_dkdev); 1285 return 0; 1286 1287 case CCDIOCCLR: 1288 /* 1289 * Don't unconfigure if any other partitions are open 1290 * or if both the character and block flavors of this 1291 * partition are open. 1292 */ 1293 part = DISKPART(dev); 1294 pmask = (1 << part); 1295 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1296 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1297 (cs->sc_dkdev.dk_copenmask & pmask))) { 1298 error = EBUSY; 1299 goto out; 1300 } 1301 1302 /* Delete all of our wedges. */ 1303 dkwedge_delall(&cs->sc_dkdev); 1304 1305 /* Stop new I/O, wait for in-flight I/O to complete. */ 1306 mutex_enter(cs->sc_iolock); 1307 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1308 cs->sc_zap = true; 1309 while (disk_isbusy(&cs->sc_dkdev) || 1310 bufq_peek(cs->sc_bufq) != NULL || 1311 cs->sc_thread != NULL) { 1312 cv_broadcast(&cs->sc_push); 1313 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1314 } 1315 mutex_exit(cs->sc_iolock); 1316 1317 /* 1318 * Free ccd_softc information and clear entry. 1319 */ 1320 1321 /* Close the components and free their pathnames. */ 1322 for (i = 0; i < cs->sc_nccdisks; ++i) { 1323 /* 1324 * XXX: this close could potentially fail and 1325 * cause Bad Things. Maybe we need to force 1326 * the close to happen? 1327 */ 1328 #ifdef DEBUG 1329 if (ccddebug & CCDB_VNODE) 1330 vprint("CCDIOCCLR: vnode info", 1331 cs->sc_cinfo[i].ci_vp); 1332 #endif 1333 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1334 uc); 1335 kmem_free(cs->sc_cinfo[i].ci_path, 1336 cs->sc_cinfo[i].ci_pathlen); 1337 } 1338 1339 /* Free interleave index. */ 1340 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1341 kmem_free(cs->sc_itable[i].ii_index, 1342 cs->sc_itable[i].ii_indexsz); 1343 } 1344 1345 /* Free component info and interleave table. */ 1346 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1347 sizeof(struct ccdcinfo)); 1348 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1349 sizeof(struct ccdiinfo)); 1350 1351 aprint_normal("%s: detached\n", cs->sc_xname); 1352 1353 /* Detach the disk. */ 1354 disk_detach(&cs->sc_dkdev); 1355 bufq_free(cs->sc_bufq); 1356 ccdput(cs); 1357 /* Don't break, otherwise cs is read again. */ 1358 return 0; 1359 1360 case DIOCGSTRATEGY: 1361 { 1362 struct disk_strategy *dks = (void *)data; 1363 1364 mutex_enter(cs->sc_iolock); 1365 if (cs->sc_bufq != NULL) 1366 strlcpy(dks->dks_name, 1367 bufq_getstrategyname(cs->sc_bufq), 1368 sizeof(dks->dks_name)); 1369 else 1370 error = EINVAL; 1371 mutex_exit(cs->sc_iolock); 1372 dks->dks_paramlen = 0; 1373 break; 1374 } 1375 1376 case DIOCGCACHE: 1377 { 1378 int dkcache = 0; 1379 1380 /* 1381 * We pass this call down to all components and report 1382 * intersection of the flags returned by the components. 1383 * If any errors out, we return error. CCD components 1384 * can not change unless the device is unconfigured, so 1385 * device feature flags will remain static. RCE/WCE can change 1386 * of course, if set directly on underlying device. 1387 */ 1388 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1389 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1390 flag, uc); 1391 if (error) 1392 break; 1393 1394 if (i == 0) 1395 dkcache = j; 1396 else 1397 dkcache = DKCACHE_COMBINE(dkcache, j); 1398 } 1399 1400 *((int *)data) = dkcache; 1401 break; 1402 } 1403 1404 case DIOCCACHESYNC: 1405 /* 1406 * We pass this call down to all components and report 1407 * the first error we encounter. 1408 */ 1409 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1410 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1411 flag, uc); 1412 if (j != 0 && error == 0) 1413 error = j; 1414 } 1415 break; 1416 1417 case DIOCWDINFO: 1418 case DIOCSDINFO: 1419 #ifdef __HAVE_OLD_DISKLABEL 1420 case ODIOCWDINFO: 1421 case ODIOCSDINFO: 1422 #endif 1423 { 1424 struct disklabel *lp; 1425 #ifdef __HAVE_OLD_DISKLABEL 1426 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1427 memset(&newlabel, 0, sizeof newlabel); 1428 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1429 lp = &newlabel; 1430 } else 1431 #endif 1432 lp = (struct disklabel *)data; 1433 1434 cs->sc_flags |= CCDF_LABELLING; 1435 1436 error = setdisklabel(cs->sc_dkdev.dk_label, 1437 lp, 0, cs->sc_dkdev.dk_cpulabel); 1438 if (error == 0) { 1439 if (cmd == DIOCWDINFO 1440 #ifdef __HAVE_OLD_DISKLABEL 1441 || cmd == ODIOCWDINFO 1442 #endif 1443 ) 1444 error = writedisklabel(CCDLABELDEV(dev), 1445 ccdstrategy, cs->sc_dkdev.dk_label, 1446 cs->sc_dkdev.dk_cpulabel); 1447 } 1448 1449 cs->sc_flags &= ~CCDF_LABELLING; 1450 break; 1451 } 1452 1453 case DIOCKLABEL: 1454 if (*(int *)data != 0) 1455 cs->sc_flags |= CCDF_KLABEL; 1456 else 1457 cs->sc_flags &= ~CCDF_KLABEL; 1458 break; 1459 1460 case DIOCWLABEL: 1461 if (*(int *)data != 0) 1462 cs->sc_flags |= CCDF_WLABEL; 1463 else 1464 cs->sc_flags &= ~CCDF_WLABEL; 1465 break; 1466 1467 case DIOCGDEFLABEL: 1468 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1469 break; 1470 1471 #ifdef __HAVE_OLD_DISKLABEL 1472 case ODIOCGDEFLABEL: 1473 ccdgetdefaultlabel(cs, &newlabel); 1474 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1475 return ENOTTY; 1476 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1477 break; 1478 #endif 1479 1480 default: 1481 error = ENOTTY; 1482 } 1483 1484 out: 1485 mutex_exit(&cs->sc_dvlock); 1486 return (error); 1487 } 1488 1489 static int 1490 ccdsize(dev_t dev) 1491 { 1492 struct ccd_softc *cs; 1493 struct disklabel *lp; 1494 int part, unit, omask, size; 1495 1496 unit = ccdunit(dev); 1497 if ((cs = ccdget(unit, 0)) == NULL) 1498 return -1; 1499 1500 if ((cs->sc_flags & CCDF_INITED) == 0) 1501 return (-1); 1502 1503 part = DISKPART(dev); 1504 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1505 lp = cs->sc_dkdev.dk_label; 1506 1507 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1508 return (-1); 1509 1510 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1511 size = -1; 1512 else 1513 size = lp->d_partitions[part].p_size * 1514 (lp->d_secsize / DEV_BSIZE); 1515 1516 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1517 return (-1); 1518 1519 return (size); 1520 } 1521 1522 static void 1523 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1524 { 1525 struct ccdgeom *ccg = &cs->sc_geom; 1526 1527 memset(lp, 0, sizeof(*lp)); 1528 1529 if (cs->sc_size > UINT32_MAX) 1530 lp->d_secperunit = UINT32_MAX; 1531 else 1532 lp->d_secperunit = cs->sc_size; 1533 lp->d_secsize = ccg->ccg_secsize; 1534 lp->d_nsectors = ccg->ccg_nsectors; 1535 lp->d_ntracks = ccg->ccg_ntracks; 1536 lp->d_ncylinders = ccg->ccg_ncylinders; 1537 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1538 1539 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1540 lp->d_type = DKTYPE_CCD; 1541 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1542 lp->d_rpm = 3600; 1543 lp->d_interleave = 1; 1544 lp->d_flags = 0; 1545 1546 lp->d_partitions[RAW_PART].p_offset = 0; 1547 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1548 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1549 lp->d_npartitions = RAW_PART + 1; 1550 1551 lp->d_magic = DISKMAGIC; 1552 lp->d_magic2 = DISKMAGIC; 1553 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1554 } 1555 1556 /* 1557 * Read the disklabel from the ccd. If one is not present, fake one 1558 * up. 1559 */ 1560 static void 1561 ccdgetdisklabel(dev_t dev) 1562 { 1563 int unit = ccdunit(dev); 1564 struct ccd_softc *cs; 1565 const char *errstring; 1566 struct disklabel *lp; 1567 struct cpu_disklabel *clp; 1568 1569 if ((cs = ccdget(unit, 0)) == NULL) 1570 return; 1571 lp = cs->sc_dkdev.dk_label; 1572 clp = cs->sc_dkdev.dk_cpulabel; 1573 KASSERT(mutex_owned(&cs->sc_dvlock)); 1574 1575 memset(clp, 0, sizeof(*clp)); 1576 1577 ccdgetdefaultlabel(cs, lp); 1578 1579 /* 1580 * Call the generic disklabel extraction routine. 1581 */ 1582 cs->sc_flags |= CCDF_RLABEL; 1583 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1584 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1585 else 1586 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1587 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1588 if (errstring) 1589 ccdmakedisklabel(cs); 1590 else { 1591 int i; 1592 struct partition *pp; 1593 1594 /* 1595 * Sanity check whether the found disklabel is valid. 1596 * 1597 * This is necessary since total size of ccd may vary 1598 * when an interleave is changed even though exactly 1599 * same componets are used, and old disklabel may used 1600 * if that is found. 1601 */ 1602 if (lp->d_secperunit < UINT32_MAX ? 1603 lp->d_secperunit != cs->sc_size : 1604 lp->d_secperunit > cs->sc_size) 1605 printf("WARNING: %s: " 1606 "total sector size in disklabel (%ju) != " 1607 "the size of ccd (%ju)\n", cs->sc_xname, 1608 (uintmax_t)lp->d_secperunit, 1609 (uintmax_t)cs->sc_size); 1610 for (i = 0; i < lp->d_npartitions; i++) { 1611 pp = &lp->d_partitions[i]; 1612 if (pp->p_offset + pp->p_size > cs->sc_size) 1613 printf("WARNING: %s: end of partition `%c' " 1614 "exceeds the size of ccd (%ju)\n", 1615 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1616 } 1617 } 1618 1619 #ifdef DEBUG 1620 /* It's actually extremely common to have unlabeled ccds. */ 1621 if (ccddebug & CCDB_LABEL) 1622 if (errstring != NULL) 1623 printf("%s: %s\n", cs->sc_xname, errstring); 1624 #endif 1625 1626 /* In-core label now valid. */ 1627 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1628 } 1629 1630 /* 1631 * Take care of things one might want to take care of in the event 1632 * that a disklabel isn't present. 1633 */ 1634 static void 1635 ccdmakedisklabel(struct ccd_softc *cs) 1636 { 1637 struct disklabel *lp = cs->sc_dkdev.dk_label; 1638 1639 /* 1640 * For historical reasons, if there's no disklabel present 1641 * the raw partition must be marked FS_BSDFFS. 1642 */ 1643 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1644 1645 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1646 1647 lp->d_checksum = dkcksum(lp); 1648 } 1649 1650 #ifdef DEBUG 1651 static void 1652 printiinfo(struct ccdiinfo *ii) 1653 { 1654 int ix, i; 1655 1656 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1657 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1658 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1659 for (i = 0; i < ii->ii_ndisk; i++) 1660 printf(" %d", ii->ii_index[i]); 1661 printf("\n"); 1662 } 1663 } 1664 #endif 1665 1666 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr,bufq_fcfs"); 1667 1668 static int 1669 ccd_modcmd(modcmd_t cmd, void *arg) 1670 { 1671 int error = 0; 1672 #ifdef _MODULE 1673 int bmajor = -1, cmajor = -1; 1674 #endif 1675 1676 1677 switch (cmd) { 1678 case MODULE_CMD_INIT: 1679 #ifdef _MODULE 1680 ccdattach(0); 1681 1682 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1683 &ccd_cdevsw, &cmajor); 1684 sysctl_kern_ccd_setup(&ccd_clog); 1685 #endif 1686 break; 1687 1688 case MODULE_CMD_FINI: 1689 #ifdef _MODULE 1690 mutex_enter(&ccd_lock); 1691 if (!LIST_EMPTY(&ccds)) { 1692 mutex_exit(&ccd_lock); 1693 error = EBUSY; 1694 } else { 1695 mutex_exit(&ccd_lock); 1696 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1697 ccddetach(); 1698 } 1699 sysctl_teardown(&ccd_clog); 1700 #endif 1701 break; 1702 1703 case MODULE_CMD_STAT: 1704 return ENOTTY; 1705 1706 default: 1707 return ENOTTY; 1708 } 1709 1710 return error; 1711 } 1712 1713 static int 1714 ccd_units_sysctl(SYSCTLFN_ARGS) 1715 { 1716 struct sysctlnode node; 1717 struct ccd_softc *sc; 1718 int error, i, nccd, *units; 1719 size_t size; 1720 1721 nccd = 0; 1722 mutex_enter(&ccd_lock); 1723 LIST_FOREACH(sc, &ccds, sc_link) 1724 nccd++; 1725 mutex_exit(&ccd_lock); 1726 1727 if (nccd != 0) { 1728 size = nccd * sizeof(*units); 1729 units = kmem_zalloc(size, KM_SLEEP); 1730 i = 0; 1731 mutex_enter(&ccd_lock); 1732 LIST_FOREACH(sc, &ccds, sc_link) { 1733 if (i >= nccd) 1734 break; 1735 units[i] = sc->sc_unit; 1736 } 1737 mutex_exit(&ccd_lock); 1738 } else { 1739 units = NULL; 1740 size = 0; 1741 } 1742 1743 node = *rnode; 1744 node.sysctl_data = units; 1745 node.sysctl_size = size; 1746 1747 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1748 if (units) 1749 kmem_free(units, size); 1750 return error; 1751 } 1752 1753 static int 1754 ccd_info_sysctl(SYSCTLFN_ARGS) 1755 { 1756 struct sysctlnode node; 1757 struct ccddiskinfo ccd; 1758 struct ccd_softc *sc; 1759 int unit; 1760 1761 if (newp == NULL || newlen != sizeof(int)) 1762 return EINVAL; 1763 1764 unit = *(const int *)newp; 1765 newp = NULL; 1766 newlen = 0; 1767 ccd.ccd_ndisks = ~0; 1768 mutex_enter(&ccd_lock); 1769 LIST_FOREACH(sc, &ccds, sc_link) { 1770 if (sc->sc_unit == unit) { 1771 ccd.ccd_ileave = sc->sc_ileave; 1772 ccd.ccd_size = sc->sc_size; 1773 ccd.ccd_ndisks = sc->sc_nccdisks; 1774 ccd.ccd_flags = sc->sc_flags; 1775 break; 1776 } 1777 } 1778 mutex_exit(&ccd_lock); 1779 1780 if (ccd.ccd_ndisks == ~0) 1781 return ENOENT; 1782 1783 node = *rnode; 1784 node.sysctl_data = &ccd; 1785 node.sysctl_size = sizeof(ccd); 1786 1787 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1788 } 1789 1790 static int 1791 ccd_components_sysctl(SYSCTLFN_ARGS) 1792 { 1793 struct sysctlnode node; 1794 int error, unit; 1795 size_t size; 1796 char *names, *p, *ep; 1797 struct ccd_softc *sc; 1798 1799 if (newp == NULL || newlen != sizeof(int)) 1800 return EINVAL; 1801 1802 size = 0; 1803 unit = *(const int *)newp; 1804 newp = NULL; 1805 newlen = 0; 1806 mutex_enter(&ccd_lock); 1807 LIST_FOREACH(sc, &ccds, sc_link) 1808 if (sc->sc_unit == unit) { 1809 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1810 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1811 break; 1812 } 1813 mutex_exit(&ccd_lock); 1814 1815 if (size == 0) 1816 return ENOENT; 1817 names = kmem_zalloc(size, KM_SLEEP); 1818 p = names; 1819 ep = names + size; 1820 mutex_enter(&ccd_lock); 1821 LIST_FOREACH(sc, &ccds, sc_link) 1822 if (sc->sc_unit == unit) { 1823 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1824 char *d = sc->sc_cinfo[i].ci_path; 1825 while (p < ep && (*p++ = *d++) != '\0') 1826 continue; 1827 } 1828 break; 1829 } 1830 mutex_exit(&ccd_lock); 1831 1832 node = *rnode; 1833 node.sysctl_data = names; 1834 node.sysctl_size = ep - names; 1835 1836 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1837 kmem_free(names, size); 1838 return error; 1839 } 1840 1841 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1842 { 1843 const struct sysctlnode *node = NULL; 1844 1845 sysctl_createv(clog, 0, NULL, &node, 1846 CTLFLAG_PERMANENT, 1847 CTLTYPE_NODE, "ccd", 1848 SYSCTL_DESCR("ConCatenated Disk state"), 1849 NULL, 0, NULL, 0, 1850 CTL_KERN, CTL_CREATE, CTL_EOL); 1851 1852 if (node == NULL) 1853 return; 1854 1855 sysctl_createv(clog, 0, &node, NULL, 1856 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1857 CTLTYPE_STRUCT, "units", 1858 SYSCTL_DESCR("List of ccd unit numbers"), 1859 ccd_units_sysctl, 0, NULL, 0, 1860 CTL_CREATE, CTL_EOL); 1861 sysctl_createv(clog, 0, &node, NULL, 1862 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1863 CTLTYPE_STRUCT, "info", 1864 SYSCTL_DESCR("Information about a CCD unit"), 1865 ccd_info_sysctl, 0, NULL, 0, 1866 CTL_CREATE, CTL_EOL); 1867 sysctl_createv(clog, 0, &node, NULL, 1868 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1869 CTLTYPE_STRUCT, "components", 1870 SYSCTL_DESCR("Information about CCD components"), 1871 ccd_components_sysctl, 0, NULL, 0, 1872 CTL_CREATE, CTL_EOL); 1873 } 1874