1 /* $NetBSD: ccd.c,v 1.166 2015/12/08 20:36:14 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.166 2015/12/08 20:36:14 christos Exp $"); 92 93 #if defined(_KERNEL_OPT) 94 #include "opt_compat_netbsd.h" 95 #endif 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/kernel.h> 100 #include <sys/proc.h> 101 #include <sys/errno.h> 102 #include <sys/buf.h> 103 #include <sys/kmem.h> 104 #include <sys/pool.h> 105 #include <sys/module.h> 106 #include <sys/namei.h> 107 #include <sys/stat.h> 108 #include <sys/ioctl.h> 109 #include <sys/disklabel.h> 110 #include <sys/device.h> 111 #include <sys/disk.h> 112 #include <sys/syslog.h> 113 #include <sys/fcntl.h> 114 #include <sys/vnode.h> 115 #include <sys/conf.h> 116 #include <sys/mutex.h> 117 #include <sys/queue.h> 118 #include <sys/kauth.h> 119 #include <sys/kthread.h> 120 #include <sys/bufq.h> 121 #include <sys/sysctl.h> 122 123 #include <uvm/uvm_extern.h> 124 125 #include <dev/ccdvar.h> 126 #include <dev/dkvar.h> 127 128 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 129 130 #include "ioconf.h" 131 132 #if defined(CCDDEBUG) && !defined(DEBUG) 133 #define DEBUG 134 #endif 135 136 #ifdef DEBUG 137 #define CCDB_FOLLOW 0x01 138 #define CCDB_INIT 0x02 139 #define CCDB_IO 0x04 140 #define CCDB_LABEL 0x08 141 #define CCDB_VNODE 0x10 142 int ccddebug = 0x00; 143 #endif 144 145 #define ccdunit(x) DISKUNIT(x) 146 147 struct ccdbuf { 148 struct buf cb_buf; /* new I/O buf */ 149 struct buf *cb_obp; /* ptr. to original I/O buf */ 150 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 151 int cb_comp; /* target component */ 152 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 153 }; 154 155 /* component buffer pool */ 156 static pool_cache_t ccd_cache; 157 158 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 159 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 160 161 #define CCDLABELDEV(dev) \ 162 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 163 164 /* called by main() at boot time */ 165 void ccddetach(void); 166 167 /* called by biodone() at interrupt time */ 168 static void ccdiodone(struct buf *); 169 170 static void ccdinterleave(struct ccd_softc *); 171 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 172 struct lwp *); 173 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 174 daddr_t, void *, long); 175 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 176 static void ccdgetdisklabel(dev_t); 177 static void ccdmakedisklabel(struct ccd_softc *); 178 static void ccdstart(struct ccd_softc *); 179 static void ccdthread(void *); 180 181 static dev_type_open(ccdopen); 182 static dev_type_close(ccdclose); 183 static dev_type_read(ccdread); 184 static dev_type_write(ccdwrite); 185 static dev_type_ioctl(ccdioctl); 186 static dev_type_strategy(ccdstrategy); 187 static dev_type_size(ccdsize); 188 189 const struct bdevsw ccd_bdevsw = { 190 .d_open = ccdopen, 191 .d_close = ccdclose, 192 .d_strategy = ccdstrategy, 193 .d_ioctl = ccdioctl, 194 .d_dump = nodump, 195 .d_psize = ccdsize, 196 .d_discard = nodiscard, 197 .d_flag = D_DISK | D_MPSAFE 198 }; 199 200 const struct cdevsw ccd_cdevsw = { 201 .d_open = ccdopen, 202 .d_close = ccdclose, 203 .d_read = ccdread, 204 .d_write = ccdwrite, 205 .d_ioctl = ccdioctl, 206 .d_stop = nostop, 207 .d_tty = notty, 208 .d_poll = nopoll, 209 .d_mmap = nommap, 210 .d_kqfilter = nokqfilter, 211 .d_discard = nodiscard, 212 .d_flag = D_DISK | D_MPSAFE 213 }; 214 215 #ifdef DEBUG 216 static void printiinfo(struct ccdiinfo *); 217 #endif 218 219 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 220 static kmutex_t ccd_lock; 221 static size_t ccd_nactive = 0; 222 223 static struct ccd_softc * 224 ccdcreate(int unit) { 225 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 226 if (sc == NULL) { 227 #ifdef DIAGNOSTIC 228 printf("%s: out of memory\n", __func__); 229 #endif 230 return NULL; 231 } 232 /* Initialize per-softc structures. */ 233 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 234 sc->sc_unit = unit; 235 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 236 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 237 cv_init(&sc->sc_stop, "ccdstop"); 238 cv_init(&sc->sc_push, "ccdthr"); 239 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 240 return sc; 241 } 242 243 static void 244 ccddestroy(struct ccd_softc *sc) { 245 mutex_obj_free(sc->sc_iolock); 246 mutex_exit(&sc->sc_dvlock); 247 mutex_destroy(&sc->sc_dvlock); 248 cv_destroy(&sc->sc_stop); 249 cv_destroy(&sc->sc_push); 250 disk_destroy(&sc->sc_dkdev); 251 kmem_free(sc, sizeof(*sc)); 252 } 253 254 static struct ccd_softc * 255 ccdget(int unit, int make) { 256 struct ccd_softc *sc; 257 if (unit < 0) { 258 #ifdef DIAGNOSTIC 259 panic("%s: unit %d!", __func__, unit); 260 #endif 261 return NULL; 262 } 263 mutex_enter(&ccd_lock); 264 LIST_FOREACH(sc, &ccds, sc_link) { 265 if (sc->sc_unit == unit) { 266 mutex_exit(&ccd_lock); 267 return sc; 268 } 269 } 270 mutex_exit(&ccd_lock); 271 if (!make) 272 return NULL; 273 if ((sc = ccdcreate(unit)) == NULL) 274 return NULL; 275 mutex_enter(&ccd_lock); 276 LIST_INSERT_HEAD(&ccds, sc, sc_link); 277 ccd_nactive++; 278 mutex_exit(&ccd_lock); 279 return sc; 280 } 281 282 static void 283 ccdput(struct ccd_softc *sc) { 284 mutex_enter(&ccd_lock); 285 LIST_REMOVE(sc, sc_link); 286 ccd_nactive--; 287 mutex_exit(&ccd_lock); 288 ccddestroy(sc); 289 } 290 291 /* 292 * Called by main() during pseudo-device attachment. All we need 293 * to do is allocate enough space for devices to be configured later. 294 */ 295 void 296 ccdattach(int num) 297 { 298 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 299 300 /* Initialize the component buffer pool. */ 301 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 302 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 303 } 304 305 void 306 ccddetach(void) 307 { 308 pool_cache_destroy(ccd_cache); 309 mutex_destroy(&ccd_lock); 310 } 311 312 static int 313 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 314 struct lwp *l) 315 { 316 struct ccdcinfo *ci = NULL; 317 int ix; 318 struct ccdgeom *ccg = &cs->sc_geom; 319 char *tmppath; 320 int error, path_alloced; 321 uint64_t psize, minsize; 322 unsigned secsize, maxsecsize; 323 struct disk_geom *dg; 324 325 #ifdef DEBUG 326 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 327 printf("%s: ccdinit\n", cs->sc_xname); 328 #endif 329 330 /* Allocate space for the component info. */ 331 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 332 KM_SLEEP); 333 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 334 335 cs->sc_size = 0; 336 337 /* 338 * Verify that each component piece exists and record 339 * relevant information about it. 340 */ 341 maxsecsize = 0; 342 minsize = 0; 343 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 344 ci = &cs->sc_cinfo[ix]; 345 ci->ci_vp = vpp[ix]; 346 347 /* 348 * Copy in the pathname of the component. 349 */ 350 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 351 error = copyinstr(cpaths[ix], tmppath, 352 MAXPATHLEN, &ci->ci_pathlen); 353 if (ci->ci_pathlen == 0) 354 error = EINVAL; 355 if (error) { 356 #ifdef DEBUG 357 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 358 printf("%s: can't copy path, error = %d\n", 359 cs->sc_xname, error); 360 #endif 361 goto out; 362 } 363 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 364 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 365 path_alloced++; 366 367 /* 368 * XXX: Cache the component's dev_t. 369 */ 370 ci->ci_dev = vpp[ix]->v_rdev; 371 372 /* 373 * Get partition information for the component. 374 */ 375 error = getdisksize(vpp[ix], &psize, &secsize); 376 if (error) { 377 #ifdef DEBUG 378 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 379 printf("%s: %s: disksize failed, error = %d\n", 380 cs->sc_xname, ci->ci_path, error); 381 #endif 382 goto out; 383 } 384 385 /* 386 * Calculate the size, truncating to an interleave 387 * boundary if necessary. 388 */ 389 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 390 if (cs->sc_ileave > 1) 391 psize -= psize % cs->sc_ileave; 392 393 if (psize == 0) { 394 #ifdef DEBUG 395 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 396 printf("%s: %s: size == 0\n", 397 cs->sc_xname, ci->ci_path); 398 #endif 399 error = ENODEV; 400 goto out; 401 } 402 403 if (minsize == 0 || psize < minsize) 404 minsize = psize; 405 ci->ci_size = psize; 406 cs->sc_size += psize; 407 } 408 409 /* 410 * Don't allow the interleave to be smaller than 411 * the biggest component sector. 412 */ 413 if ((cs->sc_ileave > 0) && 414 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 415 #ifdef DEBUG 416 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 417 printf("%s: interleave must be at least %d\n", 418 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 419 #endif 420 error = EINVAL; 421 goto out; 422 } 423 424 /* 425 * If uniform interleave is desired set all sizes to that of 426 * the smallest component. 427 */ 428 if (cs->sc_flags & CCDF_UNIFORM) { 429 for (ci = cs->sc_cinfo; 430 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 431 ci->ci_size = minsize; 432 433 cs->sc_size = cs->sc_nccdisks * minsize; 434 } 435 436 /* 437 * Construct the interleave table. 438 */ 439 ccdinterleave(cs); 440 441 /* 442 * Create pseudo-geometry based on 1MB cylinders. It's 443 * pretty close. 444 */ 445 ccg->ccg_secsize = DEV_BSIZE; 446 ccg->ccg_ntracks = 1; 447 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 448 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 449 450 dg = &cs->sc_dkdev.dk_geom; 451 memset(dg, 0, sizeof(*dg)); 452 dg->dg_secperunit = cs->sc_size; 453 dg->dg_secsize = ccg->ccg_secsize; 454 dg->dg_nsectors = ccg->ccg_nsectors; 455 dg->dg_ntracks = ccg->ccg_ntracks; 456 dg->dg_ncylinders = ccg->ccg_ncylinders; 457 458 if (cs->sc_ileave > 0) 459 aprint_normal("%s: Interleaving %d component%s " 460 "(%d block interleave)\n", cs->sc_xname, 461 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 462 cs->sc_ileave); 463 else 464 aprint_normal("%s: Concatenating %d component%s\n", 465 cs->sc_xname, 466 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 467 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 468 ci = &cs->sc_cinfo[ix]; 469 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 470 ci->ci_path, (uintmax_t)ci->ci_size); 471 } 472 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 473 474 /* 475 * Create thread to handle deferred I/O. 476 */ 477 cs->sc_zap = false; 478 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 479 cs, &cs->sc_thread, "%s", cs->sc_xname); 480 if (error) { 481 printf("ccdinit: can't create thread: %d\n", error); 482 goto out; 483 } 484 485 /* 486 * Only now that everything is set up can we enable the device. 487 */ 488 mutex_enter(cs->sc_iolock); 489 cs->sc_flags |= CCDF_INITED; 490 mutex_exit(cs->sc_iolock); 491 kmem_free(tmppath, MAXPATHLEN); 492 return (0); 493 494 out: 495 for (ix = 0; ix < path_alloced; ix++) { 496 kmem_free(cs->sc_cinfo[ix].ci_path, 497 cs->sc_cinfo[ix].ci_pathlen); 498 } 499 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 500 kmem_free(tmppath, MAXPATHLEN); 501 return (error); 502 } 503 504 static void 505 ccdinterleave(struct ccd_softc *cs) 506 { 507 struct ccdcinfo *ci, *smallci; 508 struct ccdiinfo *ii; 509 daddr_t bn, lbn; 510 int ix; 511 u_long size; 512 513 #ifdef DEBUG 514 if (ccddebug & CCDB_INIT) 515 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 516 #endif 517 /* 518 * Allocate an interleave table. 519 * Chances are this is too big, but we don't care. 520 */ 521 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 522 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 523 524 /* 525 * Trivial case: no interleave (actually interleave of disk size). 526 * Each table entry represents a single component in its entirety. 527 */ 528 if (cs->sc_ileave == 0) { 529 bn = 0; 530 ii = cs->sc_itable; 531 532 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 533 /* Allocate space for ii_index. */ 534 ii->ii_indexsz = sizeof(int); 535 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 536 ii->ii_ndisk = 1; 537 ii->ii_startblk = bn; 538 ii->ii_startoff = 0; 539 ii->ii_index[0] = ix; 540 bn += cs->sc_cinfo[ix].ci_size; 541 ii++; 542 } 543 ii->ii_ndisk = 0; 544 #ifdef DEBUG 545 if (ccddebug & CCDB_INIT) 546 printiinfo(cs->sc_itable); 547 #endif 548 return; 549 } 550 551 /* 552 * The following isn't fast or pretty; it doesn't have to be. 553 */ 554 size = 0; 555 bn = lbn = 0; 556 for (ii = cs->sc_itable; ; ii++) { 557 /* Allocate space for ii_index. */ 558 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 559 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 560 561 /* 562 * Locate the smallest of the remaining components 563 */ 564 smallci = NULL; 565 for (ci = cs->sc_cinfo; 566 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 567 if (ci->ci_size > size && 568 (smallci == NULL || 569 ci->ci_size < smallci->ci_size)) 570 smallci = ci; 571 572 /* 573 * Nobody left, all done 574 */ 575 if (smallci == NULL) { 576 ii->ii_ndisk = 0; 577 break; 578 } 579 580 /* 581 * Record starting logical block and component offset 582 */ 583 ii->ii_startblk = bn / cs->sc_ileave; 584 ii->ii_startoff = lbn; 585 586 /* 587 * Determine how many disks take part in this interleave 588 * and record their indices. 589 */ 590 ix = 0; 591 for (ci = cs->sc_cinfo; 592 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 593 if (ci->ci_size >= smallci->ci_size) 594 ii->ii_index[ix++] = ci - cs->sc_cinfo; 595 ii->ii_ndisk = ix; 596 bn += ix * (smallci->ci_size - size); 597 lbn = smallci->ci_size / cs->sc_ileave; 598 size = smallci->ci_size; 599 } 600 #ifdef DEBUG 601 if (ccddebug & CCDB_INIT) 602 printiinfo(cs->sc_itable); 603 #endif 604 } 605 606 /* ARGSUSED */ 607 static int 608 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 609 { 610 int unit = ccdunit(dev); 611 struct ccd_softc *cs; 612 struct disklabel *lp; 613 int error = 0, part, pmask; 614 615 #ifdef DEBUG 616 if (ccddebug & CCDB_FOLLOW) 617 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 618 #endif 619 if ((cs = ccdget(unit, 1)) == NULL) 620 return ENXIO; 621 622 mutex_enter(&cs->sc_dvlock); 623 624 lp = cs->sc_dkdev.dk_label; 625 626 part = DISKPART(dev); 627 pmask = (1 << part); 628 629 /* 630 * If we're initialized, check to see if there are any other 631 * open partitions. If not, then it's safe to update 632 * the in-core disklabel. Only read the disklabel if it is 633 * not already valid. 634 */ 635 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 636 cs->sc_dkdev.dk_openmask == 0) 637 ccdgetdisklabel(dev); 638 639 /* Check that the partition exists. */ 640 if (part != RAW_PART) { 641 if (((cs->sc_flags & CCDF_INITED) == 0) || 642 ((part >= lp->d_npartitions) || 643 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 644 error = ENXIO; 645 goto done; 646 } 647 } 648 649 /* Prevent our unit from being unconfigured while open. */ 650 switch (fmt) { 651 case S_IFCHR: 652 cs->sc_dkdev.dk_copenmask |= pmask; 653 break; 654 655 case S_IFBLK: 656 cs->sc_dkdev.dk_bopenmask |= pmask; 657 break; 658 } 659 cs->sc_dkdev.dk_openmask = 660 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 661 662 done: 663 mutex_exit(&cs->sc_dvlock); 664 return (error); 665 } 666 667 /* ARGSUSED */ 668 static int 669 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 670 { 671 int unit = ccdunit(dev); 672 struct ccd_softc *cs; 673 int part; 674 675 #ifdef DEBUG 676 if (ccddebug & CCDB_FOLLOW) 677 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 678 #endif 679 680 if ((cs = ccdget(unit, 0)) == NULL) 681 return ENXIO; 682 683 mutex_enter(&cs->sc_dvlock); 684 685 part = DISKPART(dev); 686 687 /* ...that much closer to allowing unconfiguration... */ 688 switch (fmt) { 689 case S_IFCHR: 690 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 691 break; 692 693 case S_IFBLK: 694 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 695 break; 696 } 697 cs->sc_dkdev.dk_openmask = 698 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 699 700 if (cs->sc_dkdev.dk_openmask == 0) { 701 if ((cs->sc_flags & CCDF_KLABEL) == 0) 702 cs->sc_flags &= ~CCDF_VLABEL; 703 } 704 705 mutex_exit(&cs->sc_dvlock); 706 return (0); 707 } 708 709 static bool 710 ccdbackoff(struct ccd_softc *cs) 711 { 712 713 /* XXX Arbitrary, should be a uvm call. */ 714 return uvmexp.free < (uvmexp.freemin >> 1) && 715 disk_isbusy(&cs->sc_dkdev); 716 } 717 718 static void 719 ccdthread(void *cookie) 720 { 721 struct ccd_softc *cs; 722 723 cs = cookie; 724 725 #ifdef DEBUG 726 if (ccddebug & CCDB_FOLLOW) 727 printf("ccdthread: hello\n"); 728 #endif 729 730 mutex_enter(cs->sc_iolock); 731 while (__predict_true(!cs->sc_zap)) { 732 if (bufq_peek(cs->sc_bufq) == NULL) { 733 /* Nothing to do. */ 734 cv_wait(&cs->sc_push, cs->sc_iolock); 735 continue; 736 } 737 if (ccdbackoff(cs)) { 738 /* Wait for memory to become available. */ 739 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 740 continue; 741 } 742 #ifdef DEBUG 743 if (ccddebug & CCDB_FOLLOW) 744 printf("ccdthread: dispatching I/O\n"); 745 #endif 746 ccdstart(cs); 747 mutex_enter(cs->sc_iolock); 748 } 749 cs->sc_thread = NULL; 750 mutex_exit(cs->sc_iolock); 751 #ifdef DEBUG 752 if (ccddebug & CCDB_FOLLOW) 753 printf("ccdthread: goodbye\n"); 754 #endif 755 kthread_exit(0); 756 } 757 758 static void 759 ccdstrategy(struct buf *bp) 760 { 761 int unit = ccdunit(bp->b_dev); 762 struct ccd_softc *cs; 763 if ((cs = ccdget(unit, 0)) == NULL) 764 return; 765 766 /* Must be open or reading label. */ 767 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 768 (cs->sc_flags & CCDF_RLABEL) != 0); 769 770 mutex_enter(cs->sc_iolock); 771 /* Synchronize with device init/uninit. */ 772 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 773 mutex_exit(cs->sc_iolock); 774 #ifdef DEBUG 775 if (ccddebug & CCDB_FOLLOW) 776 printf("ccdstrategy: unit %d: not inited\n", unit); 777 #endif 778 bp->b_error = ENXIO; 779 bp->b_resid = bp->b_bcount; 780 biodone(bp); 781 return; 782 } 783 784 /* Defer to thread if system is low on memory. */ 785 bufq_put(cs->sc_bufq, bp); 786 if (__predict_false(ccdbackoff(cs))) { 787 mutex_exit(cs->sc_iolock); 788 #ifdef DEBUG 789 if (ccddebug & CCDB_FOLLOW) 790 printf("ccdstrategy: holding off on I/O\n"); 791 #endif 792 return; 793 } 794 ccdstart(cs); 795 } 796 797 static void 798 ccdstart(struct ccd_softc *cs) 799 { 800 daddr_t blkno; 801 int wlabel; 802 struct disklabel *lp; 803 long bcount, rcount; 804 struct ccdbuf *cbp; 805 char *addr; 806 daddr_t bn; 807 vnode_t *vp; 808 buf_t *bp; 809 810 KASSERT(mutex_owned(cs->sc_iolock)); 811 812 disk_busy(&cs->sc_dkdev); 813 bp = bufq_get(cs->sc_bufq); 814 KASSERT(bp != NULL); 815 816 #ifdef DEBUG 817 if (ccddebug & CCDB_FOLLOW) 818 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 819 #endif 820 821 /* If it's a nil transfer, wake up the top half now. */ 822 if (bp->b_bcount == 0) 823 goto done; 824 825 lp = cs->sc_dkdev.dk_label; 826 827 /* 828 * Do bounds checking and adjust transfer. If there's an 829 * error, the bounds check will flag that for us. Convert 830 * the partition relative block number to an absolute. 831 */ 832 blkno = bp->b_blkno; 833 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 834 if (DISKPART(bp->b_dev) != RAW_PART) { 835 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 836 goto done; 837 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 838 } 839 mutex_exit(cs->sc_iolock); 840 bp->b_rawblkno = blkno; 841 842 /* Allocate the component buffers and start I/O! */ 843 bp->b_resid = bp->b_bcount; 844 bn = bp->b_rawblkno; 845 addr = bp->b_data; 846 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 847 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 848 rcount = cbp->cb_buf.b_bcount; 849 bn += btodb(rcount); 850 addr += rcount; 851 vp = cbp->cb_buf.b_vp; 852 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 853 mutex_enter(vp->v_interlock); 854 vp->v_numoutput++; 855 mutex_exit(vp->v_interlock); 856 } 857 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 858 } 859 return; 860 861 done: 862 disk_unbusy(&cs->sc_dkdev, 0, 0); 863 cv_broadcast(&cs->sc_stop); 864 cv_broadcast(&cs->sc_push); 865 mutex_exit(cs->sc_iolock); 866 bp->b_resid = bp->b_bcount; 867 biodone(bp); 868 } 869 870 /* 871 * Build a component buffer header. 872 */ 873 static struct ccdbuf * 874 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 875 long bcount) 876 { 877 struct ccdcinfo *ci; 878 struct ccdbuf *cbp; 879 daddr_t cbn, cboff; 880 u_int64_t cbc; 881 int ccdisk; 882 883 #ifdef DEBUG 884 if (ccddebug & CCDB_IO) 885 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 886 cs, bp, bn, addr, bcount); 887 #endif 888 /* 889 * Determine which component bn falls in. 890 */ 891 cbn = bn; 892 cboff = 0; 893 894 /* 895 * Serially concatenated 896 */ 897 if (cs->sc_ileave == 0) { 898 daddr_t sblk; 899 900 sblk = 0; 901 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 902 cbn >= sblk + ci->ci_size; 903 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 904 sblk += ci->ci_size; 905 cbn -= sblk; 906 } 907 /* 908 * Interleaved 909 */ 910 else { 911 struct ccdiinfo *ii; 912 int off; 913 914 cboff = cbn % cs->sc_ileave; 915 cbn /= cs->sc_ileave; 916 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 917 if (ii->ii_startblk > cbn) 918 break; 919 ii--; 920 off = cbn - ii->ii_startblk; 921 if (ii->ii_ndisk == 1) { 922 ccdisk = ii->ii_index[0]; 923 cbn = ii->ii_startoff + off; 924 } else { 925 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 926 cbn = ii->ii_startoff + off / ii->ii_ndisk; 927 } 928 cbn *= cs->sc_ileave; 929 ci = &cs->sc_cinfo[ccdisk]; 930 } 931 932 /* 933 * Fill in the component buf structure. 934 */ 935 cbp = CCD_GETBUF(); 936 KASSERT(cbp != NULL); 937 buf_init(&cbp->cb_buf); 938 cbp->cb_buf.b_flags = bp->b_flags; 939 cbp->cb_buf.b_oflags = bp->b_oflags; 940 cbp->cb_buf.b_cflags = bp->b_cflags; 941 cbp->cb_buf.b_iodone = ccdiodone; 942 cbp->cb_buf.b_proc = bp->b_proc; 943 cbp->cb_buf.b_dev = ci->ci_dev; 944 cbp->cb_buf.b_blkno = cbn + cboff; 945 cbp->cb_buf.b_data = addr; 946 cbp->cb_buf.b_vp = ci->ci_vp; 947 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 948 if (cs->sc_ileave == 0) 949 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 950 else 951 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 952 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 953 954 /* 955 * context for ccdiodone 956 */ 957 cbp->cb_obp = bp; 958 cbp->cb_sc = cs; 959 cbp->cb_comp = ccdisk; 960 961 BIO_COPYPRIO(&cbp->cb_buf, bp); 962 963 #ifdef DEBUG 964 if (ccddebug & CCDB_IO) 965 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 966 " bcnt %d\n", 967 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 968 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 969 cbp->cb_buf.b_bcount); 970 #endif 971 972 return (cbp); 973 } 974 975 /* 976 * Called at interrupt time. 977 * Mark the component as done and if all components are done, 978 * take a ccd interrupt. 979 */ 980 static void 981 ccdiodone(struct buf *vbp) 982 { 983 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 984 struct buf *bp = cbp->cb_obp; 985 struct ccd_softc *cs = cbp->cb_sc; 986 int count; 987 988 #ifdef DEBUG 989 if (ccddebug & CCDB_FOLLOW) 990 printf("ccdiodone(%p)\n", cbp); 991 if (ccddebug & CCDB_IO) { 992 printf("ccdiodone: bp %p bcount %d resid %d\n", 993 bp, bp->b_bcount, bp->b_resid); 994 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 995 " bcnt %d\n", 996 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 997 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 998 cbp->cb_buf.b_bcount); 999 } 1000 #endif 1001 1002 if (cbp->cb_buf.b_error != 0) { 1003 bp->b_error = cbp->cb_buf.b_error; 1004 printf("%s: error %d on component %d\n", 1005 cs->sc_xname, bp->b_error, cbp->cb_comp); 1006 } 1007 count = cbp->cb_buf.b_bcount; 1008 buf_destroy(&cbp->cb_buf); 1009 CCD_PUTBUF(cbp); 1010 1011 /* 1012 * If all done, "interrupt". 1013 */ 1014 mutex_enter(cs->sc_iolock); 1015 bp->b_resid -= count; 1016 if (bp->b_resid < 0) 1017 panic("ccdiodone: count"); 1018 if (bp->b_resid == 0) { 1019 /* 1020 * Request is done for better or worse, wakeup the top half. 1021 */ 1022 if (bp->b_error != 0) 1023 bp->b_resid = bp->b_bcount; 1024 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1025 (bp->b_flags & B_READ)); 1026 if (!disk_isbusy(&cs->sc_dkdev)) { 1027 if (bufq_peek(cs->sc_bufq) != NULL) { 1028 cv_broadcast(&cs->sc_push); 1029 } 1030 cv_broadcast(&cs->sc_stop); 1031 } 1032 mutex_exit(cs->sc_iolock); 1033 biodone(bp); 1034 } else 1035 mutex_exit(cs->sc_iolock); 1036 } 1037 1038 /* ARGSUSED */ 1039 static int 1040 ccdread(dev_t dev, struct uio *uio, int flags) 1041 { 1042 int unit = ccdunit(dev); 1043 struct ccd_softc *cs; 1044 1045 #ifdef DEBUG 1046 if (ccddebug & CCDB_FOLLOW) 1047 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1048 #endif 1049 if ((cs = ccdget(unit, 0)) == NULL) 1050 return 0; 1051 1052 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1053 if ((cs->sc_flags & CCDF_INITED) == 0) 1054 return (ENXIO); 1055 1056 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1057 } 1058 1059 /* ARGSUSED */ 1060 static int 1061 ccdwrite(dev_t dev, struct uio *uio, int flags) 1062 { 1063 int unit = ccdunit(dev); 1064 struct ccd_softc *cs; 1065 1066 #ifdef DEBUG 1067 if (ccddebug & CCDB_FOLLOW) 1068 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1069 #endif 1070 if ((cs = ccdget(unit, 0)) == NULL) 1071 return ENOENT; 1072 1073 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1074 if ((cs->sc_flags & CCDF_INITED) == 0) 1075 return (ENXIO); 1076 1077 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1078 } 1079 1080 static int 1081 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1082 { 1083 int unit = ccdunit(dev); 1084 int i, j, lookedup = 0, error = 0; 1085 int part, pmask, make; 1086 struct ccd_softc *cs; 1087 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1088 kauth_cred_t uc; 1089 char **cpp; 1090 struct pathbuf *pb; 1091 struct vnode **vpp; 1092 #ifdef __HAVE_OLD_DISKLABEL 1093 struct disklabel newlabel; 1094 #endif 1095 1096 switch (cmd) { 1097 #if defined(COMPAT_60) && !defined(_LP64) 1098 case CCDIOCSET_60: 1099 #endif 1100 case CCDIOCSET: 1101 make = 1; 1102 break; 1103 default: 1104 make = 0; 1105 break; 1106 } 1107 1108 if ((cs = ccdget(unit, make)) == NULL) 1109 return ENOENT; 1110 uc = kauth_cred_get(); 1111 1112 /* 1113 * Compat code must not be called if on a platform where 1114 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will 1115 * be the same as CCDIOCSET_60 1116 */ 1117 #if defined(COMPAT_60) && !defined(_LP64) 1118 switch (cmd) { 1119 case CCDIOCSET_60: { 1120 struct ccd_ioctl ccionew; 1121 struct ccd_ioctl_60 *ccio60 = 1122 (struct ccd_ioctl_60 *)data; 1123 ccionew.ccio_disks = ccio->ccio_disks; 1124 ccionew.ccio_ndisks = ccio->ccio_ndisks; 1125 ccionew.ccio_ileave = ccio->ccio_ileave; 1126 ccionew.ccio_flags = ccio->ccio_flags; 1127 ccionew.ccio_unit = ccio->ccio_unit; 1128 error = ccdioctl(dev, CCDIOCSET, &ccionew, flag, l); 1129 if (!error) { 1130 /* Copy data back, adjust types if necessary */ 1131 ccio60->ccio_disks = ccionew.ccio_disks; 1132 ccio60->ccio_ndisks = ccionew.ccio_ndisks; 1133 ccio60->ccio_ileave = ccionew.ccio_ileave; 1134 ccio60->ccio_flags = ccionew.ccio_flags; 1135 ccio60->ccio_unit = ccionew.ccio_unit; 1136 ccio60->ccio_size = (size_t)ccionew.ccio_size; 1137 } 1138 return error; 1139 } 1140 break; 1141 1142 case CCDIOCCLR_60: 1143 /* 1144 * ccio_size member not used, so existing struct OK 1145 * drop through to existing non-compat version 1146 */ 1147 cmd = CCDIOCCLR; 1148 break; 1149 } 1150 #endif /* COMPAT_60 && !_LP64*/ 1151 1152 /* Must be open for writes for these commands... */ 1153 switch (cmd) { 1154 case CCDIOCSET: 1155 case CCDIOCCLR: 1156 case DIOCSDINFO: 1157 case DIOCWDINFO: 1158 case DIOCCACHESYNC: 1159 case DIOCAWEDGE: 1160 case DIOCDWEDGE: 1161 case DIOCMWEDGES: 1162 #ifdef __HAVE_OLD_DISKLABEL 1163 case ODIOCSDINFO: 1164 case ODIOCWDINFO: 1165 #endif 1166 case DIOCKLABEL: 1167 case DIOCWLABEL: 1168 if ((flag & FWRITE) == 0) 1169 return (EBADF); 1170 } 1171 1172 mutex_enter(&cs->sc_dvlock); 1173 1174 /* Must be initialized for these... */ 1175 switch (cmd) { 1176 case CCDIOCCLR: 1177 case DIOCGDINFO: 1178 case DIOCCACHESYNC: 1179 case DIOCAWEDGE: 1180 case DIOCDWEDGE: 1181 case DIOCLWEDGES: 1182 case DIOCMWEDGES: 1183 case DIOCSDINFO: 1184 case DIOCWDINFO: 1185 case DIOCGPARTINFO: 1186 case DIOCWLABEL: 1187 case DIOCKLABEL: 1188 case DIOCGDEFLABEL: 1189 #ifdef __HAVE_OLD_DISKLABEL 1190 case ODIOCGDINFO: 1191 case ODIOCSDINFO: 1192 case ODIOCWDINFO: 1193 case ODIOCGDEFLABEL: 1194 #endif 1195 if ((cs->sc_flags & CCDF_INITED) == 0) { 1196 error = ENXIO; 1197 goto out; 1198 } 1199 } 1200 1201 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1202 if (error != EPASSTHROUGH) 1203 goto out; 1204 1205 error = 0; 1206 switch (cmd) { 1207 case CCDIOCSET: 1208 if (cs->sc_flags & CCDF_INITED) { 1209 error = EBUSY; 1210 goto out; 1211 } 1212 1213 /* Validate the flags. */ 1214 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1215 error = EINVAL; 1216 goto out; 1217 } 1218 1219 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1220 ccio->ccio_ndisks == 0) { 1221 error = EINVAL; 1222 goto out; 1223 } 1224 1225 /* Fill in some important bits. */ 1226 cs->sc_ileave = ccio->ccio_ileave; 1227 cs->sc_nccdisks = ccio->ccio_ndisks; 1228 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1229 1230 /* 1231 * Allocate space for and copy in the array of 1232 * component pathnames and device numbers. 1233 */ 1234 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1235 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1236 error = copyin(ccio->ccio_disks, cpp, 1237 ccio->ccio_ndisks * sizeof(*cpp)); 1238 if (error) { 1239 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1240 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1241 goto out; 1242 } 1243 1244 #ifdef DEBUG 1245 if (ccddebug & CCDB_INIT) 1246 for (i = 0; i < ccio->ccio_ndisks; ++i) 1247 printf("ccdioctl: component %d: %p\n", 1248 i, cpp[i]); 1249 #endif 1250 1251 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1252 #ifdef DEBUG 1253 if (ccddebug & CCDB_INIT) 1254 printf("ccdioctl: lookedup = %d\n", lookedup); 1255 #endif 1256 error = pathbuf_copyin(cpp[i], &pb); 1257 if (error == 0) { 1258 error = dk_lookup(pb, l, &vpp[i]); 1259 } 1260 pathbuf_destroy(pb); 1261 if (error != 0) { 1262 for (j = 0; j < lookedup; ++j) 1263 (void)vn_close(vpp[j], FREAD|FWRITE, 1264 uc); 1265 kmem_free(vpp, ccio->ccio_ndisks * 1266 sizeof(*vpp)); 1267 kmem_free(cpp, ccio->ccio_ndisks * 1268 sizeof(*cpp)); 1269 goto out; 1270 } 1271 ++lookedup; 1272 } 1273 1274 /* Attach the disk. */ 1275 disk_attach(&cs->sc_dkdev); 1276 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1277 1278 /* 1279 * Initialize the ccd. Fills in the softc for us. 1280 */ 1281 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1282 for (j = 0; j < lookedup; ++j) 1283 (void)vn_close(vpp[j], FREAD|FWRITE, 1284 uc); 1285 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1286 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1287 disk_detach(&cs->sc_dkdev); 1288 bufq_free(cs->sc_bufq); 1289 goto out; 1290 } 1291 1292 /* We can free the temporary variables now. */ 1293 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1294 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1295 1296 /* 1297 * The ccd has been successfully initialized, so 1298 * we can place it into the array. Don't try to 1299 * read the disklabel until the disk has been attached, 1300 * because space for the disklabel is allocated 1301 * in disk_attach(); 1302 */ 1303 ccio->ccio_unit = unit; 1304 ccio->ccio_size = cs->sc_size; 1305 1306 /* Try and read the disklabel. */ 1307 ccdgetdisklabel(dev); 1308 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1309 1310 /* discover wedges */ 1311 mutex_exit(&cs->sc_dvlock); 1312 dkwedge_discover(&cs->sc_dkdev); 1313 return 0; 1314 1315 case CCDIOCCLR: 1316 /* 1317 * Don't unconfigure if any other partitions are open 1318 * or if both the character and block flavors of this 1319 * partition are open. 1320 */ 1321 part = DISKPART(dev); 1322 pmask = (1 << part); 1323 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1324 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1325 (cs->sc_dkdev.dk_copenmask & pmask))) { 1326 error = EBUSY; 1327 goto out; 1328 } 1329 1330 /* Delete all of our wedges. */ 1331 dkwedge_delall(&cs->sc_dkdev); 1332 1333 /* Stop new I/O, wait for in-flight I/O to complete. */ 1334 mutex_enter(cs->sc_iolock); 1335 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1336 cs->sc_zap = true; 1337 while (disk_isbusy(&cs->sc_dkdev) || 1338 bufq_peek(cs->sc_bufq) != NULL || 1339 cs->sc_thread != NULL) { 1340 cv_broadcast(&cs->sc_push); 1341 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1342 } 1343 mutex_exit(cs->sc_iolock); 1344 1345 /* 1346 * Free ccd_softc information and clear entry. 1347 */ 1348 1349 /* Close the components and free their pathnames. */ 1350 for (i = 0; i < cs->sc_nccdisks; ++i) { 1351 /* 1352 * XXX: this close could potentially fail and 1353 * cause Bad Things. Maybe we need to force 1354 * the close to happen? 1355 */ 1356 #ifdef DEBUG 1357 if (ccddebug & CCDB_VNODE) 1358 vprint("CCDIOCCLR: vnode info", 1359 cs->sc_cinfo[i].ci_vp); 1360 #endif 1361 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1362 uc); 1363 kmem_free(cs->sc_cinfo[i].ci_path, 1364 cs->sc_cinfo[i].ci_pathlen); 1365 } 1366 1367 /* Free interleave index. */ 1368 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1369 kmem_free(cs->sc_itable[i].ii_index, 1370 cs->sc_itable[i].ii_indexsz); 1371 } 1372 1373 /* Free component info and interleave table. */ 1374 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1375 sizeof(struct ccdcinfo)); 1376 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1377 sizeof(struct ccdiinfo)); 1378 1379 aprint_normal("%s: detached\n", cs->sc_xname); 1380 1381 /* Detach the disk. */ 1382 disk_detach(&cs->sc_dkdev); 1383 bufq_free(cs->sc_bufq); 1384 ccdput(cs); 1385 /* Don't break, otherwise cs is read again. */ 1386 return 0; 1387 1388 case DIOCCACHESYNC: 1389 /* 1390 * We pass this call down to all components and report 1391 * the first error we encounter. 1392 */ 1393 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1394 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1395 flag, uc); 1396 if (j != 0 && error == 0) 1397 error = j; 1398 } 1399 break; 1400 1401 case DIOCWDINFO: 1402 case DIOCSDINFO: 1403 #ifdef __HAVE_OLD_DISKLABEL 1404 case ODIOCWDINFO: 1405 case ODIOCSDINFO: 1406 #endif 1407 { 1408 struct disklabel *lp; 1409 #ifdef __HAVE_OLD_DISKLABEL 1410 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1411 memset(&newlabel, 0, sizeof newlabel); 1412 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1413 lp = &newlabel; 1414 } else 1415 #endif 1416 lp = (struct disklabel *)data; 1417 1418 cs->sc_flags |= CCDF_LABELLING; 1419 1420 error = setdisklabel(cs->sc_dkdev.dk_label, 1421 lp, 0, cs->sc_dkdev.dk_cpulabel); 1422 if (error == 0) { 1423 if (cmd == DIOCWDINFO 1424 #ifdef __HAVE_OLD_DISKLABEL 1425 || cmd == ODIOCWDINFO 1426 #endif 1427 ) 1428 error = writedisklabel(CCDLABELDEV(dev), 1429 ccdstrategy, cs->sc_dkdev.dk_label, 1430 cs->sc_dkdev.dk_cpulabel); 1431 } 1432 1433 cs->sc_flags &= ~CCDF_LABELLING; 1434 break; 1435 } 1436 1437 case DIOCKLABEL: 1438 if (*(int *)data != 0) 1439 cs->sc_flags |= CCDF_KLABEL; 1440 else 1441 cs->sc_flags &= ~CCDF_KLABEL; 1442 break; 1443 1444 case DIOCWLABEL: 1445 if (*(int *)data != 0) 1446 cs->sc_flags |= CCDF_WLABEL; 1447 else 1448 cs->sc_flags &= ~CCDF_WLABEL; 1449 break; 1450 1451 case DIOCGDEFLABEL: 1452 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1453 break; 1454 1455 #ifdef __HAVE_OLD_DISKLABEL 1456 case ODIOCGDEFLABEL: 1457 ccdgetdefaultlabel(cs, &newlabel); 1458 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1459 return ENOTTY; 1460 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1461 break; 1462 #endif 1463 1464 default: 1465 error = ENOTTY; 1466 } 1467 1468 out: 1469 mutex_exit(&cs->sc_dvlock); 1470 return (error); 1471 } 1472 1473 static int 1474 ccdsize(dev_t dev) 1475 { 1476 struct ccd_softc *cs; 1477 struct disklabel *lp; 1478 int part, unit, omask, size; 1479 1480 unit = ccdunit(dev); 1481 if ((cs = ccdget(unit, 0)) == NULL) 1482 return -1; 1483 1484 if ((cs->sc_flags & CCDF_INITED) == 0) 1485 return (-1); 1486 1487 part = DISKPART(dev); 1488 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1489 lp = cs->sc_dkdev.dk_label; 1490 1491 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1492 return (-1); 1493 1494 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1495 size = -1; 1496 else 1497 size = lp->d_partitions[part].p_size * 1498 (lp->d_secsize / DEV_BSIZE); 1499 1500 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1501 return (-1); 1502 1503 return (size); 1504 } 1505 1506 static void 1507 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1508 { 1509 struct ccdgeom *ccg = &cs->sc_geom; 1510 1511 memset(lp, 0, sizeof(*lp)); 1512 1513 if (cs->sc_size > UINT32_MAX) 1514 lp->d_secperunit = UINT32_MAX; 1515 else 1516 lp->d_secperunit = cs->sc_size; 1517 lp->d_secsize = ccg->ccg_secsize; 1518 lp->d_nsectors = ccg->ccg_nsectors; 1519 lp->d_ntracks = ccg->ccg_ntracks; 1520 lp->d_ncylinders = ccg->ccg_ncylinders; 1521 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1522 1523 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1524 lp->d_type = DKTYPE_CCD; 1525 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1526 lp->d_rpm = 3600; 1527 lp->d_interleave = 1; 1528 lp->d_flags = 0; 1529 1530 lp->d_partitions[RAW_PART].p_offset = 0; 1531 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1532 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1533 lp->d_npartitions = RAW_PART + 1; 1534 1535 lp->d_magic = DISKMAGIC; 1536 lp->d_magic2 = DISKMAGIC; 1537 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1538 } 1539 1540 /* 1541 * Read the disklabel from the ccd. If one is not present, fake one 1542 * up. 1543 */ 1544 static void 1545 ccdgetdisklabel(dev_t dev) 1546 { 1547 int unit = ccdunit(dev); 1548 struct ccd_softc *cs; 1549 const char *errstring; 1550 struct disklabel *lp; 1551 struct cpu_disklabel *clp; 1552 1553 if ((cs = ccdget(unit, 0)) == NULL) 1554 return; 1555 lp = cs->sc_dkdev.dk_label; 1556 clp = cs->sc_dkdev.dk_cpulabel; 1557 KASSERT(mutex_owned(&cs->sc_dvlock)); 1558 1559 memset(clp, 0, sizeof(*clp)); 1560 1561 ccdgetdefaultlabel(cs, lp); 1562 1563 /* 1564 * Call the generic disklabel extraction routine. 1565 */ 1566 cs->sc_flags |= CCDF_RLABEL; 1567 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1568 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1569 else 1570 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1571 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1572 if (errstring) 1573 ccdmakedisklabel(cs); 1574 else { 1575 int i; 1576 struct partition *pp; 1577 1578 /* 1579 * Sanity check whether the found disklabel is valid. 1580 * 1581 * This is necessary since total size of ccd may vary 1582 * when an interleave is changed even though exactly 1583 * same componets are used, and old disklabel may used 1584 * if that is found. 1585 */ 1586 if (lp->d_secperunit < UINT32_MAX ? 1587 lp->d_secperunit != cs->sc_size : 1588 lp->d_secperunit > cs->sc_size) 1589 printf("WARNING: %s: " 1590 "total sector size in disklabel (%ju) != " 1591 "the size of ccd (%ju)\n", cs->sc_xname, 1592 (uintmax_t)lp->d_secperunit, 1593 (uintmax_t)cs->sc_size); 1594 for (i = 0; i < lp->d_npartitions; i++) { 1595 pp = &lp->d_partitions[i]; 1596 if (pp->p_offset + pp->p_size > cs->sc_size) 1597 printf("WARNING: %s: end of partition `%c' " 1598 "exceeds the size of ccd (%ju)\n", 1599 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1600 } 1601 } 1602 1603 #ifdef DEBUG 1604 /* It's actually extremely common to have unlabeled ccds. */ 1605 if (ccddebug & CCDB_LABEL) 1606 if (errstring != NULL) 1607 printf("%s: %s\n", cs->sc_xname, errstring); 1608 #endif 1609 1610 /* In-core label now valid. */ 1611 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1612 } 1613 1614 /* 1615 * Take care of things one might want to take care of in the event 1616 * that a disklabel isn't present. 1617 */ 1618 static void 1619 ccdmakedisklabel(struct ccd_softc *cs) 1620 { 1621 struct disklabel *lp = cs->sc_dkdev.dk_label; 1622 1623 /* 1624 * For historical reasons, if there's no disklabel present 1625 * the raw partition must be marked FS_BSDFFS. 1626 */ 1627 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1628 1629 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1630 1631 lp->d_checksum = dkcksum(lp); 1632 } 1633 1634 #ifdef DEBUG 1635 static void 1636 printiinfo(struct ccdiinfo *ii) 1637 { 1638 int ix, i; 1639 1640 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1641 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1642 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1643 for (i = 0; i < ii->ii_ndisk; i++) 1644 printf(" %d", ii->ii_index[i]); 1645 printf("\n"); 1646 } 1647 } 1648 #endif 1649 1650 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1651 1652 static int 1653 ccd_modcmd(modcmd_t cmd, void *arg) 1654 { 1655 int error = 0; 1656 #ifdef _MODULE 1657 int bmajor = -1, cmajor = -1; 1658 #endif 1659 1660 1661 switch (cmd) { 1662 case MODULE_CMD_INIT: 1663 #ifdef _MODULE 1664 ccdattach(0); 1665 1666 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1667 &ccd_cdevsw, &cmajor); 1668 #endif 1669 break; 1670 1671 case MODULE_CMD_FINI: 1672 #ifdef _MODULE 1673 mutex_enter(&ccd_lock); 1674 if (ccd_nactive) { 1675 mutex_exit(&ccd_lock); 1676 error = EBUSY; 1677 } else { 1678 mutex_exit(&ccd_lock); 1679 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1680 ccddetach(); 1681 } 1682 #endif 1683 break; 1684 1685 case MODULE_CMD_STAT: 1686 return ENOTTY; 1687 1688 default: 1689 return ENOTTY; 1690 } 1691 1692 return error; 1693 } 1694 1695 static int 1696 ccd_units_sysctl(SYSCTLFN_ARGS) 1697 { 1698 struct sysctlnode node; 1699 struct ccd_softc *sc; 1700 int error, i, nccd, *units; 1701 size_t size; 1702 1703 nccd = 0; 1704 mutex_enter(&ccd_lock); 1705 LIST_FOREACH(sc, &ccds, sc_link) 1706 nccd++; 1707 mutex_exit(&ccd_lock); 1708 1709 if (nccd != 0) { 1710 size = nccd * sizeof(*units); 1711 units = kmem_zalloc(size, KM_SLEEP); 1712 if (units == NULL) 1713 return ENOMEM; 1714 1715 i = 0; 1716 mutex_enter(&ccd_lock); 1717 LIST_FOREACH(sc, &ccds, sc_link) { 1718 if (i >= nccd) 1719 break; 1720 units[i] = sc->sc_unit; 1721 } 1722 mutex_exit(&ccd_lock); 1723 } else { 1724 units = NULL; 1725 size = 0; 1726 } 1727 1728 node = *rnode; 1729 node.sysctl_data = units; 1730 node.sysctl_size = size; 1731 1732 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1733 if (units) 1734 kmem_free(units, size); 1735 return error; 1736 } 1737 1738 static int 1739 ccd_info_sysctl(SYSCTLFN_ARGS) 1740 { 1741 struct sysctlnode node; 1742 struct ccddiskinfo ccd; 1743 struct ccd_softc *sc; 1744 int unit; 1745 1746 if (newp == NULL || newlen != sizeof(int)) 1747 return EINVAL; 1748 1749 unit = *(const int *)newp; 1750 newp = NULL; 1751 newlen = 0; 1752 ccd.ccd_ndisks = ~0; 1753 mutex_enter(&ccd_lock); 1754 LIST_FOREACH(sc, &ccds, sc_link) { 1755 if (sc->sc_unit == unit) { 1756 ccd.ccd_ileave = sc->sc_ileave; 1757 ccd.ccd_size = sc->sc_size; 1758 ccd.ccd_ndisks = sc->sc_nccdisks; 1759 ccd.ccd_flags = sc->sc_flags; 1760 break; 1761 } 1762 } 1763 mutex_exit(&ccd_lock); 1764 1765 if (ccd.ccd_ndisks == ~0) 1766 return ENOENT; 1767 1768 node = *rnode; 1769 node.sysctl_data = &ccd; 1770 node.sysctl_size = sizeof(ccd); 1771 1772 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1773 } 1774 1775 static int 1776 ccd_components_sysctl(SYSCTLFN_ARGS) 1777 { 1778 struct sysctlnode node; 1779 int error, unit; 1780 size_t size; 1781 char *names, *p, *ep; 1782 struct ccd_softc *sc; 1783 1784 if (newp == NULL || newlen != sizeof(int)) 1785 return EINVAL; 1786 1787 size = 0; 1788 unit = *(const int *)newp; 1789 newp = NULL; 1790 newlen = 0; 1791 mutex_enter(&ccd_lock); 1792 LIST_FOREACH(sc, &ccds, sc_link) 1793 if (sc->sc_unit == unit) { 1794 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1795 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1796 break; 1797 } 1798 mutex_exit(&ccd_lock); 1799 1800 if (size == 0) 1801 return ENOENT; 1802 names = kmem_zalloc(size, KM_SLEEP); 1803 if (names == NULL) 1804 return ENOMEM; 1805 1806 p = names; 1807 ep = names + size; 1808 mutex_enter(&ccd_lock); 1809 LIST_FOREACH(sc, &ccds, sc_link) 1810 if (sc->sc_unit == unit) { 1811 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1812 char *d = sc->sc_cinfo[i].ci_path; 1813 while (p < ep && (*p++ = *d++) != '\0') 1814 continue; 1815 } 1816 break; 1817 } 1818 mutex_exit(&ccd_lock); 1819 1820 node = *rnode; 1821 node.sysctl_data = names; 1822 node.sysctl_size = ep - names; 1823 1824 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1825 kmem_free(names, size); 1826 return error; 1827 } 1828 1829 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1830 { 1831 const struct sysctlnode *node = NULL; 1832 1833 sysctl_createv(clog, 0, NULL, &node, 1834 CTLFLAG_PERMANENT, 1835 CTLTYPE_NODE, "ccd", 1836 SYSCTL_DESCR("ConCatenated Disk state"), 1837 NULL, 0, NULL, 0, 1838 CTL_KERN, CTL_CREATE, CTL_EOL); 1839 1840 if (node == NULL) 1841 return; 1842 1843 sysctl_createv(clog, 0, &node, NULL, 1844 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1845 CTLTYPE_STRUCT, "units", 1846 SYSCTL_DESCR("List of ccd unit numbers"), 1847 ccd_units_sysctl, 0, NULL, 0, 1848 CTL_CREATE, CTL_EOL); 1849 sysctl_createv(clog, 0, &node, NULL, 1850 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1851 CTLTYPE_STRUCT, "info", 1852 SYSCTL_DESCR("Information about a CCD unit"), 1853 ccd_info_sysctl, 0, NULL, 0, 1854 CTL_CREATE, CTL_EOL); 1855 sysctl_createv(clog, 0, &node, NULL, 1856 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1857 CTLTYPE_STRUCT, "components", 1858 SYSCTL_DESCR("Information about CCD components"), 1859 ccd_components_sysctl, 0, NULL, 0, 1860 CTL_CREATE, CTL_EOL); 1861 } 1862