1 /* $NetBSD: ccd.c,v 1.148 2014/04/06 00:56:39 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 
 *
 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to
 *    serialize I/O and configuration changes.
 *
 * => the in-core disk label does not change while the device is open.
 *
 * On memory consumption: ccd fans out I/O requests and so needs to
 * allocate memory.  If the system is desperately low on memory, we
 * single thread I/O.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.148 2014/04/06 00:56:39 joerg Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/module.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/kauth.h>
#include <sys/kthread.h>
#include <sys/bufq.h>
#include <sys/sysctl.h>

#include <uvm/uvm_extern.h>

#include <dev/ccdvar.h>
#include <dev/dkvar.h>

/* Building with CCDDEBUG implies DEBUG for this file. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug trace categories.  ccddebug is a bit mask of CCDB_* values that
 * is tested before each diagnostic printf in this file; it starts
 * out as zero, i.e. with all tracing disabled.
 */
#ifdef DEBUG
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
int ccddebug = 0x00;
#endif

/* Extract the ccd unit number from a dev_t. */
#define	ccdunit(x)	DISKUNIT(x)

/*
 * Per-component I/O descriptor.  One of these is built by ccdbuffer()
 * for every piece of a request that is sent to a component.
 *
 * cb_buf must remain the first member: ccdiodone() receives a pointer
 * to cb_buf and casts it straight back to a struct ccdbuf pointer.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct ccd_softc *cb_sc;	/* pointer to ccd softc */
	int		cb_comp;	/* target component */
	SIMPLEQ_ENTRY(ccdbuf) cb_q;	/* fifo of component buffers */
};

/* component buffer pool */
static pool_cache_t ccd_cache;

/*
 * Allocate / release a struct ccdbuf from ccd_cache.  The allocation
 * is made with PR_WAITOK.
 */
#define	CCD_GETBUF()		pool_cache_get(ccd_cache, PR_WAITOK)
#define	CCD_PUTBUF(cbp)		pool_cache_put(ccd_cache, cbp)

/* dev_t of the raw partition of the unit that `dev' belongs to. */
#define CCDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART))

/* called by main() at boot time */
void	ccdattach(int);

/* called by biodone() at interrupt time */
static void	ccdiodone(struct buf *);

static void	ccdinterleave(struct ccd_softc *);
static int	ccdinit(struct ccd_softc *, char **, struct vnode **,
		    struct lwp *);
static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *,
		    daddr_t, void *, long);
static void	ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *);
static void	ccdgetdisklabel(dev_t);
static void	ccdmakedisklabel(struct ccd_softc *);
static void	ccdstart(struct ccd_softc *);
static void	ccdthread(void *);

/* Device switch entry points. */
static dev_type_open(ccdopen);
static dev_type_close(ccdclose);
static dev_type_read(ccdread);
static dev_type_write(ccdwrite);
static dev_type_ioctl(ccdioctl);
static dev_type_strategy(ccdstrategy);
static dev_type_size(ccdsize);

/* Block device switch: no crash-dump support (nodump). */
const struct bdevsw ccd_bdevsw = {
	.d_open = ccdopen,
	.d_close = ccdclose,
	.d_strategy = ccdstrategy,
	.d_ioctl = ccdioctl,
	.d_dump = nodump,
	.d_psize = ccdsize,
	.d_flag = D_DISK | D_MPSAFE
};

/* Character (raw) device switch; read/write go through physio(). */
const struct cdevsw ccd_cdevsw = {
	.d_open = ccdopen,
	.d_close = ccdclose,
	.d_read = ccdread,
	.d_write = ccdwrite,
	.d_ioctl = ccdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK | D_MPSAFE
};

#ifdef DEBUG
static void
printiinfo(struct ccdiinfo *); 207 #endif 208 209 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 210 static kmutex_t ccd_lock; 211 212 static struct ccd_softc * 213 ccdcreate(int unit) { 214 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 215 if (sc == NULL) { 216 #ifdef DIAGNOSTIC 217 printf("%s: out of memory\n", __func__); 218 #endif 219 return NULL; 220 } 221 /* Initialize per-softc structures. */ 222 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 223 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 224 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 225 cv_init(&sc->sc_stop, "ccdstop"); 226 cv_init(&sc->sc_push, "ccdthr"); 227 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 228 return sc; 229 } 230 231 static void 232 ccddestroy(struct ccd_softc *sc) { 233 mutex_obj_free(sc->sc_iolock); 234 mutex_exit(&sc->sc_dvlock); 235 mutex_destroy(&sc->sc_dvlock); 236 cv_destroy(&sc->sc_stop); 237 cv_destroy(&sc->sc_push); 238 disk_destroy(&sc->sc_dkdev); 239 kmem_free(sc, sizeof(*sc)); 240 } 241 242 static struct ccd_softc * 243 ccdget(int unit) { 244 struct ccd_softc *sc; 245 if (unit < 0) { 246 #ifdef DIAGNOSTIC 247 panic("%s: unit %d!", __func__, unit); 248 #endif 249 return NULL; 250 } 251 mutex_enter(&ccd_lock); 252 LIST_FOREACH(sc, &ccds, sc_link) { 253 if (sc->sc_unit == unit) { 254 mutex_exit(&ccd_lock); 255 return sc; 256 } 257 } 258 mutex_exit(&ccd_lock); 259 if ((sc = ccdcreate(unit)) == NULL) 260 return NULL; 261 mutex_enter(&ccd_lock); 262 LIST_INSERT_HEAD(&ccds, sc, sc_link); 263 mutex_exit(&ccd_lock); 264 return sc; 265 } 266 267 static void 268 ccdput(struct ccd_softc *sc) { 269 mutex_enter(&ccd_lock); 270 LIST_REMOVE(sc, sc_link); 271 mutex_exit(&ccd_lock); 272 ccddestroy(sc); 273 } 274 275 /* 276 * Called by main() during pseudo-device attachment. All we need 277 * to do is allocate enough space for devices to be configured later. 
278 */ 279 void 280 ccdattach(int num) 281 { 282 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 283 284 /* Initialize the component buffer pool. */ 285 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 286 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 287 } 288 289 static int 290 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 291 struct lwp *l) 292 { 293 struct ccdcinfo *ci = NULL; 294 int ix; 295 struct vattr va; 296 struct ccdgeom *ccg = &cs->sc_geom; 297 char *tmppath; 298 int error, path_alloced; 299 uint64_t psize, minsize; 300 unsigned secsize, maxsecsize; 301 302 #ifdef DEBUG 303 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 304 printf("%s: ccdinit\n", cs->sc_xname); 305 #endif 306 307 /* Allocate space for the component info. */ 308 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 309 KM_SLEEP); 310 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 311 312 cs->sc_size = 0; 313 314 /* 315 * Verify that each component piece exists and record 316 * relevant information about it. 317 */ 318 maxsecsize = 0; 319 minsize = 0; 320 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 321 ci = &cs->sc_cinfo[ix]; 322 ci->ci_vp = vpp[ix]; 323 324 /* 325 * Copy in the pathname of the component. 326 */ 327 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 328 error = copyinstr(cpaths[ix], tmppath, 329 MAXPATHLEN, &ci->ci_pathlen); 330 if (ci->ci_pathlen == 0) 331 error = EINVAL; 332 if (error) { 333 #ifdef DEBUG 334 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 335 printf("%s: can't copy path, error = %d\n", 336 cs->sc_xname, error); 337 #endif 338 goto out; 339 } 340 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 341 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 342 path_alloced++; 343 344 /* 345 * XXX: Cache the component's dev_t. 
346 */ 347 vn_lock(vpp[ix], LK_SHARED | LK_RETRY); 348 error = VOP_GETATTR(vpp[ix], &va, l->l_cred); 349 VOP_UNLOCK(vpp[ix]); 350 if (error != 0) { 351 #ifdef DEBUG 352 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 353 printf("%s: %s: getattr failed %s = %d\n", 354 cs->sc_xname, ci->ci_path, 355 "error", error); 356 #endif 357 goto out; 358 } 359 ci->ci_dev = va.va_rdev; 360 361 /* 362 * Get partition information for the component. 363 */ 364 error = getdisksize(vpp[ix], &psize, &secsize); 365 if (error) { 366 #ifdef DEBUG 367 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 368 printf("%s: %s: disksize failed, error = %d\n", 369 cs->sc_xname, ci->ci_path, error); 370 #endif 371 goto out; 372 } 373 374 /* 375 * Calculate the size, truncating to an interleave 376 * boundary if necessary. 377 */ 378 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 379 if (cs->sc_ileave > 1) 380 psize -= psize % cs->sc_ileave; 381 382 if (psize == 0) { 383 #ifdef DEBUG 384 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 385 printf("%s: %s: size == 0\n", 386 cs->sc_xname, ci->ci_path); 387 #endif 388 error = ENODEV; 389 goto out; 390 } 391 392 if (minsize == 0 || psize < minsize) 393 minsize = psize; 394 ci->ci_size = psize; 395 cs->sc_size += psize; 396 } 397 398 /* 399 * Don't allow the interleave to be smaller than 400 * the biggest component sector. 401 */ 402 if ((cs->sc_ileave > 0) && 403 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 404 #ifdef DEBUG 405 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 406 printf("%s: interleave must be at least %d\n", 407 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 408 #endif 409 error = EINVAL; 410 goto out; 411 } 412 413 /* 414 * If uniform interleave is desired set all sizes to that of 415 * the smallest component. 416 */ 417 if (cs->sc_flags & CCDF_UNIFORM) { 418 for (ci = cs->sc_cinfo; 419 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 420 ci->ci_size = minsize; 421 422 cs->sc_size = cs->sc_nccdisks * minsize; 423 } 424 425 /* 426 * Construct the interleave table. 
427 */ 428 ccdinterleave(cs); 429 430 /* 431 * Create pseudo-geometry based on 1MB cylinders. It's 432 * pretty close. 433 */ 434 ccg->ccg_secsize = DEV_BSIZE; 435 ccg->ccg_ntracks = 1; 436 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 437 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 438 439 /* 440 * Create thread to handle deferred I/O. 441 */ 442 cs->sc_zap = false; 443 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 444 cs, &cs->sc_thread, "%s", cs->sc_xname); 445 if (error) { 446 printf("ccdinit: can't create thread: %d\n", error); 447 goto out; 448 } 449 450 /* 451 * Only now that everything is set up can we enable the device. 452 */ 453 mutex_enter(cs->sc_iolock); 454 cs->sc_flags |= CCDF_INITED; 455 mutex_exit(cs->sc_iolock); 456 kmem_free(tmppath, MAXPATHLEN); 457 return (0); 458 459 out: 460 for (ix = 0; ix < path_alloced; ix++) { 461 kmem_free(cs->sc_cinfo[ix].ci_path, 462 cs->sc_cinfo[ix].ci_pathlen); 463 } 464 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 465 kmem_free(tmppath, MAXPATHLEN); 466 return (error); 467 } 468 469 static void 470 ccdinterleave(struct ccd_softc *cs) 471 { 472 struct ccdcinfo *ci, *smallci; 473 struct ccdiinfo *ii; 474 daddr_t bn, lbn; 475 int ix; 476 u_long size; 477 478 #ifdef DEBUG 479 if (ccddebug & CCDB_INIT) 480 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 481 #endif 482 /* 483 * Allocate an interleave table. 484 * Chances are this is too big, but we don't care. 485 */ 486 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 487 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 488 489 /* 490 * Trivial case: no interleave (actually interleave of disk size). 491 * Each table entry represents a single component in its entirety. 492 */ 493 if (cs->sc_ileave == 0) { 494 bn = 0; 495 ii = cs->sc_itable; 496 497 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 498 /* Allocate space for ii_index. 
*/ 499 ii->ii_indexsz = sizeof(int); 500 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 501 ii->ii_ndisk = 1; 502 ii->ii_startblk = bn; 503 ii->ii_startoff = 0; 504 ii->ii_index[0] = ix; 505 bn += cs->sc_cinfo[ix].ci_size; 506 ii++; 507 } 508 ii->ii_ndisk = 0; 509 #ifdef DEBUG 510 if (ccddebug & CCDB_INIT) 511 printiinfo(cs->sc_itable); 512 #endif 513 return; 514 } 515 516 /* 517 * The following isn't fast or pretty; it doesn't have to be. 518 */ 519 size = 0; 520 bn = lbn = 0; 521 for (ii = cs->sc_itable; ; ii++) { 522 /* Allocate space for ii_index. */ 523 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 524 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 525 526 /* 527 * Locate the smallest of the remaining components 528 */ 529 smallci = NULL; 530 for (ci = cs->sc_cinfo; 531 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 532 if (ci->ci_size > size && 533 (smallci == NULL || 534 ci->ci_size < smallci->ci_size)) 535 smallci = ci; 536 537 /* 538 * Nobody left, all done 539 */ 540 if (smallci == NULL) { 541 ii->ii_ndisk = 0; 542 break; 543 } 544 545 /* 546 * Record starting logical block and component offset 547 */ 548 ii->ii_startblk = bn / cs->sc_ileave; 549 ii->ii_startoff = lbn; 550 551 /* 552 * Determine how many disks take part in this interleave 553 * and record their indices. 
554 */ 555 ix = 0; 556 for (ci = cs->sc_cinfo; 557 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 558 if (ci->ci_size >= smallci->ci_size) 559 ii->ii_index[ix++] = ci - cs->sc_cinfo; 560 ii->ii_ndisk = ix; 561 bn += ix * (smallci->ci_size - size); 562 lbn = smallci->ci_size / cs->sc_ileave; 563 size = smallci->ci_size; 564 } 565 #ifdef DEBUG 566 if (ccddebug & CCDB_INIT) 567 printiinfo(cs->sc_itable); 568 #endif 569 } 570 571 /* ARGSUSED */ 572 static int 573 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 574 { 575 int unit = ccdunit(dev); 576 struct ccd_softc *cs; 577 struct disklabel *lp; 578 int error = 0, part, pmask; 579 580 #ifdef DEBUG 581 if (ccddebug & CCDB_FOLLOW) 582 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 583 #endif 584 if ((cs = ccdget(unit)) == NULL) 585 return ENXIO; 586 587 mutex_enter(&cs->sc_dvlock); 588 589 lp = cs->sc_dkdev.dk_label; 590 591 part = DISKPART(dev); 592 pmask = (1 << part); 593 594 /* 595 * If we're initialized, check to see if there are any other 596 * open partitions. If not, then it's safe to update 597 * the in-core disklabel. Only read the disklabel if it is 598 * not already valid. 599 */ 600 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 601 cs->sc_dkdev.dk_openmask == 0) 602 ccdgetdisklabel(dev); 603 604 /* Check that the partition exists. */ 605 if (part != RAW_PART) { 606 if (((cs->sc_flags & CCDF_INITED) == 0) || 607 ((part >= lp->d_npartitions) || 608 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 609 error = ENXIO; 610 goto done; 611 } 612 } 613 614 /* Prevent our unit from being unconfigured while open. 
*/ 615 switch (fmt) { 616 case S_IFCHR: 617 cs->sc_dkdev.dk_copenmask |= pmask; 618 break; 619 620 case S_IFBLK: 621 cs->sc_dkdev.dk_bopenmask |= pmask; 622 break; 623 } 624 cs->sc_dkdev.dk_openmask = 625 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 626 627 done: 628 mutex_exit(&cs->sc_dvlock); 629 return (error); 630 } 631 632 /* ARGSUSED */ 633 static int 634 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 635 { 636 int unit = ccdunit(dev); 637 struct ccd_softc *cs; 638 int part; 639 640 #ifdef DEBUG 641 if (ccddebug & CCDB_FOLLOW) 642 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 643 #endif 644 645 if ((cs = ccdget(unit)) == NULL) 646 return ENXIO; 647 648 mutex_enter(&cs->sc_dvlock); 649 650 part = DISKPART(dev); 651 652 /* ...that much closer to allowing unconfiguration... */ 653 switch (fmt) { 654 case S_IFCHR: 655 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 656 break; 657 658 case S_IFBLK: 659 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 660 break; 661 } 662 cs->sc_dkdev.dk_openmask = 663 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 664 665 if (cs->sc_dkdev.dk_openmask == 0) { 666 if ((cs->sc_flags & CCDF_KLABEL) == 0) 667 cs->sc_flags &= ~CCDF_VLABEL; 668 } 669 670 mutex_exit(&cs->sc_dvlock); 671 return (0); 672 } 673 674 static bool 675 ccdbackoff(struct ccd_softc *cs) 676 { 677 678 /* XXX Arbitrary, should be a uvm call. */ 679 return uvmexp.free < (uvmexp.freemin >> 1) && 680 disk_isbusy(&cs->sc_dkdev); 681 } 682 683 static void 684 ccdthread(void *cookie) 685 { 686 struct ccd_softc *cs; 687 688 cs = cookie; 689 690 #ifdef DEBUG 691 if (ccddebug & CCDB_FOLLOW) 692 printf("ccdthread: hello\n"); 693 #endif 694 695 mutex_enter(cs->sc_iolock); 696 while (__predict_true(!cs->sc_zap)) { 697 if (bufq_peek(cs->sc_bufq) == NULL) { 698 /* Nothing to do. */ 699 cv_wait(&cs->sc_push, cs->sc_iolock); 700 continue; 701 } 702 if (ccdbackoff(cs)) { 703 /* Wait for memory to become available. 
*/ 704 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 705 continue; 706 } 707 #ifdef DEBUG 708 if (ccddebug & CCDB_FOLLOW) 709 printf("ccdthread: dispatching I/O\n"); 710 #endif 711 ccdstart(cs); 712 mutex_enter(cs->sc_iolock); 713 } 714 cs->sc_thread = NULL; 715 mutex_exit(cs->sc_iolock); 716 #ifdef DEBUG 717 if (ccddebug & CCDB_FOLLOW) 718 printf("ccdthread: goodbye\n"); 719 #endif 720 kthread_exit(0); 721 } 722 723 static void 724 ccdstrategy(struct buf *bp) 725 { 726 int unit = ccdunit(bp->b_dev); 727 struct ccd_softc *cs; 728 if ((cs = ccdget(unit)) == NULL) 729 return; 730 731 /* Must be open or reading label. */ 732 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 733 (cs->sc_flags & CCDF_RLABEL) != 0); 734 735 mutex_enter(cs->sc_iolock); 736 /* Synchronize with device init/uninit. */ 737 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 738 mutex_exit(cs->sc_iolock); 739 #ifdef DEBUG 740 if (ccddebug & CCDB_FOLLOW) 741 printf("ccdstrategy: unit %d: not inited\n", unit); 742 #endif 743 bp->b_error = ENXIO; 744 bp->b_resid = bp->b_bcount; 745 biodone(bp); 746 return; 747 } 748 749 /* Defer to thread if system is low on memory. */ 750 bufq_put(cs->sc_bufq, bp); 751 if (__predict_false(ccdbackoff(cs))) { 752 mutex_exit(cs->sc_iolock); 753 #ifdef DEBUG 754 if (ccddebug & CCDB_FOLLOW) 755 printf("ccdstrategy: holding off on I/O\n"); 756 #endif 757 return; 758 } 759 ccdstart(cs); 760 } 761 762 static void 763 ccdstart(struct ccd_softc *cs) 764 { 765 daddr_t blkno; 766 int wlabel; 767 struct disklabel *lp; 768 long bcount, rcount; 769 struct ccdbuf *cbp; 770 char *addr; 771 daddr_t bn; 772 vnode_t *vp; 773 buf_t *bp; 774 775 KASSERT(mutex_owned(cs->sc_iolock)); 776 777 disk_busy(&cs->sc_dkdev); 778 bp = bufq_get(cs->sc_bufq); 779 KASSERT(bp != NULL); 780 781 #ifdef DEBUG 782 if (ccddebug & CCDB_FOLLOW) 783 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 784 #endif 785 786 /* If it's a nil transfer, wake up the top half now. 
*/ 787 if (bp->b_bcount == 0) 788 goto done; 789 790 lp = cs->sc_dkdev.dk_label; 791 792 /* 793 * Do bounds checking and adjust transfer. If there's an 794 * error, the bounds check will flag that for us. Convert 795 * the partition relative block number to an absolute. 796 */ 797 blkno = bp->b_blkno; 798 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 799 if (DISKPART(bp->b_dev) != RAW_PART) { 800 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 801 goto done; 802 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 803 } 804 mutex_exit(cs->sc_iolock); 805 bp->b_rawblkno = blkno; 806 807 /* Allocate the component buffers and start I/O! */ 808 bp->b_resid = bp->b_bcount; 809 bn = bp->b_rawblkno; 810 addr = bp->b_data; 811 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 812 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 813 rcount = cbp->cb_buf.b_bcount; 814 bn += btodb(rcount); 815 addr += rcount; 816 vp = cbp->cb_buf.b_vp; 817 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 818 mutex_enter(vp->v_interlock); 819 vp->v_numoutput++; 820 mutex_exit(vp->v_interlock); 821 } 822 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 823 } 824 return; 825 826 done: 827 disk_unbusy(&cs->sc_dkdev, 0, 0); 828 cv_broadcast(&cs->sc_stop); 829 cv_broadcast(&cs->sc_push); 830 mutex_exit(cs->sc_iolock); 831 bp->b_resid = bp->b_bcount; 832 biodone(bp); 833 } 834 835 /* 836 * Build a component buffer header. 837 */ 838 static struct ccdbuf * 839 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 840 long bcount) 841 { 842 struct ccdcinfo *ci; 843 struct ccdbuf *cbp; 844 daddr_t cbn, cboff; 845 u_int64_t cbc; 846 int ccdisk; 847 848 #ifdef DEBUG 849 if (ccddebug & CCDB_IO) 850 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 851 cs, bp, bn, addr, bcount); 852 #endif 853 /* 854 * Determine which component bn falls in. 
855 */ 856 cbn = bn; 857 cboff = 0; 858 859 /* 860 * Serially concatenated 861 */ 862 if (cs->sc_ileave == 0) { 863 daddr_t sblk; 864 865 sblk = 0; 866 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 867 cbn >= sblk + ci->ci_size; 868 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 869 sblk += ci->ci_size; 870 cbn -= sblk; 871 } 872 /* 873 * Interleaved 874 */ 875 else { 876 struct ccdiinfo *ii; 877 int off; 878 879 cboff = cbn % cs->sc_ileave; 880 cbn /= cs->sc_ileave; 881 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 882 if (ii->ii_startblk > cbn) 883 break; 884 ii--; 885 off = cbn - ii->ii_startblk; 886 if (ii->ii_ndisk == 1) { 887 ccdisk = ii->ii_index[0]; 888 cbn = ii->ii_startoff + off; 889 } else { 890 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 891 cbn = ii->ii_startoff + off / ii->ii_ndisk; 892 } 893 cbn *= cs->sc_ileave; 894 ci = &cs->sc_cinfo[ccdisk]; 895 } 896 897 /* 898 * Fill in the component buf structure. 899 */ 900 cbp = CCD_GETBUF(); 901 KASSERT(cbp != NULL); 902 buf_init(&cbp->cb_buf); 903 cbp->cb_buf.b_flags = bp->b_flags; 904 cbp->cb_buf.b_oflags = bp->b_oflags; 905 cbp->cb_buf.b_cflags = bp->b_cflags; 906 cbp->cb_buf.b_iodone = ccdiodone; 907 cbp->cb_buf.b_proc = bp->b_proc; 908 cbp->cb_buf.b_dev = ci->ci_dev; 909 cbp->cb_buf.b_blkno = cbn + cboff; 910 cbp->cb_buf.b_data = addr; 911 cbp->cb_buf.b_vp = ci->ci_vp; 912 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 913 if (cs->sc_ileave == 0) 914 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 915 else 916 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 917 cbp->cb_buf.b_bcount = cbc < bcount ? 
cbc : bcount; 918 919 /* 920 * context for ccdiodone 921 */ 922 cbp->cb_obp = bp; 923 cbp->cb_sc = cs; 924 cbp->cb_comp = ccdisk; 925 926 BIO_COPYPRIO(&cbp->cb_buf, bp); 927 928 #ifdef DEBUG 929 if (ccddebug & CCDB_IO) 930 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 931 " bcnt %d\n", 932 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 933 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 934 cbp->cb_buf.b_bcount); 935 #endif 936 937 return (cbp); 938 } 939 940 /* 941 * Called at interrupt time. 942 * Mark the component as done and if all components are done, 943 * take a ccd interrupt. 944 */ 945 static void 946 ccdiodone(struct buf *vbp) 947 { 948 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 949 struct buf *bp = cbp->cb_obp; 950 struct ccd_softc *cs = cbp->cb_sc; 951 int count; 952 953 #ifdef DEBUG 954 if (ccddebug & CCDB_FOLLOW) 955 printf("ccdiodone(%p)\n", cbp); 956 if (ccddebug & CCDB_IO) { 957 printf("ccdiodone: bp %p bcount %d resid %d\n", 958 bp, bp->b_bcount, bp->b_resid); 959 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 960 " bcnt %d\n", 961 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 962 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 963 cbp->cb_buf.b_bcount); 964 } 965 #endif 966 967 if (cbp->cb_buf.b_error != 0) { 968 bp->b_error = cbp->cb_buf.b_error; 969 printf("%s: error %d on component %d\n", 970 cs->sc_xname, bp->b_error, cbp->cb_comp); 971 } 972 count = cbp->cb_buf.b_bcount; 973 buf_destroy(&cbp->cb_buf); 974 CCD_PUTBUF(cbp); 975 976 /* 977 * If all done, "interrupt". 978 */ 979 mutex_enter(cs->sc_iolock); 980 bp->b_resid -= count; 981 if (bp->b_resid < 0) 982 panic("ccdiodone: count"); 983 if (bp->b_resid == 0) { 984 /* 985 * Request is done for better or worse, wakeup the top half. 
986 */ 987 if (bp->b_error != 0) 988 bp->b_resid = bp->b_bcount; 989 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 990 (bp->b_flags & B_READ)); 991 if (!disk_isbusy(&cs->sc_dkdev)) { 992 if (bufq_peek(cs->sc_bufq) != NULL) { 993 cv_broadcast(&cs->sc_push); 994 } 995 cv_broadcast(&cs->sc_stop); 996 } 997 mutex_exit(cs->sc_iolock); 998 biodone(bp); 999 } else 1000 mutex_exit(cs->sc_iolock); 1001 } 1002 1003 /* ARGSUSED */ 1004 static int 1005 ccdread(dev_t dev, struct uio *uio, int flags) 1006 { 1007 int unit = ccdunit(dev); 1008 struct ccd_softc *cs; 1009 1010 #ifdef DEBUG 1011 if (ccddebug & CCDB_FOLLOW) 1012 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1013 #endif 1014 if ((cs = ccdget(unit)) == NULL) 1015 return 0; 1016 1017 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1018 if ((cs->sc_flags & CCDF_INITED) == 0) 1019 return (ENXIO); 1020 1021 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1022 } 1023 1024 /* ARGSUSED */ 1025 static int 1026 ccdwrite(dev_t dev, struct uio *uio, int flags) 1027 { 1028 int unit = ccdunit(dev); 1029 struct ccd_softc *cs; 1030 1031 #ifdef DEBUG 1032 if (ccddebug & CCDB_FOLLOW) 1033 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1034 #endif 1035 if ((cs = ccdget(unit)) == NULL) 1036 return ENOENT; 1037 1038 /* Unlocked advisory check, ccdstrategy check is synchronous. 
*/ 1039 if ((cs->sc_flags & CCDF_INITED) == 0) 1040 return (ENXIO); 1041 1042 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1043 } 1044 1045 static int 1046 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1047 { 1048 int unit = ccdunit(dev); 1049 int i, j, lookedup = 0, error = 0; 1050 int part, pmask; 1051 struct ccd_softc *cs; 1052 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1053 kauth_cred_t uc; 1054 char **cpp; 1055 struct pathbuf *pb; 1056 struct vnode **vpp; 1057 #ifdef __HAVE_OLD_DISKLABEL 1058 struct disklabel newlabel; 1059 #endif 1060 1061 if ((cs = ccdget(unit)) == NULL) 1062 return ENOENT; 1063 uc = kauth_cred_get(); 1064 1065 /* Must be open for writes for these commands... */ 1066 switch (cmd) { 1067 case CCDIOCSET: 1068 case CCDIOCCLR: 1069 case DIOCSDINFO: 1070 case DIOCWDINFO: 1071 #ifdef __HAVE_OLD_DISKLABEL 1072 case ODIOCSDINFO: 1073 case ODIOCWDINFO: 1074 #endif 1075 case DIOCKLABEL: 1076 case DIOCWLABEL: 1077 if ((flag & FWRITE) == 0) 1078 return (EBADF); 1079 } 1080 1081 mutex_enter(&cs->sc_dvlock); 1082 1083 /* Must be initialized for these... */ 1084 switch (cmd) { 1085 case CCDIOCCLR: 1086 case DIOCGDINFO: 1087 case DIOCCACHESYNC: 1088 case DIOCSDINFO: 1089 case DIOCWDINFO: 1090 case DIOCGPART: 1091 case DIOCWLABEL: 1092 case DIOCKLABEL: 1093 case DIOCGDEFLABEL: 1094 #ifdef __HAVE_OLD_DISKLABEL 1095 case ODIOCGDINFO: 1096 case ODIOCSDINFO: 1097 case ODIOCWDINFO: 1098 case ODIOCGDEFLABEL: 1099 #endif 1100 if ((cs->sc_flags & CCDF_INITED) == 0) { 1101 error = ENXIO; 1102 goto out; 1103 } 1104 } 1105 1106 switch (cmd) { 1107 case CCDIOCSET: 1108 if (cs->sc_flags & CCDF_INITED) { 1109 error = EBUSY; 1110 goto out; 1111 } 1112 1113 /* Validate the flags. 
*/ 1114 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1115 error = EINVAL; 1116 goto out; 1117 } 1118 1119 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1120 ccio->ccio_ndisks == 0) { 1121 error = EINVAL; 1122 goto out; 1123 } 1124 1125 /* Fill in some important bits. */ 1126 cs->sc_ileave = ccio->ccio_ileave; 1127 cs->sc_nccdisks = ccio->ccio_ndisks; 1128 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1129 1130 /* 1131 * Allocate space for and copy in the array of 1132 * componet pathnames and device numbers. 1133 */ 1134 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1135 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1136 error = copyin(ccio->ccio_disks, cpp, 1137 ccio->ccio_ndisks * sizeof(*cpp)); 1138 if (error) { 1139 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1140 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1141 goto out; 1142 } 1143 1144 #ifdef DEBUG 1145 if (ccddebug & CCDB_INIT) 1146 for (i = 0; i < ccio->ccio_ndisks; ++i) 1147 printf("ccdioctl: component %d: %p\n", 1148 i, cpp[i]); 1149 #endif 1150 1151 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1152 #ifdef DEBUG 1153 if (ccddebug & CCDB_INIT) 1154 printf("ccdioctl: lookedup = %d\n", lookedup); 1155 #endif 1156 error = pathbuf_copyin(cpp[i], &pb); 1157 if (error == 0) { 1158 error = dk_lookup(pb, l, &vpp[i]); 1159 } 1160 pathbuf_destroy(pb); 1161 if (error != 0) { 1162 for (j = 0; j < lookedup; ++j) 1163 (void)vn_close(vpp[j], FREAD|FWRITE, 1164 uc); 1165 kmem_free(vpp, ccio->ccio_ndisks * 1166 sizeof(*vpp)); 1167 kmem_free(cpp, ccio->ccio_ndisks * 1168 sizeof(*cpp)); 1169 goto out; 1170 } 1171 ++lookedup; 1172 } 1173 1174 /* Attach the disk. */ 1175 disk_attach(&cs->sc_dkdev); 1176 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1177 1178 /* 1179 * Initialize the ccd. Fills in the softc for us. 
1180 */ 1181 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1182 for (j = 0; j < lookedup; ++j) 1183 (void)vn_close(vpp[j], FREAD|FWRITE, 1184 uc); 1185 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1186 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1187 disk_detach(&cs->sc_dkdev); 1188 bufq_free(cs->sc_bufq); 1189 goto out; 1190 } 1191 1192 /* We can free the temporary variables now. */ 1193 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1194 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1195 1196 /* 1197 * The ccd has been successfully initialized, so 1198 * we can place it into the array. Don't try to 1199 * read the disklabel until the disk has been attached, 1200 * because space for the disklabel is allocated 1201 * in disk_attach(); 1202 */ 1203 ccio->ccio_unit = unit; 1204 ccio->ccio_size = cs->sc_size; 1205 1206 /* Try and read the disklabel. */ 1207 ccdgetdisklabel(dev); 1208 break; 1209 1210 case CCDIOCCLR: 1211 /* 1212 * Don't unconfigure if any other partitions are open 1213 * or if both the character and block flavors of this 1214 * partition are open. 1215 */ 1216 part = DISKPART(dev); 1217 pmask = (1 << part); 1218 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1219 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1220 (cs->sc_dkdev.dk_copenmask & pmask))) { 1221 error = EBUSY; 1222 goto out; 1223 } 1224 1225 /* Stop new I/O, wait for in-flight I/O to complete. */ 1226 mutex_enter(cs->sc_iolock); 1227 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1228 cs->sc_zap = true; 1229 while (disk_isbusy(&cs->sc_dkdev) || 1230 bufq_peek(cs->sc_bufq) != NULL || 1231 cs->sc_thread != NULL) { 1232 cv_broadcast(&cs->sc_push); 1233 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1234 } 1235 mutex_exit(cs->sc_iolock); 1236 1237 /* 1238 * Free ccd_softc information and clear entry. 1239 */ 1240 1241 /* Close the components and free their pathnames. 
*/ 1242 for (i = 0; i < cs->sc_nccdisks; ++i) { 1243 /* 1244 * XXX: this close could potentially fail and 1245 * cause Bad Things. Maybe we need to force 1246 * the close to happen? 1247 */ 1248 #ifdef DEBUG 1249 if (ccddebug & CCDB_VNODE) 1250 vprint("CCDIOCCLR: vnode info", 1251 cs->sc_cinfo[i].ci_vp); 1252 #endif 1253 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1254 uc); 1255 kmem_free(cs->sc_cinfo[i].ci_path, 1256 cs->sc_cinfo[i].ci_pathlen); 1257 } 1258 1259 /* Free interleave index. */ 1260 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1261 kmem_free(cs->sc_itable[i].ii_index, 1262 cs->sc_itable[i].ii_indexsz); 1263 } 1264 1265 /* Free component info and interleave table. */ 1266 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1267 sizeof(struct ccdcinfo)); 1268 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1269 sizeof(struct ccdiinfo)); 1270 1271 /* Detatch the disk. */ 1272 disk_detach(&cs->sc_dkdev); 1273 bufq_free(cs->sc_bufq); 1274 ccdput(cs); 1275 /* Don't break, otherwise cs is read again. */ 1276 return 0; 1277 1278 case DIOCGDINFO: 1279 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label); 1280 break; 1281 1282 #ifdef __HAVE_OLD_DISKLABEL 1283 case ODIOCGDINFO: 1284 newlabel = *(cs->sc_dkdev.dk_label); 1285 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1286 return ENOTTY; 1287 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1288 break; 1289 #endif 1290 1291 case DIOCGPART: 1292 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label; 1293 ((struct partinfo *)data)->part = 1294 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1295 break; 1296 1297 case DIOCCACHESYNC: 1298 /* 1299 * XXX Do we really need to care about having a writable 1300 * file descriptor here? 1301 */ 1302 if ((flag & FWRITE) == 0) 1303 return (EBADF); 1304 1305 /* 1306 * We pass this call down to all components and report 1307 * the first error we encounter. 
1308 */ 1309 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1310 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1311 flag, uc); 1312 if (j != 0 && error == 0) 1313 error = j; 1314 } 1315 break; 1316 1317 case DIOCWDINFO: 1318 case DIOCSDINFO: 1319 #ifdef __HAVE_OLD_DISKLABEL 1320 case ODIOCWDINFO: 1321 case ODIOCSDINFO: 1322 #endif 1323 { 1324 struct disklabel *lp; 1325 #ifdef __HAVE_OLD_DISKLABEL 1326 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1327 memset(&newlabel, 0, sizeof newlabel); 1328 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1329 lp = &newlabel; 1330 } else 1331 #endif 1332 lp = (struct disklabel *)data; 1333 1334 cs->sc_flags |= CCDF_LABELLING; 1335 1336 error = setdisklabel(cs->sc_dkdev.dk_label, 1337 lp, 0, cs->sc_dkdev.dk_cpulabel); 1338 if (error == 0) { 1339 if (cmd == DIOCWDINFO 1340 #ifdef __HAVE_OLD_DISKLABEL 1341 || cmd == ODIOCWDINFO 1342 #endif 1343 ) 1344 error = writedisklabel(CCDLABELDEV(dev), 1345 ccdstrategy, cs->sc_dkdev.dk_label, 1346 cs->sc_dkdev.dk_cpulabel); 1347 } 1348 1349 cs->sc_flags &= ~CCDF_LABELLING; 1350 break; 1351 } 1352 1353 case DIOCKLABEL: 1354 if (*(int *)data != 0) 1355 cs->sc_flags |= CCDF_KLABEL; 1356 else 1357 cs->sc_flags &= ~CCDF_KLABEL; 1358 break; 1359 1360 case DIOCWLABEL: 1361 if (*(int *)data != 0) 1362 cs->sc_flags |= CCDF_WLABEL; 1363 else 1364 cs->sc_flags &= ~CCDF_WLABEL; 1365 break; 1366 1367 case DIOCGDEFLABEL: 1368 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1369 break; 1370 1371 #ifdef __HAVE_OLD_DISKLABEL 1372 case ODIOCGDEFLABEL: 1373 ccdgetdefaultlabel(cs, &newlabel); 1374 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1375 return ENOTTY; 1376 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1377 break; 1378 #endif 1379 1380 default: 1381 error = ENOTTY; 1382 } 1383 1384 out: 1385 mutex_exit(&cs->sc_dvlock); 1386 return (error); 1387 } 1388 1389 static int 1390 ccdsize(dev_t dev) 1391 { 1392 struct ccd_softc *cs; 1393 struct disklabel *lp; 1394 int part, unit, 
omask, size; 1395 1396 unit = ccdunit(dev); 1397 if ((cs = ccdget(unit)) == NULL) 1398 return -1; 1399 1400 if ((cs->sc_flags & CCDF_INITED) == 0) 1401 return (-1); 1402 1403 part = DISKPART(dev); 1404 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1405 lp = cs->sc_dkdev.dk_label; 1406 1407 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1408 return (-1); 1409 1410 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1411 size = -1; 1412 else 1413 size = lp->d_partitions[part].p_size * 1414 (lp->d_secsize / DEV_BSIZE); 1415 1416 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1417 return (-1); 1418 1419 return (size); 1420 } 1421 1422 static void 1423 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1424 { 1425 struct ccdgeom *ccg = &cs->sc_geom; 1426 1427 memset(lp, 0, sizeof(*lp)); 1428 1429 lp->d_secperunit = cs->sc_size; 1430 lp->d_secsize = ccg->ccg_secsize; 1431 lp->d_nsectors = ccg->ccg_nsectors; 1432 lp->d_ntracks = ccg->ccg_ntracks; 1433 lp->d_ncylinders = ccg->ccg_ncylinders; 1434 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1435 1436 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1437 lp->d_type = DTYPE_CCD; 1438 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1439 lp->d_rpm = 3600; 1440 lp->d_interleave = 1; 1441 lp->d_flags = 0; 1442 1443 lp->d_partitions[RAW_PART].p_offset = 0; 1444 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1445 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1446 lp->d_npartitions = RAW_PART + 1; 1447 1448 lp->d_magic = DISKMAGIC; 1449 lp->d_magic2 = DISKMAGIC; 1450 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1451 } 1452 1453 /* 1454 * Read the disklabel from the ccd. If one is not present, fake one 1455 * up. 
1456 */ 1457 static void 1458 ccdgetdisklabel(dev_t dev) 1459 { 1460 int unit = ccdunit(dev); 1461 struct ccd_softc *cs; 1462 const char *errstring; 1463 struct disklabel *lp; 1464 struct cpu_disklabel *clp; 1465 1466 if ((cs = ccdget(unit)) == NULL) 1467 return; 1468 lp = cs->sc_dkdev.dk_label; 1469 clp = cs->sc_dkdev.dk_cpulabel; 1470 KASSERT(mutex_owned(&cs->sc_dvlock)); 1471 1472 memset(clp, 0, sizeof(*clp)); 1473 1474 ccdgetdefaultlabel(cs, lp); 1475 1476 /* 1477 * Call the generic disklabel extraction routine. 1478 */ 1479 cs->sc_flags |= CCDF_RLABEL; 1480 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1481 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1482 else 1483 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1484 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1485 if (errstring) 1486 ccdmakedisklabel(cs); 1487 else { 1488 int i; 1489 struct partition *pp; 1490 1491 /* 1492 * Sanity check whether the found disklabel is valid. 1493 * 1494 * This is necessary since total size of ccd may vary 1495 * when an interleave is changed even though exactly 1496 * same componets are used, and old disklabel may used 1497 * if that is found. 1498 */ 1499 if (lp->d_secperunit != cs->sc_size) 1500 printf("WARNING: %s: " 1501 "total sector size in disklabel (%d) != " 1502 "the size of ccd (%lu)\n", cs->sc_xname, 1503 lp->d_secperunit, (u_long)cs->sc_size); 1504 for (i = 0; i < lp->d_npartitions; i++) { 1505 pp = &lp->d_partitions[i]; 1506 if (pp->p_offset + pp->p_size > cs->sc_size) 1507 printf("WARNING: %s: end of partition `%c' " 1508 "exceeds the size of ccd (%lu)\n", 1509 cs->sc_xname, 'a' + i, (u_long)cs->sc_size); 1510 } 1511 } 1512 1513 #ifdef DEBUG 1514 /* It's actually extremely common to have unlabeled ccds. */ 1515 if (ccddebug & CCDB_LABEL) 1516 if (errstring != NULL) 1517 printf("%s: %s\n", cs->sc_xname, errstring); 1518 #endif 1519 1520 /* In-core label now valid. 
*/ 1521 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1522 } 1523 1524 /* 1525 * Take care of things one might want to take care of in the event 1526 * that a disklabel isn't present. 1527 */ 1528 static void 1529 ccdmakedisklabel(struct ccd_softc *cs) 1530 { 1531 struct disklabel *lp = cs->sc_dkdev.dk_label; 1532 1533 /* 1534 * For historical reasons, if there's no disklabel present 1535 * the raw partition must be marked FS_BSDFFS. 1536 */ 1537 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1538 1539 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1540 1541 lp->d_checksum = dkcksum(lp); 1542 } 1543 1544 #ifdef DEBUG 1545 static void 1546 printiinfo(struct ccdiinfo *ii) 1547 { 1548 int ix, i; 1549 1550 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1551 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1552 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1553 for (i = 0; i < ii->ii_ndisk; i++) 1554 printf(" %d", ii->ii_index[i]); 1555 printf("\n"); 1556 } 1557 } 1558 #endif 1559 1560 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1561 1562 static int 1563 ccd_modcmd(modcmd_t cmd, void *arg) 1564 { 1565 int error = 0; 1566 #ifdef _MODULE 1567 int bmajor = -1, cmajor = -1; 1568 #endif 1569 1570 1571 switch (cmd) { 1572 case MODULE_CMD_INIT: 1573 #ifdef _MODULE 1574 ccdattach(4); 1575 1576 return devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1577 &ccd_cdevsw, &cmajor); 1578 #endif 1579 break; 1580 1581 case MODULE_CMD_FINI: 1582 #ifdef _MODULE 1583 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1584 #endif 1585 break; 1586 1587 case MODULE_CMD_STAT: 1588 return ENOTTY; 1589 1590 default: 1591 return ENOTTY; 1592 } 1593 1594 return error; 1595 } 1596 1597 static int 1598 ccd_units_sysctl(SYSCTLFN_ARGS) 1599 { 1600 struct sysctlnode node; 1601 struct ccd_softc *sc; 1602 int error, i, nccd, *units; 1603 size_t size; 1604 1605 nccd = 0; 1606 mutex_enter(&ccd_lock); 1607 LIST_FOREACH(sc, &ccds, sc_link) 1608 nccd++; 1609 
mutex_exit(&ccd_lock); 1610 1611 if (nccd != 0) { 1612 size = nccd * sizeof(*units); 1613 units = kmem_zalloc(size, KM_SLEEP); 1614 if (units == NULL) 1615 return ENOMEM; 1616 1617 i = 0; 1618 mutex_enter(&ccd_lock); 1619 LIST_FOREACH(sc, &ccds, sc_link) { 1620 if (i >= nccd) 1621 break; 1622 units[i] = sc->sc_unit; 1623 } 1624 mutex_exit(&ccd_lock); 1625 } else { 1626 units = NULL; 1627 size = 0; 1628 } 1629 1630 node = *rnode; 1631 node.sysctl_data = units; 1632 node.sysctl_size = size; 1633 1634 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1635 if (units) 1636 kmem_free(units, size); 1637 return error; 1638 } 1639 1640 static int 1641 ccd_info_sysctl(SYSCTLFN_ARGS) 1642 { 1643 struct sysctlnode node; 1644 struct ccddiskinfo ccd; 1645 struct ccd_softc *sc; 1646 int unit; 1647 1648 if (newp == NULL || newlen != sizeof(int)) 1649 return EINVAL; 1650 1651 unit = *(const int *)newp; 1652 newp = NULL; 1653 newlen = 0; 1654 ccd.ccd_ndisks = ~0; 1655 mutex_enter(&ccd_lock); 1656 LIST_FOREACH(sc, &ccds, sc_link) { 1657 if (sc->sc_unit == unit) { 1658 ccd.ccd_ileave = sc->sc_ileave; 1659 ccd.ccd_size = sc->sc_size; 1660 ccd.ccd_ndisks = sc->sc_nccdisks; 1661 ccd.ccd_flags = sc->sc_flags; 1662 break; 1663 } 1664 } 1665 mutex_exit(&ccd_lock); 1666 1667 if (ccd.ccd_ndisks == ~0) 1668 return ENOENT; 1669 1670 node = *rnode; 1671 node.sysctl_data = &ccd; 1672 node.sysctl_size = sizeof(ccd); 1673 1674 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1675 } 1676 1677 static int 1678 ccd_components_sysctl(SYSCTLFN_ARGS) 1679 { 1680 struct sysctlnode node; 1681 int error, unit; 1682 size_t size; 1683 char *names, *p, *ep; 1684 struct ccd_softc *sc; 1685 1686 if (newp == NULL || newlen != sizeof(int)) 1687 return EINVAL; 1688 1689 size = 0; 1690 unit = *(const int *)newp; 1691 newp = NULL; 1692 newlen = 0; 1693 mutex_enter(&ccd_lock); 1694 LIST_FOREACH(sc, &ccds, sc_link) 1695 if (sc->sc_unit == unit) { 1696 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1697 size += 
strlen(sc->sc_cinfo[i].ci_path) + 1; 1698 break; 1699 } 1700 mutex_exit(&ccd_lock); 1701 1702 if (size == 0) 1703 return ENOENT; 1704 names = kmem_zalloc(size, KM_SLEEP); 1705 if (names == NULL) 1706 return ENOMEM; 1707 1708 p = names; 1709 ep = names + size; 1710 mutex_enter(&ccd_lock); 1711 LIST_FOREACH(sc, &ccds, sc_link) 1712 if (sc->sc_unit == unit) { 1713 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1714 char *d = sc->sc_cinfo[i].ci_path; 1715 while (p < ep && (*p++ = *d++) != '\0') 1716 continue; 1717 } 1718 break; 1719 } 1720 mutex_exit(&ccd_lock); 1721 1722 node = *rnode; 1723 node.sysctl_data = names; 1724 node.sysctl_size = ep - names; 1725 1726 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1727 kmem_free(names, size); 1728 return error; 1729 } 1730 1731 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1732 { 1733 const struct sysctlnode *node = NULL; 1734 1735 sysctl_createv(clog, 0, NULL, &node, 1736 CTLFLAG_PERMANENT, 1737 CTLTYPE_NODE, "ccd", 1738 SYSCTL_DESCR("ConCatenated Disk state"), 1739 NULL, 0, NULL, 0, 1740 CTL_KERN, CTL_CREATE, CTL_EOL); 1741 1742 if (node == NULL) 1743 return; 1744 1745 sysctl_createv(clog, 0, &node, NULL, 1746 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1747 CTLTYPE_STRUCT, "units", 1748 SYSCTL_DESCR("List of ccd unit numbers"), 1749 ccd_units_sysctl, 0, NULL, 0, 1750 CTL_CREATE, CTL_EOL); 1751 sysctl_createv(clog, 0, &node, NULL, 1752 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1753 CTLTYPE_STRUCT, "info", 1754 SYSCTL_DESCR("Information about a CCD unit"), 1755 ccd_info_sysctl, 0, NULL, 0, 1756 CTL_CREATE, CTL_EOL); 1757 sysctl_createv(clog, 0, &node, NULL, 1758 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1759 CTLTYPE_STRUCT, "components", 1760 SYSCTL_DESCR("Information about CCD components"), 1761 ccd_components_sysctl, 0, NULL, 0, 1762 CTL_CREATE, CTL_EOL); 1763 } 1764