1 /* $NetBSD: ccd.c,v 1.151 2014/07/25 08:10:35 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.151 2014/07/25 08:10:35 dholland Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 119 #include <uvm/uvm_extern.h> 120 121 #include <dev/ccdvar.h> 122 #include <dev/dkvar.h> 123 124 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 125 126 #if defined(CCDDEBUG) && !defined(DEBUG) 127 #define DEBUG 128 #endif 129 130 #ifdef DEBUG 131 #define CCDB_FOLLOW 0x01 132 #define CCDB_INIT 0x02 133 #define CCDB_IO 0x04 134 #define CCDB_LABEL 0x08 135 #define CCDB_VNODE 0x10 136 int ccddebug = 0x00; 137 #endif 138 139 #define ccdunit(x) DISKUNIT(x) 140 141 struct ccdbuf { 142 struct buf cb_buf; /* new I/O buf */ 143 struct buf *cb_obp; /* ptr. to original I/O buf */ 144 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 145 int cb_comp; /* target component */ 146 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 147 }; 148 149 /* component buffer pool */ 150 static pool_cache_t ccd_cache; 151 152 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 153 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 154 155 #define CCDLABELDEV(dev) \ 156 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 157 158 /* called by main() at boot time */ 159 void ccdattach(int); 160 161 /* called by biodone() at interrupt time */ 162 static void ccdiodone(struct buf *); 163 164 static void ccdinterleave(struct ccd_softc *); 165 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 166 struct lwp *); 167 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 168 daddr_t, void *, long); 169 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 170 static void ccdgetdisklabel(dev_t); 171 static void ccdmakedisklabel(struct ccd_softc *); 172 static void ccdstart(struct ccd_softc *); 173 static void ccdthread(void *); 174 175 static dev_type_open(ccdopen); 176 static dev_type_close(ccdclose); 177 static dev_type_read(ccdread); 178 static dev_type_write(ccdwrite); 179 static dev_type_ioctl(ccdioctl); 180 static dev_type_strategy(ccdstrategy); 181 static dev_type_size(ccdsize); 182 183 const struct bdevsw ccd_bdevsw = { 184 .d_open = ccdopen, 185 .d_close = ccdclose, 186 .d_strategy = ccdstrategy, 187 .d_ioctl = ccdioctl, 188 .d_dump = nodump, 189 .d_psize = ccdsize, 190 .d_discard = nodiscard, 191 .d_flag = D_DISK | D_MPSAFE 192 }; 193 194 const struct cdevsw ccd_cdevsw = { 195 .d_open = ccdopen, 196 .d_close = ccdclose, 197 .d_read = ccdread, 198 .d_write = ccdwrite, 199 .d_ioctl = ccdioctl, 200 .d_stop = nostop, 201 .d_tty = notty, 202 .d_poll = nopoll, 203 .d_mmap = nommap, 204 .d_kqfilter = nokqfilter, 205 .d_discard = nodiscard, 206 .d_flag = D_DISK | D_MPSAFE 207 }; 208 209 #ifdef DEBUG 210 static void printiinfo(struct ccdiinfo *); 211 #endif 212 213 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 214 static kmutex_t ccd_lock; 215 216 static struct ccd_softc * 217 ccdcreate(int unit) { 218 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 219 if (sc == NULL) { 220 #ifdef DIAGNOSTIC 221 printf("%s: out of memory\n", __func__); 222 #endif 223 return NULL; 224 } 225 /* Initialize per-softc structures. */ 226 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 227 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 228 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 229 cv_init(&sc->sc_stop, "ccdstop"); 230 cv_init(&sc->sc_push, "ccdthr"); 231 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 232 return sc; 233 } 234 235 static void 236 ccddestroy(struct ccd_softc *sc) { 237 mutex_obj_free(sc->sc_iolock); 238 mutex_exit(&sc->sc_dvlock); 239 mutex_destroy(&sc->sc_dvlock); 240 cv_destroy(&sc->sc_stop); 241 cv_destroy(&sc->sc_push); 242 disk_destroy(&sc->sc_dkdev); 243 kmem_free(sc, sizeof(*sc)); 244 } 245 246 static struct ccd_softc * 247 ccdget(int unit) { 248 struct ccd_softc *sc; 249 if (unit < 0) { 250 #ifdef DIAGNOSTIC 251 panic("%s: unit %d!", __func__, unit); 252 #endif 253 return NULL; 254 } 255 mutex_enter(&ccd_lock); 256 LIST_FOREACH(sc, &ccds, sc_link) { 257 if (sc->sc_unit == unit) { 258 mutex_exit(&ccd_lock); 259 return sc; 260 } 261 } 262 mutex_exit(&ccd_lock); 263 if ((sc = ccdcreate(unit)) == NULL) 264 return NULL; 265 mutex_enter(&ccd_lock); 266 LIST_INSERT_HEAD(&ccds, sc, sc_link); 267 mutex_exit(&ccd_lock); 268 return sc; 269 } 270 271 static void 272 ccdput(struct ccd_softc *sc) { 273 mutex_enter(&ccd_lock); 274 LIST_REMOVE(sc, sc_link); 275 mutex_exit(&ccd_lock); 276 ccddestroy(sc); 277 } 278 279 /* 280 * Called by main() during pseudo-device attachment. All we need 281 * to do is allocate enough space for devices to be configured later. 282 */ 283 void 284 ccdattach(int num) 285 { 286 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 287 288 /* Initialize the component buffer pool. */ 289 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 290 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 291 } 292 293 static int 294 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 295 struct lwp *l) 296 { 297 struct ccdcinfo *ci = NULL; 298 int ix; 299 struct ccdgeom *ccg = &cs->sc_geom; 300 char *tmppath; 301 int error, path_alloced; 302 uint64_t psize, minsize; 303 unsigned secsize, maxsecsize; 304 305 #ifdef DEBUG 306 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 307 printf("%s: ccdinit\n", cs->sc_xname); 308 #endif 309 310 /* Allocate space for the component info. */ 311 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 312 KM_SLEEP); 313 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 314 315 cs->sc_size = 0; 316 317 /* 318 * Verify that each component piece exists and record 319 * relevant information about it. 320 */ 321 maxsecsize = 0; 322 minsize = 0; 323 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 324 ci = &cs->sc_cinfo[ix]; 325 ci->ci_vp = vpp[ix]; 326 327 /* 328 * Copy in the pathname of the component. 329 */ 330 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 331 error = copyinstr(cpaths[ix], tmppath, 332 MAXPATHLEN, &ci->ci_pathlen); 333 if (ci->ci_pathlen == 0) 334 error = EINVAL; 335 if (error) { 336 #ifdef DEBUG 337 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 338 printf("%s: can't copy path, error = %d\n", 339 cs->sc_xname, error); 340 #endif 341 goto out; 342 } 343 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 344 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 345 path_alloced++; 346 347 /* 348 * XXX: Cache the component's dev_t. 349 */ 350 ci->ci_dev = vpp[ix]->v_rdev; 351 352 /* 353 * Get partition information for the component. 354 */ 355 error = getdisksize(vpp[ix], &psize, &secsize); 356 if (error) { 357 #ifdef DEBUG 358 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 359 printf("%s: %s: disksize failed, error = %d\n", 360 cs->sc_xname, ci->ci_path, error); 361 #endif 362 goto out; 363 } 364 365 /* 366 * Calculate the size, truncating to an interleave 367 * boundary if necessary. 368 */ 369 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 370 if (cs->sc_ileave > 1) 371 psize -= psize % cs->sc_ileave; 372 373 if (psize == 0) { 374 #ifdef DEBUG 375 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 376 printf("%s: %s: size == 0\n", 377 cs->sc_xname, ci->ci_path); 378 #endif 379 error = ENODEV; 380 goto out; 381 } 382 383 if (minsize == 0 || psize < minsize) 384 minsize = psize; 385 ci->ci_size = psize; 386 cs->sc_size += psize; 387 } 388 389 /* 390 * Don't allow the interleave to be smaller than 391 * the biggest component sector. 392 */ 393 if ((cs->sc_ileave > 0) && 394 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 395 #ifdef DEBUG 396 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 397 printf("%s: interleave must be at least %d\n", 398 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 399 #endif 400 error = EINVAL; 401 goto out; 402 } 403 404 /* 405 * If uniform interleave is desired set all sizes to that of 406 * the smallest component. 407 */ 408 if (cs->sc_flags & CCDF_UNIFORM) { 409 for (ci = cs->sc_cinfo; 410 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 411 ci->ci_size = minsize; 412 413 cs->sc_size = cs->sc_nccdisks * minsize; 414 } 415 416 /* 417 * Construct the interleave table. 418 */ 419 ccdinterleave(cs); 420 421 /* 422 * Create pseudo-geometry based on 1MB cylinders. It's 423 * pretty close. 424 */ 425 ccg->ccg_secsize = DEV_BSIZE; 426 ccg->ccg_ntracks = 1; 427 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 428 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 429 430 /* 431 * Create thread to handle deferred I/O. 432 */ 433 cs->sc_zap = false; 434 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 435 cs, &cs->sc_thread, "%s", cs->sc_xname); 436 if (error) { 437 printf("ccdinit: can't create thread: %d\n", error); 438 goto out; 439 } 440 441 /* 442 * Only now that everything is set up can we enable the device. 443 */ 444 mutex_enter(cs->sc_iolock); 445 cs->sc_flags |= CCDF_INITED; 446 mutex_exit(cs->sc_iolock); 447 kmem_free(tmppath, MAXPATHLEN); 448 return (0); 449 450 out: 451 for (ix = 0; ix < path_alloced; ix++) { 452 kmem_free(cs->sc_cinfo[ix].ci_path, 453 cs->sc_cinfo[ix].ci_pathlen); 454 } 455 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 456 kmem_free(tmppath, MAXPATHLEN); 457 return (error); 458 } 459 460 static void 461 ccdinterleave(struct ccd_softc *cs) 462 { 463 struct ccdcinfo *ci, *smallci; 464 struct ccdiinfo *ii; 465 daddr_t bn, lbn; 466 int ix; 467 u_long size; 468 469 #ifdef DEBUG 470 if (ccddebug & CCDB_INIT) 471 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 472 #endif 473 /* 474 * Allocate an interleave table. 475 * Chances are this is too big, but we don't care. 476 */ 477 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 478 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 479 480 /* 481 * Trivial case: no interleave (actually interleave of disk size). 482 * Each table entry represents a single component in its entirety. 483 */ 484 if (cs->sc_ileave == 0) { 485 bn = 0; 486 ii = cs->sc_itable; 487 488 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 489 /* Allocate space for ii_index. */ 490 ii->ii_indexsz = sizeof(int); 491 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 492 ii->ii_ndisk = 1; 493 ii->ii_startblk = bn; 494 ii->ii_startoff = 0; 495 ii->ii_index[0] = ix; 496 bn += cs->sc_cinfo[ix].ci_size; 497 ii++; 498 } 499 ii->ii_ndisk = 0; 500 #ifdef DEBUG 501 if (ccddebug & CCDB_INIT) 502 printiinfo(cs->sc_itable); 503 #endif 504 return; 505 } 506 507 /* 508 * The following isn't fast or pretty; it doesn't have to be. 509 */ 510 size = 0; 511 bn = lbn = 0; 512 for (ii = cs->sc_itable; ; ii++) { 513 /* Allocate space for ii_index. */ 514 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 515 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 516 517 /* 518 * Locate the smallest of the remaining components 519 */ 520 smallci = NULL; 521 for (ci = cs->sc_cinfo; 522 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 523 if (ci->ci_size > size && 524 (smallci == NULL || 525 ci->ci_size < smallci->ci_size)) 526 smallci = ci; 527 528 /* 529 * Nobody left, all done 530 */ 531 if (smallci == NULL) { 532 ii->ii_ndisk = 0; 533 break; 534 } 535 536 /* 537 * Record starting logical block and component offset 538 */ 539 ii->ii_startblk = bn / cs->sc_ileave; 540 ii->ii_startoff = lbn; 541 542 /* 543 * Determine how many disks take part in this interleave 544 * and record their indices. 545 */ 546 ix = 0; 547 for (ci = cs->sc_cinfo; 548 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 549 if (ci->ci_size >= smallci->ci_size) 550 ii->ii_index[ix++] = ci - cs->sc_cinfo; 551 ii->ii_ndisk = ix; 552 bn += ix * (smallci->ci_size - size); 553 lbn = smallci->ci_size / cs->sc_ileave; 554 size = smallci->ci_size; 555 } 556 #ifdef DEBUG 557 if (ccddebug & CCDB_INIT) 558 printiinfo(cs->sc_itable); 559 #endif 560 } 561 562 /* ARGSUSED */ 563 static int 564 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 565 { 566 int unit = ccdunit(dev); 567 struct ccd_softc *cs; 568 struct disklabel *lp; 569 int error = 0, part, pmask; 570 571 #ifdef DEBUG 572 if (ccddebug & CCDB_FOLLOW) 573 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 574 #endif 575 if ((cs = ccdget(unit)) == NULL) 576 return ENXIO; 577 578 mutex_enter(&cs->sc_dvlock); 579 580 lp = cs->sc_dkdev.dk_label; 581 582 part = DISKPART(dev); 583 pmask = (1 << part); 584 585 /* 586 * If we're initialized, check to see if there are any other 587 * open partitions. If not, then it's safe to update 588 * the in-core disklabel. Only read the disklabel if it is 589 * not already valid. 590 */ 591 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 592 cs->sc_dkdev.dk_openmask == 0) 593 ccdgetdisklabel(dev); 594 595 /* Check that the partition exists. */ 596 if (part != RAW_PART) { 597 if (((cs->sc_flags & CCDF_INITED) == 0) || 598 ((part >= lp->d_npartitions) || 599 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 600 error = ENXIO; 601 goto done; 602 } 603 } 604 605 /* Prevent our unit from being unconfigured while open. */ 606 switch (fmt) { 607 case S_IFCHR: 608 cs->sc_dkdev.dk_copenmask |= pmask; 609 break; 610 611 case S_IFBLK: 612 cs->sc_dkdev.dk_bopenmask |= pmask; 613 break; 614 } 615 cs->sc_dkdev.dk_openmask = 616 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 617 618 done: 619 mutex_exit(&cs->sc_dvlock); 620 return (error); 621 } 622 623 /* ARGSUSED */ 624 static int 625 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 626 { 627 int unit = ccdunit(dev); 628 struct ccd_softc *cs; 629 int part; 630 631 #ifdef DEBUG 632 if (ccddebug & CCDB_FOLLOW) 633 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 634 #endif 635 636 if ((cs = ccdget(unit)) == NULL) 637 return ENXIO; 638 639 mutex_enter(&cs->sc_dvlock); 640 641 part = DISKPART(dev); 642 643 /* ...that much closer to allowing unconfiguration... */ 644 switch (fmt) { 645 case S_IFCHR: 646 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 647 break; 648 649 case S_IFBLK: 650 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 651 break; 652 } 653 cs->sc_dkdev.dk_openmask = 654 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 655 656 if (cs->sc_dkdev.dk_openmask == 0) { 657 if ((cs->sc_flags & CCDF_KLABEL) == 0) 658 cs->sc_flags &= ~CCDF_VLABEL; 659 } 660 661 mutex_exit(&cs->sc_dvlock); 662 return (0); 663 } 664 665 static bool 666 ccdbackoff(struct ccd_softc *cs) 667 { 668 669 /* XXX Arbitrary, should be a uvm call. */ 670 return uvmexp.free < (uvmexp.freemin >> 1) && 671 disk_isbusy(&cs->sc_dkdev); 672 } 673 674 static void 675 ccdthread(void *cookie) 676 { 677 struct ccd_softc *cs; 678 679 cs = cookie; 680 681 #ifdef DEBUG 682 if (ccddebug & CCDB_FOLLOW) 683 printf("ccdthread: hello\n"); 684 #endif 685 686 mutex_enter(cs->sc_iolock); 687 while (__predict_true(!cs->sc_zap)) { 688 if (bufq_peek(cs->sc_bufq) == NULL) { 689 /* Nothing to do. */ 690 cv_wait(&cs->sc_push, cs->sc_iolock); 691 continue; 692 } 693 if (ccdbackoff(cs)) { 694 /* Wait for memory to become available. */ 695 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 696 continue; 697 } 698 #ifdef DEBUG 699 if (ccddebug & CCDB_FOLLOW) 700 printf("ccdthread: dispatching I/O\n"); 701 #endif 702 ccdstart(cs); 703 mutex_enter(cs->sc_iolock); 704 } 705 cs->sc_thread = NULL; 706 mutex_exit(cs->sc_iolock); 707 #ifdef DEBUG 708 if (ccddebug & CCDB_FOLLOW) 709 printf("ccdthread: goodbye\n"); 710 #endif 711 kthread_exit(0); 712 } 713 714 static void 715 ccdstrategy(struct buf *bp) 716 { 717 int unit = ccdunit(bp->b_dev); 718 struct ccd_softc *cs; 719 if ((cs = ccdget(unit)) == NULL) 720 return; 721 722 /* Must be open or reading label. */ 723 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 724 (cs->sc_flags & CCDF_RLABEL) != 0); 725 726 mutex_enter(cs->sc_iolock); 727 /* Synchronize with device init/uninit. */ 728 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 729 mutex_exit(cs->sc_iolock); 730 #ifdef DEBUG 731 if (ccddebug & CCDB_FOLLOW) 732 printf("ccdstrategy: unit %d: not inited\n", unit); 733 #endif 734 bp->b_error = ENXIO; 735 bp->b_resid = bp->b_bcount; 736 biodone(bp); 737 return; 738 } 739 740 /* Defer to thread if system is low on memory. */ 741 bufq_put(cs->sc_bufq, bp); 742 if (__predict_false(ccdbackoff(cs))) { 743 mutex_exit(cs->sc_iolock); 744 #ifdef DEBUG 745 if (ccddebug & CCDB_FOLLOW) 746 printf("ccdstrategy: holding off on I/O\n"); 747 #endif 748 return; 749 } 750 ccdstart(cs); 751 } 752 753 static void 754 ccdstart(struct ccd_softc *cs) 755 { 756 daddr_t blkno; 757 int wlabel; 758 struct disklabel *lp; 759 long bcount, rcount; 760 struct ccdbuf *cbp; 761 char *addr; 762 daddr_t bn; 763 vnode_t *vp; 764 buf_t *bp; 765 766 KASSERT(mutex_owned(cs->sc_iolock)); 767 768 disk_busy(&cs->sc_dkdev); 769 bp = bufq_get(cs->sc_bufq); 770 KASSERT(bp != NULL); 771 772 #ifdef DEBUG 773 if (ccddebug & CCDB_FOLLOW) 774 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 775 #endif 776 777 /* If it's a nil transfer, wake up the top half now. */ 778 if (bp->b_bcount == 0) 779 goto done; 780 781 lp = cs->sc_dkdev.dk_label; 782 783 /* 784 * Do bounds checking and adjust transfer. If there's an 785 * error, the bounds check will flag that for us. Convert 786 * the partition relative block number to an absolute. 787 */ 788 blkno = bp->b_blkno; 789 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 790 if (DISKPART(bp->b_dev) != RAW_PART) { 791 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 792 goto done; 793 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 794 } 795 mutex_exit(cs->sc_iolock); 796 bp->b_rawblkno = blkno; 797 798 /* Allocate the component buffers and start I/O! */ 799 bp->b_resid = bp->b_bcount; 800 bn = bp->b_rawblkno; 801 addr = bp->b_data; 802 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 803 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 804 rcount = cbp->cb_buf.b_bcount; 805 bn += btodb(rcount); 806 addr += rcount; 807 vp = cbp->cb_buf.b_vp; 808 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 809 mutex_enter(vp->v_interlock); 810 vp->v_numoutput++; 811 mutex_exit(vp->v_interlock); 812 } 813 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 814 } 815 return; 816 817 done: 818 disk_unbusy(&cs->sc_dkdev, 0, 0); 819 cv_broadcast(&cs->sc_stop); 820 cv_broadcast(&cs->sc_push); 821 mutex_exit(cs->sc_iolock); 822 bp->b_resid = bp->b_bcount; 823 biodone(bp); 824 } 825 826 /* 827 * Build a component buffer header. 828 */ 829 static struct ccdbuf * 830 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 831 long bcount) 832 { 833 struct ccdcinfo *ci; 834 struct ccdbuf *cbp; 835 daddr_t cbn, cboff; 836 u_int64_t cbc; 837 int ccdisk; 838 839 #ifdef DEBUG 840 if (ccddebug & CCDB_IO) 841 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 842 cs, bp, bn, addr, bcount); 843 #endif 844 /* 845 * Determine which component bn falls in. 846 */ 847 cbn = bn; 848 cboff = 0; 849 850 /* 851 * Serially concatenated 852 */ 853 if (cs->sc_ileave == 0) { 854 daddr_t sblk; 855 856 sblk = 0; 857 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 858 cbn >= sblk + ci->ci_size; 859 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 860 sblk += ci->ci_size; 861 cbn -= sblk; 862 } 863 /* 864 * Interleaved 865 */ 866 else { 867 struct ccdiinfo *ii; 868 int off; 869 870 cboff = cbn % cs->sc_ileave; 871 cbn /= cs->sc_ileave; 872 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 873 if (ii->ii_startblk > cbn) 874 break; 875 ii--; 876 off = cbn - ii->ii_startblk; 877 if (ii->ii_ndisk == 1) { 878 ccdisk = ii->ii_index[0]; 879 cbn = ii->ii_startoff + off; 880 } else { 881 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 882 cbn = ii->ii_startoff + off / ii->ii_ndisk; 883 } 884 cbn *= cs->sc_ileave; 885 ci = &cs->sc_cinfo[ccdisk]; 886 } 887 888 /* 889 * Fill in the component buf structure. 890 */ 891 cbp = CCD_GETBUF(); 892 KASSERT(cbp != NULL); 893 buf_init(&cbp->cb_buf); 894 cbp->cb_buf.b_flags = bp->b_flags; 895 cbp->cb_buf.b_oflags = bp->b_oflags; 896 cbp->cb_buf.b_cflags = bp->b_cflags; 897 cbp->cb_buf.b_iodone = ccdiodone; 898 cbp->cb_buf.b_proc = bp->b_proc; 899 cbp->cb_buf.b_dev = ci->ci_dev; 900 cbp->cb_buf.b_blkno = cbn + cboff; 901 cbp->cb_buf.b_data = addr; 902 cbp->cb_buf.b_vp = ci->ci_vp; 903 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 904 if (cs->sc_ileave == 0) 905 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 906 else 907 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 908 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 909 910 /* 911 * context for ccdiodone 912 */ 913 cbp->cb_obp = bp; 914 cbp->cb_sc = cs; 915 cbp->cb_comp = ccdisk; 916 917 BIO_COPYPRIO(&cbp->cb_buf, bp); 918 919 #ifdef DEBUG 920 if (ccddebug & CCDB_IO) 921 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 922 " bcnt %d\n", 923 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 924 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 925 cbp->cb_buf.b_bcount); 926 #endif 927 928 return (cbp); 929 } 930 931 /* 932 * Called at interrupt time. 933 * Mark the component as done and if all components are done, 934 * take a ccd interrupt. 935 */ 936 static void 937 ccdiodone(struct buf *vbp) 938 { 939 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 940 struct buf *bp = cbp->cb_obp; 941 struct ccd_softc *cs = cbp->cb_sc; 942 int count; 943 944 #ifdef DEBUG 945 if (ccddebug & CCDB_FOLLOW) 946 printf("ccdiodone(%p)\n", cbp); 947 if (ccddebug & CCDB_IO) { 948 printf("ccdiodone: bp %p bcount %d resid %d\n", 949 bp, bp->b_bcount, bp->b_resid); 950 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 951 " bcnt %d\n", 952 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 953 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 954 cbp->cb_buf.b_bcount); 955 } 956 #endif 957 958 if (cbp->cb_buf.b_error != 0) { 959 bp->b_error = cbp->cb_buf.b_error; 960 printf("%s: error %d on component %d\n", 961 cs->sc_xname, bp->b_error, cbp->cb_comp); 962 } 963 count = cbp->cb_buf.b_bcount; 964 buf_destroy(&cbp->cb_buf); 965 CCD_PUTBUF(cbp); 966 967 /* 968 * If all done, "interrupt". 969 */ 970 mutex_enter(cs->sc_iolock); 971 bp->b_resid -= count; 972 if (bp->b_resid < 0) 973 panic("ccdiodone: count"); 974 if (bp->b_resid == 0) { 975 /* 976 * Request is done for better or worse, wakeup the top half. 977 */ 978 if (bp->b_error != 0) 979 bp->b_resid = bp->b_bcount; 980 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 981 (bp->b_flags & B_READ)); 982 if (!disk_isbusy(&cs->sc_dkdev)) { 983 if (bufq_peek(cs->sc_bufq) != NULL) { 984 cv_broadcast(&cs->sc_push); 985 } 986 cv_broadcast(&cs->sc_stop); 987 } 988 mutex_exit(cs->sc_iolock); 989 biodone(bp); 990 } else 991 mutex_exit(cs->sc_iolock); 992 } 993 994 /* ARGSUSED */ 995 static int 996 ccdread(dev_t dev, struct uio *uio, int flags) 997 { 998 int unit = ccdunit(dev); 999 struct ccd_softc *cs; 1000 1001 #ifdef DEBUG 1002 if (ccddebug & CCDB_FOLLOW) 1003 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1004 #endif 1005 if ((cs = ccdget(unit)) == NULL) 1006 return 0; 1007 1008 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1009 if ((cs->sc_flags & CCDF_INITED) == 0) 1010 return (ENXIO); 1011 1012 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1013 } 1014 1015 /* ARGSUSED */ 1016 static int 1017 ccdwrite(dev_t dev, struct uio *uio, int flags) 1018 { 1019 int unit = ccdunit(dev); 1020 struct ccd_softc *cs; 1021 1022 #ifdef DEBUG 1023 if (ccddebug & CCDB_FOLLOW) 1024 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1025 #endif 1026 if ((cs = ccdget(unit)) == NULL) 1027 return ENOENT; 1028 1029 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1030 if ((cs->sc_flags & CCDF_INITED) == 0) 1031 return (ENXIO); 1032 1033 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1034 } 1035 1036 static int 1037 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1038 { 1039 int unit = ccdunit(dev); 1040 int i, j, lookedup = 0, error = 0; 1041 int part, pmask; 1042 struct ccd_softc *cs; 1043 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1044 kauth_cred_t uc; 1045 char **cpp; 1046 struct pathbuf *pb; 1047 struct vnode **vpp; 1048 #ifdef __HAVE_OLD_DISKLABEL 1049 struct disklabel newlabel; 1050 #endif 1051 1052 if ((cs = ccdget(unit)) == NULL) 1053 return ENOENT; 1054 uc = kauth_cred_get(); 1055 1056 /* Must be open for writes for these commands... */ 1057 switch (cmd) { 1058 case CCDIOCSET: 1059 case CCDIOCCLR: 1060 case DIOCSDINFO: 1061 case DIOCWDINFO: 1062 #ifdef __HAVE_OLD_DISKLABEL 1063 case ODIOCSDINFO: 1064 case ODIOCWDINFO: 1065 #endif 1066 case DIOCKLABEL: 1067 case DIOCWLABEL: 1068 if ((flag & FWRITE) == 0) 1069 return (EBADF); 1070 } 1071 1072 mutex_enter(&cs->sc_dvlock); 1073 1074 /* Must be initialized for these... */ 1075 switch (cmd) { 1076 case CCDIOCCLR: 1077 case DIOCGDINFO: 1078 case DIOCCACHESYNC: 1079 case DIOCSDINFO: 1080 case DIOCWDINFO: 1081 case DIOCGPART: 1082 case DIOCWLABEL: 1083 case DIOCKLABEL: 1084 case DIOCGDEFLABEL: 1085 #ifdef __HAVE_OLD_DISKLABEL 1086 case ODIOCGDINFO: 1087 case ODIOCSDINFO: 1088 case ODIOCWDINFO: 1089 case ODIOCGDEFLABEL: 1090 #endif 1091 if ((cs->sc_flags & CCDF_INITED) == 0) { 1092 error = ENXIO; 1093 goto out; 1094 } 1095 } 1096 1097 switch (cmd) { 1098 case CCDIOCSET: 1099 if (cs->sc_flags & CCDF_INITED) { 1100 error = EBUSY; 1101 goto out; 1102 } 1103 1104 /* Validate the flags. */ 1105 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1106 error = EINVAL; 1107 goto out; 1108 } 1109 1110 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1111 ccio->ccio_ndisks == 0) { 1112 error = EINVAL; 1113 goto out; 1114 } 1115 1116 /* Fill in some important bits. */ 1117 cs->sc_ileave = ccio->ccio_ileave; 1118 cs->sc_nccdisks = ccio->ccio_ndisks; 1119 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1120 1121 /* 1122 * Allocate space for and copy in the array of 1123 * componet pathnames and device numbers. 1124 */ 1125 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1126 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1127 error = copyin(ccio->ccio_disks, cpp, 1128 ccio->ccio_ndisks * sizeof(*cpp)); 1129 if (error) { 1130 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1131 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1132 goto out; 1133 } 1134 1135 #ifdef DEBUG 1136 if (ccddebug & CCDB_INIT) 1137 for (i = 0; i < ccio->ccio_ndisks; ++i) 1138 printf("ccdioctl: component %d: %p\n", 1139 i, cpp[i]); 1140 #endif 1141 1142 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1143 #ifdef DEBUG 1144 if (ccddebug & CCDB_INIT) 1145 printf("ccdioctl: lookedup = %d\n", lookedup); 1146 #endif 1147 error = pathbuf_copyin(cpp[i], &pb); 1148 if (error == 0) { 1149 error = dk_lookup(pb, l, &vpp[i]); 1150 } 1151 pathbuf_destroy(pb); 1152 if (error != 0) { 1153 for (j = 0; j < lookedup; ++j) 1154 (void)vn_close(vpp[j], FREAD|FWRITE, 1155 uc); 1156 kmem_free(vpp, ccio->ccio_ndisks * 1157 sizeof(*vpp)); 1158 kmem_free(cpp, ccio->ccio_ndisks * 1159 sizeof(*cpp)); 1160 goto out; 1161 } 1162 ++lookedup; 1163 } 1164 1165 /* Attach the disk. */ 1166 disk_attach(&cs->sc_dkdev); 1167 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1168 1169 /* 1170 * Initialize the ccd. Fills in the softc for us. 1171 */ 1172 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1173 for (j = 0; j < lookedup; ++j) 1174 (void)vn_close(vpp[j], FREAD|FWRITE, 1175 uc); 1176 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1177 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1178 disk_detach(&cs->sc_dkdev); 1179 bufq_free(cs->sc_bufq); 1180 goto out; 1181 } 1182 1183 /* We can free the temporary variables now. */ 1184 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1185 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1186 1187 /* 1188 * The ccd has been successfully initialized, so 1189 * we can place it into the array. Don't try to 1190 * read the disklabel until the disk has been attached, 1191 * because space for the disklabel is allocated 1192 * in disk_attach(); 1193 */ 1194 ccio->ccio_unit = unit; 1195 ccio->ccio_size = cs->sc_size; 1196 1197 /* Try and read the disklabel. */ 1198 ccdgetdisklabel(dev); 1199 break; 1200 1201 case CCDIOCCLR: 1202 /* 1203 * Don't unconfigure if any other partitions are open 1204 * or if both the character and block flavors of this 1205 * partition are open. 1206 */ 1207 part = DISKPART(dev); 1208 pmask = (1 << part); 1209 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1210 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1211 (cs->sc_dkdev.dk_copenmask & pmask))) { 1212 error = EBUSY; 1213 goto out; 1214 } 1215 1216 /* Stop new I/O, wait for in-flight I/O to complete. */ 1217 mutex_enter(cs->sc_iolock); 1218 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1219 cs->sc_zap = true; 1220 while (disk_isbusy(&cs->sc_dkdev) || 1221 bufq_peek(cs->sc_bufq) != NULL || 1222 cs->sc_thread != NULL) { 1223 cv_broadcast(&cs->sc_push); 1224 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1225 } 1226 mutex_exit(cs->sc_iolock); 1227 1228 /* 1229 * Free ccd_softc information and clear entry. 1230 */ 1231 1232 /* Close the components and free their pathnames. */ 1233 for (i = 0; i < cs->sc_nccdisks; ++i) { 1234 /* 1235 * XXX: this close could potentially fail and 1236 * cause Bad Things. Maybe we need to force 1237 * the close to happen? 1238 */ 1239 #ifdef DEBUG 1240 if (ccddebug & CCDB_VNODE) 1241 vprint("CCDIOCCLR: vnode info", 1242 cs->sc_cinfo[i].ci_vp); 1243 #endif 1244 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1245 uc); 1246 kmem_free(cs->sc_cinfo[i].ci_path, 1247 cs->sc_cinfo[i].ci_pathlen); 1248 } 1249 1250 /* Free interleave index. */ 1251 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1252 kmem_free(cs->sc_itable[i].ii_index, 1253 cs->sc_itable[i].ii_indexsz); 1254 } 1255 1256 /* Free component info and interleave table. */ 1257 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1258 sizeof(struct ccdcinfo)); 1259 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1260 sizeof(struct ccdiinfo)); 1261 1262 /* Detatch the disk. */ 1263 disk_detach(&cs->sc_dkdev); 1264 bufq_free(cs->sc_bufq); 1265 ccdput(cs); 1266 /* Don't break, otherwise cs is read again. */ 1267 return 0; 1268 1269 case DIOCGDINFO: 1270 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label); 1271 break; 1272 1273 #ifdef __HAVE_OLD_DISKLABEL 1274 case ODIOCGDINFO: 1275 newlabel = *(cs->sc_dkdev.dk_label); 1276 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1277 return ENOTTY; 1278 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1279 break; 1280 #endif 1281 1282 case DIOCGPART: 1283 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label; 1284 ((struct partinfo *)data)->part = 1285 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1286 break; 1287 1288 case DIOCCACHESYNC: 1289 /* 1290 * XXX Do we really need to care about having a writable 1291 * file descriptor here? 1292 */ 1293 if ((flag & FWRITE) == 0) 1294 return (EBADF); 1295 1296 /* 1297 * We pass this call down to all components and report 1298 * the first error we encounter. 1299 */ 1300 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1301 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1302 flag, uc); 1303 if (j != 0 && error == 0) 1304 error = j; 1305 } 1306 break; 1307 1308 case DIOCWDINFO: 1309 case DIOCSDINFO: 1310 #ifdef __HAVE_OLD_DISKLABEL 1311 case ODIOCWDINFO: 1312 case ODIOCSDINFO: 1313 #endif 1314 { 1315 struct disklabel *lp; 1316 #ifdef __HAVE_OLD_DISKLABEL 1317 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1318 memset(&newlabel, 0, sizeof newlabel); 1319 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1320 lp = &newlabel; 1321 } else 1322 #endif 1323 lp = (struct disklabel *)data; 1324 1325 cs->sc_flags |= CCDF_LABELLING; 1326 1327 error = setdisklabel(cs->sc_dkdev.dk_label, 1328 lp, 0, cs->sc_dkdev.dk_cpulabel); 1329 if (error == 0) { 1330 if (cmd == DIOCWDINFO 1331 #ifdef __HAVE_OLD_DISKLABEL 1332 || cmd == ODIOCWDINFO 1333 #endif 1334 ) 1335 error = writedisklabel(CCDLABELDEV(dev), 1336 ccdstrategy, cs->sc_dkdev.dk_label, 1337 cs->sc_dkdev.dk_cpulabel); 1338 } 1339 1340 cs->sc_flags &= ~CCDF_LABELLING; 1341 break; 1342 } 1343 1344 case DIOCKLABEL: 1345 if (*(int *)data != 0) 1346 cs->sc_flags |= CCDF_KLABEL; 1347 else 1348 cs->sc_flags &= ~CCDF_KLABEL; 1349 break; 1350 1351 case DIOCWLABEL: 1352 if (*(int *)data != 0) 1353 cs->sc_flags |= CCDF_WLABEL; 1354 else 1355 cs->sc_flags &= ~CCDF_WLABEL; 1356 break; 1357 1358 case DIOCGDEFLABEL: 1359 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1360 break; 1361 1362 #ifdef __HAVE_OLD_DISKLABEL 1363 case ODIOCGDEFLABEL: 1364 ccdgetdefaultlabel(cs, &newlabel); 1365 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1366 return ENOTTY; 1367 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1368 break; 1369 #endif 1370 1371 default: 1372 error = ENOTTY; 1373 } 1374 1375 out: 1376 mutex_exit(&cs->sc_dvlock); 1377 return (error); 1378 } 1379 1380 static int 1381 ccdsize(dev_t dev) 1382 { 1383 struct ccd_softc *cs; 1384 struct disklabel *lp; 1385 int part, unit, omask, size; 1386 1387 unit = ccdunit(dev); 1388 if ((cs = ccdget(unit)) == NULL) 1389 return -1; 1390 1391 if ((cs->sc_flags & CCDF_INITED) == 0) 1392 return (-1); 1393 1394 part = DISKPART(dev); 1395 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1396 lp = cs->sc_dkdev.dk_label; 1397 1398 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1399 return (-1); 1400 1401 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1402 size = -1; 1403 else 1404 size = lp->d_partitions[part].p_size * 1405 (lp->d_secsize / DEV_BSIZE); 1406 1407 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1408 return (-1); 1409 1410 return (size); 1411 } 1412 1413 static void 1414 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1415 { 1416 struct ccdgeom *ccg = &cs->sc_geom; 1417 1418 memset(lp, 0, sizeof(*lp)); 1419 1420 lp->d_secperunit = cs->sc_size; 1421 lp->d_secsize = ccg->ccg_secsize; 1422 lp->d_nsectors = ccg->ccg_nsectors; 1423 lp->d_ntracks = ccg->ccg_ntracks; 1424 lp->d_ncylinders = ccg->ccg_ncylinders; 1425 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1426 1427 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1428 lp->d_type = DTYPE_CCD; 1429 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1430 lp->d_rpm = 3600; 1431 lp->d_interleave = 1; 1432 lp->d_flags = 0; 1433 1434 lp->d_partitions[RAW_PART].p_offset = 0; 1435 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1436 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1437 lp->d_npartitions = RAW_PART + 1; 1438 1439 lp->d_magic = DISKMAGIC; 1440 lp->d_magic2 = DISKMAGIC; 1441 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1442 } 1443 1444 /* 1445 * Read the disklabel from the ccd. If one is not present, fake one 1446 * up. 1447 */ 1448 static void 1449 ccdgetdisklabel(dev_t dev) 1450 { 1451 int unit = ccdunit(dev); 1452 struct ccd_softc *cs; 1453 const char *errstring; 1454 struct disklabel *lp; 1455 struct cpu_disklabel *clp; 1456 1457 if ((cs = ccdget(unit)) == NULL) 1458 return; 1459 lp = cs->sc_dkdev.dk_label; 1460 clp = cs->sc_dkdev.dk_cpulabel; 1461 KASSERT(mutex_owned(&cs->sc_dvlock)); 1462 1463 memset(clp, 0, sizeof(*clp)); 1464 1465 ccdgetdefaultlabel(cs, lp); 1466 1467 /* 1468 * Call the generic disklabel extraction routine. 1469 */ 1470 cs->sc_flags |= CCDF_RLABEL; 1471 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1472 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1473 else 1474 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1475 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1476 if (errstring) 1477 ccdmakedisklabel(cs); 1478 else { 1479 int i; 1480 struct partition *pp; 1481 1482 /* 1483 * Sanity check whether the found disklabel is valid. 1484 * 1485 * This is necessary since total size of ccd may vary 1486 * when an interleave is changed even though exactly 1487 * same componets are used, and old disklabel may used 1488 * if that is found. 1489 */ 1490 if (lp->d_secperunit != cs->sc_size) 1491 printf("WARNING: %s: " 1492 "total sector size in disklabel (%d) != " 1493 "the size of ccd (%lu)\n", cs->sc_xname, 1494 lp->d_secperunit, (u_long)cs->sc_size); 1495 for (i = 0; i < lp->d_npartitions; i++) { 1496 pp = &lp->d_partitions[i]; 1497 if (pp->p_offset + pp->p_size > cs->sc_size) 1498 printf("WARNING: %s: end of partition `%c' " 1499 "exceeds the size of ccd (%lu)\n", 1500 cs->sc_xname, 'a' + i, (u_long)cs->sc_size); 1501 } 1502 } 1503 1504 #ifdef DEBUG 1505 /* It's actually extremely common to have unlabeled ccds. */ 1506 if (ccddebug & CCDB_LABEL) 1507 if (errstring != NULL) 1508 printf("%s: %s\n", cs->sc_xname, errstring); 1509 #endif 1510 1511 /* In-core label now valid. */ 1512 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1513 } 1514 1515 /* 1516 * Take care of things one might want to take care of in the event 1517 * that a disklabel isn't present. 1518 */ 1519 static void 1520 ccdmakedisklabel(struct ccd_softc *cs) 1521 { 1522 struct disklabel *lp = cs->sc_dkdev.dk_label; 1523 1524 /* 1525 * For historical reasons, if there's no disklabel present 1526 * the raw partition must be marked FS_BSDFFS. 1527 */ 1528 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1529 1530 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1531 1532 lp->d_checksum = dkcksum(lp); 1533 } 1534 1535 #ifdef DEBUG 1536 static void 1537 printiinfo(struct ccdiinfo *ii) 1538 { 1539 int ix, i; 1540 1541 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1542 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1543 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1544 for (i = 0; i < ii->ii_ndisk; i++) 1545 printf(" %d", ii->ii_index[i]); 1546 printf("\n"); 1547 } 1548 } 1549 #endif 1550 1551 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1552 1553 static int 1554 ccd_modcmd(modcmd_t cmd, void *arg) 1555 { 1556 int error = 0; 1557 #ifdef _MODULE 1558 int bmajor = -1, cmajor = -1; 1559 #endif 1560 1561 1562 switch (cmd) { 1563 case MODULE_CMD_INIT: 1564 #ifdef _MODULE 1565 ccdattach(4); 1566 1567 return devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1568 &ccd_cdevsw, &cmajor); 1569 #endif 1570 break; 1571 1572 case MODULE_CMD_FINI: 1573 #ifdef _MODULE 1574 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1575 #endif 1576 break; 1577 1578 case MODULE_CMD_STAT: 1579 return ENOTTY; 1580 1581 default: 1582 return ENOTTY; 1583 } 1584 1585 return error; 1586 } 1587 1588 static int 1589 ccd_units_sysctl(SYSCTLFN_ARGS) 1590 { 1591 struct sysctlnode node; 1592 struct ccd_softc *sc; 1593 int error, i, nccd, *units; 1594 size_t size; 1595 1596 nccd = 0; 1597 mutex_enter(&ccd_lock); 1598 LIST_FOREACH(sc, &ccds, sc_link) 1599 nccd++; 1600 mutex_exit(&ccd_lock); 1601 1602 if (nccd != 0) { 1603 size = nccd * sizeof(*units); 1604 units = kmem_zalloc(size, KM_SLEEP); 1605 if (units == NULL) 1606 return ENOMEM; 1607 1608 i = 0; 1609 mutex_enter(&ccd_lock); 1610 LIST_FOREACH(sc, &ccds, sc_link) { 1611 if (i >= nccd) 1612 break; 1613 units[i] = sc->sc_unit; 1614 } 1615 mutex_exit(&ccd_lock); 1616 } else { 1617 units = NULL; 1618 size = 0; 1619 } 1620 1621 node = *rnode; 1622 node.sysctl_data = units; 1623 node.sysctl_size = size; 1624 1625 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1626 if (units) 1627 kmem_free(units, size); 1628 return error; 1629 } 1630 1631 static int 1632 ccd_info_sysctl(SYSCTLFN_ARGS) 1633 { 1634 struct sysctlnode node; 1635 struct ccddiskinfo ccd; 1636 struct ccd_softc *sc; 1637 int unit; 1638 1639 if (newp == NULL || newlen != sizeof(int)) 1640 return EINVAL; 1641 1642 unit = *(const int *)newp; 1643 newp = NULL; 1644 newlen = 0; 1645 ccd.ccd_ndisks = ~0; 1646 mutex_enter(&ccd_lock); 1647 LIST_FOREACH(sc, &ccds, sc_link) { 1648 if (sc->sc_unit == unit) { 1649 ccd.ccd_ileave = sc->sc_ileave; 1650 ccd.ccd_size = sc->sc_size; 1651 ccd.ccd_ndisks = sc->sc_nccdisks; 1652 ccd.ccd_flags = sc->sc_flags; 1653 break; 1654 } 1655 } 1656 mutex_exit(&ccd_lock); 1657 1658 if (ccd.ccd_ndisks == ~0) 1659 return ENOENT; 1660 1661 node = *rnode; 1662 node.sysctl_data = &ccd; 1663 node.sysctl_size = sizeof(ccd); 1664 1665 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1666 } 1667 1668 static int 1669 ccd_components_sysctl(SYSCTLFN_ARGS) 1670 { 1671 struct sysctlnode node; 1672 int error, unit; 1673 size_t size; 1674 char *names, *p, *ep; 1675 struct ccd_softc *sc; 1676 1677 if (newp == NULL || newlen != sizeof(int)) 1678 return EINVAL; 1679 1680 size = 0; 1681 unit = *(const int *)newp; 1682 newp = NULL; 1683 newlen = 0; 1684 mutex_enter(&ccd_lock); 1685 LIST_FOREACH(sc, &ccds, sc_link) 1686 if (sc->sc_unit == unit) { 1687 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1688 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1689 break; 1690 } 1691 mutex_exit(&ccd_lock); 1692 1693 if (size == 0) 1694 return ENOENT; 1695 names = kmem_zalloc(size, KM_SLEEP); 1696 if (names == NULL) 1697 return ENOMEM; 1698 1699 p = names; 1700 ep = names + size; 1701 mutex_enter(&ccd_lock); 1702 LIST_FOREACH(sc, &ccds, sc_link) 1703 if (sc->sc_unit == unit) { 1704 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1705 char *d = sc->sc_cinfo[i].ci_path; 1706 while (p < ep && (*p++ = *d++) != '\0') 1707 continue; 1708 } 1709 break; 1710 } 1711 mutex_exit(&ccd_lock); 1712 1713 node = *rnode; 1714 node.sysctl_data = names; 1715 node.sysctl_size = ep - names; 1716 1717 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1718 kmem_free(names, size); 1719 return error; 1720 } 1721 1722 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1723 { 1724 const struct sysctlnode *node = NULL; 1725 1726 sysctl_createv(clog, 0, NULL, &node, 1727 CTLFLAG_PERMANENT, 1728 CTLTYPE_NODE, "ccd", 1729 SYSCTL_DESCR("ConCatenated Disk state"), 1730 NULL, 0, NULL, 0, 1731 CTL_KERN, CTL_CREATE, CTL_EOL); 1732 1733 if (node == NULL) 1734 return; 1735 1736 sysctl_createv(clog, 0, &node, NULL, 1737 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1738 CTLTYPE_STRUCT, "units", 1739 SYSCTL_DESCR("List of ccd unit numbers"), 1740 ccd_units_sysctl, 0, NULL, 0, 1741 CTL_CREATE, CTL_EOL); 1742 sysctl_createv(clog, 0, &node, NULL, 1743 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1744 CTLTYPE_STRUCT, "info", 1745 SYSCTL_DESCR("Information about a CCD unit"), 1746 ccd_info_sysctl, 0, NULL, 0, 1747 CTL_CREATE, CTL_EOL); 1748 sysctl_createv(clog, 0, &node, NULL, 1749 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1750 CTLTYPE_STRUCT, "components", 1751 SYSCTL_DESCR("Information about CCD components"), 1752 ccd_components_sysctl, 0, NULL, 0, 1753 CTL_CREATE, CTL_EOL); 1754 } 1755