1 /* $NetBSD: ccd.c,v 1.149 2014/06/14 07:39:00 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.149 2014/06/14 07:39:00 hannken Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 119 #include <uvm/uvm_extern.h> 120 121 #include <dev/ccdvar.h> 122 #include <dev/dkvar.h> 123 124 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 125 126 #if defined(CCDDEBUG) && !defined(DEBUG) 127 #define DEBUG 128 #endif 129 130 #ifdef DEBUG 131 #define CCDB_FOLLOW 0x01 132 #define CCDB_INIT 0x02 133 #define CCDB_IO 0x04 134 #define CCDB_LABEL 0x08 135 #define CCDB_VNODE 0x10 136 int ccddebug = 0x00; 137 #endif 138 139 #define ccdunit(x) DISKUNIT(x) 140 141 struct ccdbuf { 142 struct buf cb_buf; /* new I/O buf */ 143 struct buf *cb_obp; /* ptr. to original I/O buf */ 144 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 145 int cb_comp; /* target component */ 146 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 147 }; 148 149 /* component buffer pool */ 150 static pool_cache_t ccd_cache; 151 152 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 153 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 154 155 #define CCDLABELDEV(dev) \ 156 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 157 158 /* called by main() at boot time */ 159 void ccdattach(int); 160 161 /* called by biodone() at interrupt time */ 162 static void ccdiodone(struct buf *); 163 164 static void ccdinterleave(struct ccd_softc *); 165 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 166 struct lwp *); 167 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 168 daddr_t, void *, long); 169 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 170 static void ccdgetdisklabel(dev_t); 171 static void ccdmakedisklabel(struct ccd_softc *); 172 static void ccdstart(struct ccd_softc *); 173 static void ccdthread(void *); 174 175 static dev_type_open(ccdopen); 176 static dev_type_close(ccdclose); 177 static dev_type_read(ccdread); 178 static dev_type_write(ccdwrite); 179 static dev_type_ioctl(ccdioctl); 180 static dev_type_strategy(ccdstrategy); 181 static dev_type_size(ccdsize); 182 183 const struct bdevsw ccd_bdevsw = { 184 .d_open = ccdopen, 185 .d_close = ccdclose, 186 .d_strategy = ccdstrategy, 187 .d_ioctl = ccdioctl, 188 .d_dump = nodump, 189 .d_psize = ccdsize, 190 .d_flag = D_DISK | D_MPSAFE 191 }; 192 193 const struct cdevsw ccd_cdevsw = { 194 .d_open = ccdopen, 195 .d_close = ccdclose, 196 .d_read = ccdread, 197 .d_write = ccdwrite, 198 .d_ioctl = ccdioctl, 199 .d_stop = nostop, 200 .d_tty = notty, 201 .d_poll = nopoll, 202 .d_mmap = nommap, 203 .d_kqfilter = nokqfilter, 204 .d_flag = D_DISK | D_MPSAFE 205 }; 206 207 #ifdef DEBUG 208 static void printiinfo(struct ccdiinfo *); 209 #endif 210 211 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 212 static kmutex_t ccd_lock; 213 214 static struct ccd_softc * 215 ccdcreate(int unit) { 216 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 217 if (sc == NULL) { 218 #ifdef DIAGNOSTIC 219 printf("%s: out of memory\n", __func__); 220 #endif 221 return NULL; 222 } 223 /* Initialize per-softc structures. */ 224 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 225 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 226 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 227 cv_init(&sc->sc_stop, "ccdstop"); 228 cv_init(&sc->sc_push, "ccdthr"); 229 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 230 return sc; 231 } 232 233 static void 234 ccddestroy(struct ccd_softc *sc) { 235 mutex_obj_free(sc->sc_iolock); 236 mutex_exit(&sc->sc_dvlock); 237 mutex_destroy(&sc->sc_dvlock); 238 cv_destroy(&sc->sc_stop); 239 cv_destroy(&sc->sc_push); 240 disk_destroy(&sc->sc_dkdev); 241 kmem_free(sc, sizeof(*sc)); 242 } 243 244 static struct ccd_softc * 245 ccdget(int unit) { 246 struct ccd_softc *sc; 247 if (unit < 0) { 248 #ifdef DIAGNOSTIC 249 panic("%s: unit %d!", __func__, unit); 250 #endif 251 return NULL; 252 } 253 mutex_enter(&ccd_lock); 254 LIST_FOREACH(sc, &ccds, sc_link) { 255 if (sc->sc_unit == unit) { 256 mutex_exit(&ccd_lock); 257 return sc; 258 } 259 } 260 mutex_exit(&ccd_lock); 261 if ((sc = ccdcreate(unit)) == NULL) 262 return NULL; 263 mutex_enter(&ccd_lock); 264 LIST_INSERT_HEAD(&ccds, sc, sc_link); 265 mutex_exit(&ccd_lock); 266 return sc; 267 } 268 269 static void 270 ccdput(struct ccd_softc *sc) { 271 mutex_enter(&ccd_lock); 272 LIST_REMOVE(sc, sc_link); 273 mutex_exit(&ccd_lock); 274 ccddestroy(sc); 275 } 276 277 /* 278 * Called by main() during pseudo-device attachment. All we need 279 * to do is allocate enough space for devices to be configured later. 280 */ 281 void 282 ccdattach(int num) 283 { 284 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 285 286 /* Initialize the component buffer pool. */ 287 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 288 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 289 } 290 291 static int 292 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 293 struct lwp *l) 294 { 295 struct ccdcinfo *ci = NULL; 296 int ix; 297 struct ccdgeom *ccg = &cs->sc_geom; 298 char *tmppath; 299 int error, path_alloced; 300 uint64_t psize, minsize; 301 unsigned secsize, maxsecsize; 302 303 #ifdef DEBUG 304 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 305 printf("%s: ccdinit\n", cs->sc_xname); 306 #endif 307 308 /* Allocate space for the component info. */ 309 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 310 KM_SLEEP); 311 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 312 313 cs->sc_size = 0; 314 315 /* 316 * Verify that each component piece exists and record 317 * relevant information about it. 318 */ 319 maxsecsize = 0; 320 minsize = 0; 321 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 322 ci = &cs->sc_cinfo[ix]; 323 ci->ci_vp = vpp[ix]; 324 325 /* 326 * Copy in the pathname of the component. 327 */ 328 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 329 error = copyinstr(cpaths[ix], tmppath, 330 MAXPATHLEN, &ci->ci_pathlen); 331 if (ci->ci_pathlen == 0) 332 error = EINVAL; 333 if (error) { 334 #ifdef DEBUG 335 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 336 printf("%s: can't copy path, error = %d\n", 337 cs->sc_xname, error); 338 #endif 339 goto out; 340 } 341 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 342 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 343 path_alloced++; 344 345 /* 346 * XXX: Cache the component's dev_t. 347 */ 348 ci->ci_dev = vpp[ix]->v_rdev; 349 350 /* 351 * Get partition information for the component. 352 */ 353 error = getdisksize(vpp[ix], &psize, &secsize); 354 if (error) { 355 #ifdef DEBUG 356 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 357 printf("%s: %s: disksize failed, error = %d\n", 358 cs->sc_xname, ci->ci_path, error); 359 #endif 360 goto out; 361 } 362 363 /* 364 * Calculate the size, truncating to an interleave 365 * boundary if necessary. 366 */ 367 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 368 if (cs->sc_ileave > 1) 369 psize -= psize % cs->sc_ileave; 370 371 if (psize == 0) { 372 #ifdef DEBUG 373 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 374 printf("%s: %s: size == 0\n", 375 cs->sc_xname, ci->ci_path); 376 #endif 377 error = ENODEV; 378 goto out; 379 } 380 381 if (minsize == 0 || psize < minsize) 382 minsize = psize; 383 ci->ci_size = psize; 384 cs->sc_size += psize; 385 } 386 387 /* 388 * Don't allow the interleave to be smaller than 389 * the biggest component sector. 390 */ 391 if ((cs->sc_ileave > 0) && 392 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 393 #ifdef DEBUG 394 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 395 printf("%s: interleave must be at least %d\n", 396 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 397 #endif 398 error = EINVAL; 399 goto out; 400 } 401 402 /* 403 * If uniform interleave is desired set all sizes to that of 404 * the smallest component. 405 */ 406 if (cs->sc_flags & CCDF_UNIFORM) { 407 for (ci = cs->sc_cinfo; 408 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 409 ci->ci_size = minsize; 410 411 cs->sc_size = cs->sc_nccdisks * minsize; 412 } 413 414 /* 415 * Construct the interleave table. 416 */ 417 ccdinterleave(cs); 418 419 /* 420 * Create pseudo-geometry based on 1MB cylinders. It's 421 * pretty close. 422 */ 423 ccg->ccg_secsize = DEV_BSIZE; 424 ccg->ccg_ntracks = 1; 425 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 426 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 427 428 /* 429 * Create thread to handle deferred I/O. 430 */ 431 cs->sc_zap = false; 432 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 433 cs, &cs->sc_thread, "%s", cs->sc_xname); 434 if (error) { 435 printf("ccdinit: can't create thread: %d\n", error); 436 goto out; 437 } 438 439 /* 440 * Only now that everything is set up can we enable the device. 441 */ 442 mutex_enter(cs->sc_iolock); 443 cs->sc_flags |= CCDF_INITED; 444 mutex_exit(cs->sc_iolock); 445 kmem_free(tmppath, MAXPATHLEN); 446 return (0); 447 448 out: 449 for (ix = 0; ix < path_alloced; ix++) { 450 kmem_free(cs->sc_cinfo[ix].ci_path, 451 cs->sc_cinfo[ix].ci_pathlen); 452 } 453 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 454 kmem_free(tmppath, MAXPATHLEN); 455 return (error); 456 } 457 458 static void 459 ccdinterleave(struct ccd_softc *cs) 460 { 461 struct ccdcinfo *ci, *smallci; 462 struct ccdiinfo *ii; 463 daddr_t bn, lbn; 464 int ix; 465 u_long size; 466 467 #ifdef DEBUG 468 if (ccddebug & CCDB_INIT) 469 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 470 #endif 471 /* 472 * Allocate an interleave table. 473 * Chances are this is too big, but we don't care. 474 */ 475 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 476 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 477 478 /* 479 * Trivial case: no interleave (actually interleave of disk size). 480 * Each table entry represents a single component in its entirety. 481 */ 482 if (cs->sc_ileave == 0) { 483 bn = 0; 484 ii = cs->sc_itable; 485 486 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 487 /* Allocate space for ii_index. */ 488 ii->ii_indexsz = sizeof(int); 489 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 490 ii->ii_ndisk = 1; 491 ii->ii_startblk = bn; 492 ii->ii_startoff = 0; 493 ii->ii_index[0] = ix; 494 bn += cs->sc_cinfo[ix].ci_size; 495 ii++; 496 } 497 ii->ii_ndisk = 0; 498 #ifdef DEBUG 499 if (ccddebug & CCDB_INIT) 500 printiinfo(cs->sc_itable); 501 #endif 502 return; 503 } 504 505 /* 506 * The following isn't fast or pretty; it doesn't have to be. 507 */ 508 size = 0; 509 bn = lbn = 0; 510 for (ii = cs->sc_itable; ; ii++) { 511 /* Allocate space for ii_index. */ 512 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 513 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 514 515 /* 516 * Locate the smallest of the remaining components 517 */ 518 smallci = NULL; 519 for (ci = cs->sc_cinfo; 520 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 521 if (ci->ci_size > size && 522 (smallci == NULL || 523 ci->ci_size < smallci->ci_size)) 524 smallci = ci; 525 526 /* 527 * Nobody left, all done 528 */ 529 if (smallci == NULL) { 530 ii->ii_ndisk = 0; 531 break; 532 } 533 534 /* 535 * Record starting logical block and component offset 536 */ 537 ii->ii_startblk = bn / cs->sc_ileave; 538 ii->ii_startoff = lbn; 539 540 /* 541 * Determine how many disks take part in this interleave 542 * and record their indices. 543 */ 544 ix = 0; 545 for (ci = cs->sc_cinfo; 546 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 547 if (ci->ci_size >= smallci->ci_size) 548 ii->ii_index[ix++] = ci - cs->sc_cinfo; 549 ii->ii_ndisk = ix; 550 bn += ix * (smallci->ci_size - size); 551 lbn = smallci->ci_size / cs->sc_ileave; 552 size = smallci->ci_size; 553 } 554 #ifdef DEBUG 555 if (ccddebug & CCDB_INIT) 556 printiinfo(cs->sc_itable); 557 #endif 558 } 559 560 /* ARGSUSED */ 561 static int 562 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 563 { 564 int unit = ccdunit(dev); 565 struct ccd_softc *cs; 566 struct disklabel *lp; 567 int error = 0, part, pmask; 568 569 #ifdef DEBUG 570 if (ccddebug & CCDB_FOLLOW) 571 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 572 #endif 573 if ((cs = ccdget(unit)) == NULL) 574 return ENXIO; 575 576 mutex_enter(&cs->sc_dvlock); 577 578 lp = cs->sc_dkdev.dk_label; 579 580 part = DISKPART(dev); 581 pmask = (1 << part); 582 583 /* 584 * If we're initialized, check to see if there are any other 585 * open partitions. If not, then it's safe to update 586 * the in-core disklabel. Only read the disklabel if it is 587 * not already valid. 588 */ 589 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 590 cs->sc_dkdev.dk_openmask == 0) 591 ccdgetdisklabel(dev); 592 593 /* Check that the partition exists. */ 594 if (part != RAW_PART) { 595 if (((cs->sc_flags & CCDF_INITED) == 0) || 596 ((part >= lp->d_npartitions) || 597 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 598 error = ENXIO; 599 goto done; 600 } 601 } 602 603 /* Prevent our unit from being unconfigured while open. */ 604 switch (fmt) { 605 case S_IFCHR: 606 cs->sc_dkdev.dk_copenmask |= pmask; 607 break; 608 609 case S_IFBLK: 610 cs->sc_dkdev.dk_bopenmask |= pmask; 611 break; 612 } 613 cs->sc_dkdev.dk_openmask = 614 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 615 616 done: 617 mutex_exit(&cs->sc_dvlock); 618 return (error); 619 } 620 621 /* ARGSUSED */ 622 static int 623 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 624 { 625 int unit = ccdunit(dev); 626 struct ccd_softc *cs; 627 int part; 628 629 #ifdef DEBUG 630 if (ccddebug & CCDB_FOLLOW) 631 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 632 #endif 633 634 if ((cs = ccdget(unit)) == NULL) 635 return ENXIO; 636 637 mutex_enter(&cs->sc_dvlock); 638 639 part = DISKPART(dev); 640 641 /* ...that much closer to allowing unconfiguration... */ 642 switch (fmt) { 643 case S_IFCHR: 644 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 645 break; 646 647 case S_IFBLK: 648 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 649 break; 650 } 651 cs->sc_dkdev.dk_openmask = 652 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 653 654 if (cs->sc_dkdev.dk_openmask == 0) { 655 if ((cs->sc_flags & CCDF_KLABEL) == 0) 656 cs->sc_flags &= ~CCDF_VLABEL; 657 } 658 659 mutex_exit(&cs->sc_dvlock); 660 return (0); 661 } 662 663 static bool 664 ccdbackoff(struct ccd_softc *cs) 665 { 666 667 /* XXX Arbitrary, should be a uvm call. */ 668 return uvmexp.free < (uvmexp.freemin >> 1) && 669 disk_isbusy(&cs->sc_dkdev); 670 } 671 672 static void 673 ccdthread(void *cookie) 674 { 675 struct ccd_softc *cs; 676 677 cs = cookie; 678 679 #ifdef DEBUG 680 if (ccddebug & CCDB_FOLLOW) 681 printf("ccdthread: hello\n"); 682 #endif 683 684 mutex_enter(cs->sc_iolock); 685 while (__predict_true(!cs->sc_zap)) { 686 if (bufq_peek(cs->sc_bufq) == NULL) { 687 /* Nothing to do. */ 688 cv_wait(&cs->sc_push, cs->sc_iolock); 689 continue; 690 } 691 if (ccdbackoff(cs)) { 692 /* Wait for memory to become available. */ 693 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 694 continue; 695 } 696 #ifdef DEBUG 697 if (ccddebug & CCDB_FOLLOW) 698 printf("ccdthread: dispatching I/O\n"); 699 #endif 700 ccdstart(cs); 701 mutex_enter(cs->sc_iolock); 702 } 703 cs->sc_thread = NULL; 704 mutex_exit(cs->sc_iolock); 705 #ifdef DEBUG 706 if (ccddebug & CCDB_FOLLOW) 707 printf("ccdthread: goodbye\n"); 708 #endif 709 kthread_exit(0); 710 } 711 712 static void 713 ccdstrategy(struct buf *bp) 714 { 715 int unit = ccdunit(bp->b_dev); 716 struct ccd_softc *cs; 717 if ((cs = ccdget(unit)) == NULL) 718 return; 719 720 /* Must be open or reading label. */ 721 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 722 (cs->sc_flags & CCDF_RLABEL) != 0); 723 724 mutex_enter(cs->sc_iolock); 725 /* Synchronize with device init/uninit. */ 726 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 727 mutex_exit(cs->sc_iolock); 728 #ifdef DEBUG 729 if (ccddebug & CCDB_FOLLOW) 730 printf("ccdstrategy: unit %d: not inited\n", unit); 731 #endif 732 bp->b_error = ENXIO; 733 bp->b_resid = bp->b_bcount; 734 biodone(bp); 735 return; 736 } 737 738 /* Defer to thread if system is low on memory. */ 739 bufq_put(cs->sc_bufq, bp); 740 if (__predict_false(ccdbackoff(cs))) { 741 mutex_exit(cs->sc_iolock); 742 #ifdef DEBUG 743 if (ccddebug & CCDB_FOLLOW) 744 printf("ccdstrategy: holding off on I/O\n"); 745 #endif 746 return; 747 } 748 ccdstart(cs); 749 } 750 751 static void 752 ccdstart(struct ccd_softc *cs) 753 { 754 daddr_t blkno; 755 int wlabel; 756 struct disklabel *lp; 757 long bcount, rcount; 758 struct ccdbuf *cbp; 759 char *addr; 760 daddr_t bn; 761 vnode_t *vp; 762 buf_t *bp; 763 764 KASSERT(mutex_owned(cs->sc_iolock)); 765 766 disk_busy(&cs->sc_dkdev); 767 bp = bufq_get(cs->sc_bufq); 768 KASSERT(bp != NULL); 769 770 #ifdef DEBUG 771 if (ccddebug & CCDB_FOLLOW) 772 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 773 #endif 774 775 /* If it's a nil transfer, wake up the top half now. */ 776 if (bp->b_bcount == 0) 777 goto done; 778 779 lp = cs->sc_dkdev.dk_label; 780 781 /* 782 * Do bounds checking and adjust transfer. If there's an 783 * error, the bounds check will flag that for us. Convert 784 * the partition relative block number to an absolute. 785 */ 786 blkno = bp->b_blkno; 787 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 788 if (DISKPART(bp->b_dev) != RAW_PART) { 789 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 790 goto done; 791 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 792 } 793 mutex_exit(cs->sc_iolock); 794 bp->b_rawblkno = blkno; 795 796 /* Allocate the component buffers and start I/O! */ 797 bp->b_resid = bp->b_bcount; 798 bn = bp->b_rawblkno; 799 addr = bp->b_data; 800 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 801 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 802 rcount = cbp->cb_buf.b_bcount; 803 bn += btodb(rcount); 804 addr += rcount; 805 vp = cbp->cb_buf.b_vp; 806 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 807 mutex_enter(vp->v_interlock); 808 vp->v_numoutput++; 809 mutex_exit(vp->v_interlock); 810 } 811 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 812 } 813 return; 814 815 done: 816 disk_unbusy(&cs->sc_dkdev, 0, 0); 817 cv_broadcast(&cs->sc_stop); 818 cv_broadcast(&cs->sc_push); 819 mutex_exit(cs->sc_iolock); 820 bp->b_resid = bp->b_bcount; 821 biodone(bp); 822 } 823 824 /* 825 * Build a component buffer header. 826 */ 827 static struct ccdbuf * 828 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 829 long bcount) 830 { 831 struct ccdcinfo *ci; 832 struct ccdbuf *cbp; 833 daddr_t cbn, cboff; 834 u_int64_t cbc; 835 int ccdisk; 836 837 #ifdef DEBUG 838 if (ccddebug & CCDB_IO) 839 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 840 cs, bp, bn, addr, bcount); 841 #endif 842 /* 843 * Determine which component bn falls in. 844 */ 845 cbn = bn; 846 cboff = 0; 847 848 /* 849 * Serially concatenated 850 */ 851 if (cs->sc_ileave == 0) { 852 daddr_t sblk; 853 854 sblk = 0; 855 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 856 cbn >= sblk + ci->ci_size; 857 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 858 sblk += ci->ci_size; 859 cbn -= sblk; 860 } 861 /* 862 * Interleaved 863 */ 864 else { 865 struct ccdiinfo *ii; 866 int off; 867 868 cboff = cbn % cs->sc_ileave; 869 cbn /= cs->sc_ileave; 870 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 871 if (ii->ii_startblk > cbn) 872 break; 873 ii--; 874 off = cbn - ii->ii_startblk; 875 if (ii->ii_ndisk == 1) { 876 ccdisk = ii->ii_index[0]; 877 cbn = ii->ii_startoff + off; 878 } else { 879 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 880 cbn = ii->ii_startoff + off / ii->ii_ndisk; 881 } 882 cbn *= cs->sc_ileave; 883 ci = &cs->sc_cinfo[ccdisk]; 884 } 885 886 /* 887 * Fill in the component buf structure. 888 */ 889 cbp = CCD_GETBUF(); 890 KASSERT(cbp != NULL); 891 buf_init(&cbp->cb_buf); 892 cbp->cb_buf.b_flags = bp->b_flags; 893 cbp->cb_buf.b_oflags = bp->b_oflags; 894 cbp->cb_buf.b_cflags = bp->b_cflags; 895 cbp->cb_buf.b_iodone = ccdiodone; 896 cbp->cb_buf.b_proc = bp->b_proc; 897 cbp->cb_buf.b_dev = ci->ci_dev; 898 cbp->cb_buf.b_blkno = cbn + cboff; 899 cbp->cb_buf.b_data = addr; 900 cbp->cb_buf.b_vp = ci->ci_vp; 901 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 902 if (cs->sc_ileave == 0) 903 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 904 else 905 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 906 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 907 908 /* 909 * context for ccdiodone 910 */ 911 cbp->cb_obp = bp; 912 cbp->cb_sc = cs; 913 cbp->cb_comp = ccdisk; 914 915 BIO_COPYPRIO(&cbp->cb_buf, bp); 916 917 #ifdef DEBUG 918 if (ccddebug & CCDB_IO) 919 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 920 " bcnt %d\n", 921 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 922 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 923 cbp->cb_buf.b_bcount); 924 #endif 925 926 return (cbp); 927 } 928 929 /* 930 * Called at interrupt time. 931 * Mark the component as done and if all components are done, 932 * take a ccd interrupt. 933 */ 934 static void 935 ccdiodone(struct buf *vbp) 936 { 937 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 938 struct buf *bp = cbp->cb_obp; 939 struct ccd_softc *cs = cbp->cb_sc; 940 int count; 941 942 #ifdef DEBUG 943 if (ccddebug & CCDB_FOLLOW) 944 printf("ccdiodone(%p)\n", cbp); 945 if (ccddebug & CCDB_IO) { 946 printf("ccdiodone: bp %p bcount %d resid %d\n", 947 bp, bp->b_bcount, bp->b_resid); 948 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 949 " bcnt %d\n", 950 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 951 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 952 cbp->cb_buf.b_bcount); 953 } 954 #endif 955 956 if (cbp->cb_buf.b_error != 0) { 957 bp->b_error = cbp->cb_buf.b_error; 958 printf("%s: error %d on component %d\n", 959 cs->sc_xname, bp->b_error, cbp->cb_comp); 960 } 961 count = cbp->cb_buf.b_bcount; 962 buf_destroy(&cbp->cb_buf); 963 CCD_PUTBUF(cbp); 964 965 /* 966 * If all done, "interrupt". 967 */ 968 mutex_enter(cs->sc_iolock); 969 bp->b_resid -= count; 970 if (bp->b_resid < 0) 971 panic("ccdiodone: count"); 972 if (bp->b_resid == 0) { 973 /* 974 * Request is done for better or worse, wakeup the top half. 975 */ 976 if (bp->b_error != 0) 977 bp->b_resid = bp->b_bcount; 978 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 979 (bp->b_flags & B_READ)); 980 if (!disk_isbusy(&cs->sc_dkdev)) { 981 if (bufq_peek(cs->sc_bufq) != NULL) { 982 cv_broadcast(&cs->sc_push); 983 } 984 cv_broadcast(&cs->sc_stop); 985 } 986 mutex_exit(cs->sc_iolock); 987 biodone(bp); 988 } else 989 mutex_exit(cs->sc_iolock); 990 } 991 992 /* ARGSUSED */ 993 static int 994 ccdread(dev_t dev, struct uio *uio, int flags) 995 { 996 int unit = ccdunit(dev); 997 struct ccd_softc *cs; 998 999 #ifdef DEBUG 1000 if (ccddebug & CCDB_FOLLOW) 1001 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1002 #endif 1003 if ((cs = ccdget(unit)) == NULL) 1004 return 0; 1005 1006 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1007 if ((cs->sc_flags & CCDF_INITED) == 0) 1008 return (ENXIO); 1009 1010 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1011 } 1012 1013 /* ARGSUSED */ 1014 static int 1015 ccdwrite(dev_t dev, struct uio *uio, int flags) 1016 { 1017 int unit = ccdunit(dev); 1018 struct ccd_softc *cs; 1019 1020 #ifdef DEBUG 1021 if (ccddebug & CCDB_FOLLOW) 1022 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1023 #endif 1024 if ((cs = ccdget(unit)) == NULL) 1025 return ENOENT; 1026 1027 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1028 if ((cs->sc_flags & CCDF_INITED) == 0) 1029 return (ENXIO); 1030 1031 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1032 } 1033 1034 static int 1035 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1036 { 1037 int unit = ccdunit(dev); 1038 int i, j, lookedup = 0, error = 0; 1039 int part, pmask; 1040 struct ccd_softc *cs; 1041 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1042 kauth_cred_t uc; 1043 char **cpp; 1044 struct pathbuf *pb; 1045 struct vnode **vpp; 1046 #ifdef __HAVE_OLD_DISKLABEL 1047 struct disklabel newlabel; 1048 #endif 1049 1050 if ((cs = ccdget(unit)) == NULL) 1051 return ENOENT; 1052 uc = kauth_cred_get(); 1053 1054 /* Must be open for writes for these commands... */ 1055 switch (cmd) { 1056 case CCDIOCSET: 1057 case CCDIOCCLR: 1058 case DIOCSDINFO: 1059 case DIOCWDINFO: 1060 #ifdef __HAVE_OLD_DISKLABEL 1061 case ODIOCSDINFO: 1062 case ODIOCWDINFO: 1063 #endif 1064 case DIOCKLABEL: 1065 case DIOCWLABEL: 1066 if ((flag & FWRITE) == 0) 1067 return (EBADF); 1068 } 1069 1070 mutex_enter(&cs->sc_dvlock); 1071 1072 /* Must be initialized for these... */ 1073 switch (cmd) { 1074 case CCDIOCCLR: 1075 case DIOCGDINFO: 1076 case DIOCCACHESYNC: 1077 case DIOCSDINFO: 1078 case DIOCWDINFO: 1079 case DIOCGPART: 1080 case DIOCWLABEL: 1081 case DIOCKLABEL: 1082 case DIOCGDEFLABEL: 1083 #ifdef __HAVE_OLD_DISKLABEL 1084 case ODIOCGDINFO: 1085 case ODIOCSDINFO: 1086 case ODIOCWDINFO: 1087 case ODIOCGDEFLABEL: 1088 #endif 1089 if ((cs->sc_flags & CCDF_INITED) == 0) { 1090 error = ENXIO; 1091 goto out; 1092 } 1093 } 1094 1095 switch (cmd) { 1096 case CCDIOCSET: 1097 if (cs->sc_flags & CCDF_INITED) { 1098 error = EBUSY; 1099 goto out; 1100 } 1101 1102 /* Validate the flags. */ 1103 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1104 error = EINVAL; 1105 goto out; 1106 } 1107 1108 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1109 ccio->ccio_ndisks == 0) { 1110 error = EINVAL; 1111 goto out; 1112 } 1113 1114 /* Fill in some important bits. */ 1115 cs->sc_ileave = ccio->ccio_ileave; 1116 cs->sc_nccdisks = ccio->ccio_ndisks; 1117 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1118 1119 /* 1120 * Allocate space for and copy in the array of 1121 * componet pathnames and device numbers. 1122 */ 1123 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1124 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1125 error = copyin(ccio->ccio_disks, cpp, 1126 ccio->ccio_ndisks * sizeof(*cpp)); 1127 if (error) { 1128 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1129 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1130 goto out; 1131 } 1132 1133 #ifdef DEBUG 1134 if (ccddebug & CCDB_INIT) 1135 for (i = 0; i < ccio->ccio_ndisks; ++i) 1136 printf("ccdioctl: component %d: %p\n", 1137 i, cpp[i]); 1138 #endif 1139 1140 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1141 #ifdef DEBUG 1142 if (ccddebug & CCDB_INIT) 1143 printf("ccdioctl: lookedup = %d\n", lookedup); 1144 #endif 1145 error = pathbuf_copyin(cpp[i], &pb); 1146 if (error == 0) { 1147 error = dk_lookup(pb, l, &vpp[i]); 1148 } 1149 pathbuf_destroy(pb); 1150 if (error != 0) { 1151 for (j = 0; j < lookedup; ++j) 1152 (void)vn_close(vpp[j], FREAD|FWRITE, 1153 uc); 1154 kmem_free(vpp, ccio->ccio_ndisks * 1155 sizeof(*vpp)); 1156 kmem_free(cpp, ccio->ccio_ndisks * 1157 sizeof(*cpp)); 1158 goto out; 1159 } 1160 ++lookedup; 1161 } 1162 1163 /* Attach the disk. */ 1164 disk_attach(&cs->sc_dkdev); 1165 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1166 1167 /* 1168 * Initialize the ccd. Fills in the softc for us. 1169 */ 1170 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1171 for (j = 0; j < lookedup; ++j) 1172 (void)vn_close(vpp[j], FREAD|FWRITE, 1173 uc); 1174 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1175 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1176 disk_detach(&cs->sc_dkdev); 1177 bufq_free(cs->sc_bufq); 1178 goto out; 1179 } 1180 1181 /* We can free the temporary variables now. */ 1182 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1183 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1184 1185 /* 1186 * The ccd has been successfully initialized, so 1187 * we can place it into the array. Don't try to 1188 * read the disklabel until the disk has been attached, 1189 * because space for the disklabel is allocated 1190 * in disk_attach(); 1191 */ 1192 ccio->ccio_unit = unit; 1193 ccio->ccio_size = cs->sc_size; 1194 1195 /* Try and read the disklabel. */ 1196 ccdgetdisklabel(dev); 1197 break; 1198 1199 case CCDIOCCLR: 1200 /* 1201 * Don't unconfigure if any other partitions are open 1202 * or if both the character and block flavors of this 1203 * partition are open. 1204 */ 1205 part = DISKPART(dev); 1206 pmask = (1 << part); 1207 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1208 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1209 (cs->sc_dkdev.dk_copenmask & pmask))) { 1210 error = EBUSY; 1211 goto out; 1212 } 1213 1214 /* Stop new I/O, wait for in-flight I/O to complete. */ 1215 mutex_enter(cs->sc_iolock); 1216 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1217 cs->sc_zap = true; 1218 while (disk_isbusy(&cs->sc_dkdev) || 1219 bufq_peek(cs->sc_bufq) != NULL || 1220 cs->sc_thread != NULL) { 1221 cv_broadcast(&cs->sc_push); 1222 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1223 } 1224 mutex_exit(cs->sc_iolock); 1225 1226 /* 1227 * Free ccd_softc information and clear entry. 1228 */ 1229 1230 /* Close the components and free their pathnames. */ 1231 for (i = 0; i < cs->sc_nccdisks; ++i) { 1232 /* 1233 * XXX: this close could potentially fail and 1234 * cause Bad Things. Maybe we need to force 1235 * the close to happen? 1236 */ 1237 #ifdef DEBUG 1238 if (ccddebug & CCDB_VNODE) 1239 vprint("CCDIOCCLR: vnode info", 1240 cs->sc_cinfo[i].ci_vp); 1241 #endif 1242 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1243 uc); 1244 kmem_free(cs->sc_cinfo[i].ci_path, 1245 cs->sc_cinfo[i].ci_pathlen); 1246 } 1247 1248 /* Free interleave index. */ 1249 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1250 kmem_free(cs->sc_itable[i].ii_index, 1251 cs->sc_itable[i].ii_indexsz); 1252 } 1253 1254 /* Free component info and interleave table. */ 1255 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1256 sizeof(struct ccdcinfo)); 1257 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1258 sizeof(struct ccdiinfo)); 1259 1260 /* Detatch the disk. */ 1261 disk_detach(&cs->sc_dkdev); 1262 bufq_free(cs->sc_bufq); 1263 ccdput(cs); 1264 /* Don't break, otherwise cs is read again. */ 1265 return 0; 1266 1267 case DIOCGDINFO: 1268 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label); 1269 break; 1270 1271 #ifdef __HAVE_OLD_DISKLABEL 1272 case ODIOCGDINFO: 1273 newlabel = *(cs->sc_dkdev.dk_label); 1274 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1275 return ENOTTY; 1276 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1277 break; 1278 #endif 1279 1280 case DIOCGPART: 1281 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label; 1282 ((struct partinfo *)data)->part = 1283 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1284 break; 1285 1286 case DIOCCACHESYNC: 1287 /* 1288 * XXX Do we really need to care about having a writable 1289 * file descriptor here? 1290 */ 1291 if ((flag & FWRITE) == 0) 1292 return (EBADF); 1293 1294 /* 1295 * We pass this call down to all components and report 1296 * the first error we encounter. 1297 */ 1298 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1299 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1300 flag, uc); 1301 if (j != 0 && error == 0) 1302 error = j; 1303 } 1304 break; 1305 1306 case DIOCWDINFO: 1307 case DIOCSDINFO: 1308 #ifdef __HAVE_OLD_DISKLABEL 1309 case ODIOCWDINFO: 1310 case ODIOCSDINFO: 1311 #endif 1312 { 1313 struct disklabel *lp; 1314 #ifdef __HAVE_OLD_DISKLABEL 1315 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1316 memset(&newlabel, 0, sizeof newlabel); 1317 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1318 lp = &newlabel; 1319 } else 1320 #endif 1321 lp = (struct disklabel *)data; 1322 1323 cs->sc_flags |= CCDF_LABELLING; 1324 1325 error = setdisklabel(cs->sc_dkdev.dk_label, 1326 lp, 0, cs->sc_dkdev.dk_cpulabel); 1327 if (error == 0) { 1328 if (cmd == DIOCWDINFO 1329 #ifdef __HAVE_OLD_DISKLABEL 1330 || cmd == ODIOCWDINFO 1331 #endif 1332 ) 1333 error = writedisklabel(CCDLABELDEV(dev), 1334 ccdstrategy, cs->sc_dkdev.dk_label, 1335 cs->sc_dkdev.dk_cpulabel); 1336 } 1337 1338 cs->sc_flags &= ~CCDF_LABELLING; 1339 break; 1340 } 1341 1342 case DIOCKLABEL: 1343 if (*(int *)data != 0) 1344 cs->sc_flags |= CCDF_KLABEL; 1345 else 1346 cs->sc_flags &= ~CCDF_KLABEL; 1347 break; 1348 1349 case DIOCWLABEL: 1350 if (*(int *)data != 0) 1351 cs->sc_flags |= CCDF_WLABEL; 1352 else 1353 cs->sc_flags &= ~CCDF_WLABEL; 1354 break; 1355 1356 case DIOCGDEFLABEL: 1357 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1358 break; 1359 1360 #ifdef __HAVE_OLD_DISKLABEL 1361 case ODIOCGDEFLABEL: 1362 ccdgetdefaultlabel(cs, &newlabel); 1363 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1364 return ENOTTY; 1365 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1366 break; 1367 #endif 1368 1369 default: 1370 error = ENOTTY; 1371 } 1372 1373 out: 1374 mutex_exit(&cs->sc_dvlock); 1375 return (error); 1376 } 1377 1378 static int 1379 ccdsize(dev_t dev) 1380 { 1381 struct ccd_softc *cs; 1382 struct disklabel *lp; 1383 int part, unit, omask, size; 1384 1385 unit = ccdunit(dev); 1386 if ((cs = ccdget(unit)) == NULL) 1387 return -1; 1388 1389 if ((cs->sc_flags & CCDF_INITED) == 0) 1390 return (-1); 1391 1392 part = DISKPART(dev); 1393 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1394 lp = cs->sc_dkdev.dk_label; 1395 1396 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1397 return (-1); 1398 1399 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1400 size = -1; 1401 else 1402 size = lp->d_partitions[part].p_size * 1403 (lp->d_secsize / DEV_BSIZE); 1404 1405 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1406 return (-1); 1407 1408 return (size); 1409 } 1410 1411 static void 1412 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1413 { 1414 struct ccdgeom *ccg = &cs->sc_geom; 1415 1416 memset(lp, 0, sizeof(*lp)); 1417 1418 lp->d_secperunit = cs->sc_size; 1419 lp->d_secsize = ccg->ccg_secsize; 1420 lp->d_nsectors = ccg->ccg_nsectors; 1421 lp->d_ntracks = ccg->ccg_ntracks; 1422 lp->d_ncylinders = ccg->ccg_ncylinders; 1423 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1424 1425 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1426 lp->d_type = DTYPE_CCD; 1427 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1428 lp->d_rpm = 3600; 1429 lp->d_interleave = 1; 1430 lp->d_flags = 0; 1431 1432 lp->d_partitions[RAW_PART].p_offset = 0; 1433 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1434 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1435 lp->d_npartitions = RAW_PART + 1; 1436 1437 lp->d_magic = DISKMAGIC; 1438 lp->d_magic2 = DISKMAGIC; 1439 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1440 } 1441 1442 /* 1443 * Read the disklabel from the ccd. If one is not present, fake one 1444 * up. 1445 */ 1446 static void 1447 ccdgetdisklabel(dev_t dev) 1448 { 1449 int unit = ccdunit(dev); 1450 struct ccd_softc *cs; 1451 const char *errstring; 1452 struct disklabel *lp; 1453 struct cpu_disklabel *clp; 1454 1455 if ((cs = ccdget(unit)) == NULL) 1456 return; 1457 lp = cs->sc_dkdev.dk_label; 1458 clp = cs->sc_dkdev.dk_cpulabel; 1459 KASSERT(mutex_owned(&cs->sc_dvlock)); 1460 1461 memset(clp, 0, sizeof(*clp)); 1462 1463 ccdgetdefaultlabel(cs, lp); 1464 1465 /* 1466 * Call the generic disklabel extraction routine. 1467 */ 1468 cs->sc_flags |= CCDF_RLABEL; 1469 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1470 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1471 else 1472 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1473 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1474 if (errstring) 1475 ccdmakedisklabel(cs); 1476 else { 1477 int i; 1478 struct partition *pp; 1479 1480 /* 1481 * Sanity check whether the found disklabel is valid. 1482 * 1483 * This is necessary since total size of ccd may vary 1484 * when an interleave is changed even though exactly 1485 * same componets are used, and old disklabel may used 1486 * if that is found. 1487 */ 1488 if (lp->d_secperunit != cs->sc_size) 1489 printf("WARNING: %s: " 1490 "total sector size in disklabel (%d) != " 1491 "the size of ccd (%lu)\n", cs->sc_xname, 1492 lp->d_secperunit, (u_long)cs->sc_size); 1493 for (i = 0; i < lp->d_npartitions; i++) { 1494 pp = &lp->d_partitions[i]; 1495 if (pp->p_offset + pp->p_size > cs->sc_size) 1496 printf("WARNING: %s: end of partition `%c' " 1497 "exceeds the size of ccd (%lu)\n", 1498 cs->sc_xname, 'a' + i, (u_long)cs->sc_size); 1499 } 1500 } 1501 1502 #ifdef DEBUG 1503 /* It's actually extremely common to have unlabeled ccds. */ 1504 if (ccddebug & CCDB_LABEL) 1505 if (errstring != NULL) 1506 printf("%s: %s\n", cs->sc_xname, errstring); 1507 #endif 1508 1509 /* In-core label now valid. */ 1510 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1511 } 1512 1513 /* 1514 * Take care of things one might want to take care of in the event 1515 * that a disklabel isn't present. 1516 */ 1517 static void 1518 ccdmakedisklabel(struct ccd_softc *cs) 1519 { 1520 struct disklabel *lp = cs->sc_dkdev.dk_label; 1521 1522 /* 1523 * For historical reasons, if there's no disklabel present 1524 * the raw partition must be marked FS_BSDFFS. 1525 */ 1526 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1527 1528 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1529 1530 lp->d_checksum = dkcksum(lp); 1531 } 1532 1533 #ifdef DEBUG 1534 static void 1535 printiinfo(struct ccdiinfo *ii) 1536 { 1537 int ix, i; 1538 1539 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1540 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1541 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1542 for (i = 0; i < ii->ii_ndisk; i++) 1543 printf(" %d", ii->ii_index[i]); 1544 printf("\n"); 1545 } 1546 } 1547 #endif 1548 1549 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1550 1551 static int 1552 ccd_modcmd(modcmd_t cmd, void *arg) 1553 { 1554 int error = 0; 1555 #ifdef _MODULE 1556 int bmajor = -1, cmajor = -1; 1557 #endif 1558 1559 1560 switch (cmd) { 1561 case MODULE_CMD_INIT: 1562 #ifdef _MODULE 1563 ccdattach(4); 1564 1565 return devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1566 &ccd_cdevsw, &cmajor); 1567 #endif 1568 break; 1569 1570 case MODULE_CMD_FINI: 1571 #ifdef _MODULE 1572 return devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1573 #endif 1574 break; 1575 1576 case MODULE_CMD_STAT: 1577 return ENOTTY; 1578 1579 default: 1580 return ENOTTY; 1581 } 1582 1583 return error; 1584 } 1585 1586 static int 1587 ccd_units_sysctl(SYSCTLFN_ARGS) 1588 { 1589 struct sysctlnode node; 1590 struct ccd_softc *sc; 1591 int error, i, nccd, *units; 1592 size_t size; 1593 1594 nccd = 0; 1595 mutex_enter(&ccd_lock); 1596 LIST_FOREACH(sc, &ccds, sc_link) 1597 nccd++; 1598 mutex_exit(&ccd_lock); 1599 1600 if (nccd != 0) { 1601 size = nccd * sizeof(*units); 1602 units = kmem_zalloc(size, KM_SLEEP); 1603 if (units == NULL) 1604 return ENOMEM; 1605 1606 i = 0; 1607 mutex_enter(&ccd_lock); 1608 LIST_FOREACH(sc, &ccds, sc_link) { 1609 if (i >= nccd) 1610 break; 1611 units[i] = sc->sc_unit; 1612 } 1613 mutex_exit(&ccd_lock); 1614 } else { 1615 units = NULL; 1616 size = 0; 1617 } 1618 1619 node = *rnode; 1620 node.sysctl_data = units; 1621 node.sysctl_size = size; 1622 1623 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1624 if (units) 1625 kmem_free(units, size); 1626 return error; 1627 } 1628 1629 static int 1630 ccd_info_sysctl(SYSCTLFN_ARGS) 1631 { 1632 struct sysctlnode node; 1633 struct ccddiskinfo ccd; 1634 struct ccd_softc *sc; 1635 int unit; 1636 1637 if (newp == NULL || newlen != sizeof(int)) 1638 return EINVAL; 1639 1640 unit = *(const int *)newp; 1641 newp = NULL; 1642 newlen = 0; 1643 ccd.ccd_ndisks = ~0; 1644 mutex_enter(&ccd_lock); 1645 LIST_FOREACH(sc, &ccds, sc_link) { 1646 if (sc->sc_unit == unit) { 1647 ccd.ccd_ileave = sc->sc_ileave; 1648 ccd.ccd_size = sc->sc_size; 1649 ccd.ccd_ndisks = sc->sc_nccdisks; 1650 ccd.ccd_flags = sc->sc_flags; 1651 break; 1652 } 1653 } 1654 mutex_exit(&ccd_lock); 1655 1656 if (ccd.ccd_ndisks == ~0) 1657 return ENOENT; 1658 1659 node = *rnode; 1660 node.sysctl_data = &ccd; 1661 node.sysctl_size = sizeof(ccd); 1662 1663 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1664 } 1665 1666 static int 1667 ccd_components_sysctl(SYSCTLFN_ARGS) 1668 { 1669 struct sysctlnode node; 1670 int error, unit; 1671 size_t size; 1672 char *names, *p, *ep; 1673 struct ccd_softc *sc; 1674 1675 if (newp == NULL || newlen != sizeof(int)) 1676 return EINVAL; 1677 1678 size = 0; 1679 unit = *(const int *)newp; 1680 newp = NULL; 1681 newlen = 0; 1682 mutex_enter(&ccd_lock); 1683 LIST_FOREACH(sc, &ccds, sc_link) 1684 if (sc->sc_unit == unit) { 1685 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1686 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1687 break; 1688 } 1689 mutex_exit(&ccd_lock); 1690 1691 if (size == 0) 1692 return ENOENT; 1693 names = kmem_zalloc(size, KM_SLEEP); 1694 if (names == NULL) 1695 return ENOMEM; 1696 1697 p = names; 1698 ep = names + size; 1699 mutex_enter(&ccd_lock); 1700 LIST_FOREACH(sc, &ccds, sc_link) 1701 if (sc->sc_unit == unit) { 1702 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1703 char *d = sc->sc_cinfo[i].ci_path; 1704 while (p < ep && (*p++ = *d++) != '\0') 1705 continue; 1706 } 1707 break; 1708 } 1709 mutex_exit(&ccd_lock); 1710 1711 node = *rnode; 1712 node.sysctl_data = names; 1713 node.sysctl_size = ep - names; 1714 1715 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1716 kmem_free(names, size); 1717 return error; 1718 } 1719 1720 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1721 { 1722 const struct sysctlnode *node = NULL; 1723 1724 sysctl_createv(clog, 0, NULL, &node, 1725 CTLFLAG_PERMANENT, 1726 CTLTYPE_NODE, "ccd", 1727 SYSCTL_DESCR("ConCatenated Disk state"), 1728 NULL, 0, NULL, 0, 1729 CTL_KERN, CTL_CREATE, CTL_EOL); 1730 1731 if (node == NULL) 1732 return; 1733 1734 sysctl_createv(clog, 0, &node, NULL, 1735 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1736 CTLTYPE_STRUCT, "units", 1737 SYSCTL_DESCR("List of ccd unit numbers"), 1738 ccd_units_sysctl, 0, NULL, 0, 1739 CTL_CREATE, CTL_EOL); 1740 sysctl_createv(clog, 0, &node, NULL, 1741 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1742 CTLTYPE_STRUCT, "info", 1743 SYSCTL_DESCR("Information about a CCD unit"), 1744 ccd_info_sysctl, 0, NULL, 0, 1745 CTL_CREATE, CTL_EOL); 1746 sysctl_createv(clog, 0, &node, NULL, 1747 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1748 CTLTYPE_STRUCT, "components", 1749 SYSCTL_DESCR("Information about CCD components"), 1750 ccd_components_sysctl, 0, NULL, 0, 1751 CTL_CREATE, CTL_EOL); 1752 } 1753