1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/ccdvar.h> 145 146 #include <vm/vm_zone.h> 147 148 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 149 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 150 151 #include <sys/thread2.h> 152 #include <sys/buf2.h> 153 #include <sys/mplock2.h> 154 155 #if defined(CCDDEBUG) && !defined(DEBUG) 156 #define DEBUG 157 #endif 158 159 #ifdef DEBUG 160 #define CCDB_FOLLOW 0x01 161 #define CCDB_INIT 0x02 162 #define CCDB_IO 0x04 163 #define CCDB_LABEL 0x08 164 #define CCDB_VNODE 0x10 165 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 166 CCDB_VNODE; 167 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 168 #undef DEBUG 169 #endif 170 171 #define ccdunit(x) dkunit(x) 172 #define ccdpart(x) dkpart(x) 173 174 /* 175 This is how mirroring works (only writes are special): 176 177 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 178 linked together by the cb_mirror field. "cb_pflags & 179 CCDPF_MIRROR_DONE" is set to 0 on both of them. 180 181 When a component returns to ccdiodone(), it checks if "cb_pflags & 182 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 183 flag and returns. If it is, it means its partner has already 184 returned, so it will go to the regular cleanup. 185 186 */ 187 188 struct ccdbuf { 189 struct buf cb_buf; /* new I/O buf */ 190 struct vnode *cb_vp; /* related vnode */ 191 struct bio *cb_obio; /* ptr. to original I/O buf */ 192 struct ccdbuf *cb_freenext; /* free list link */ 193 int cb_unit; /* target unit */ 194 int cb_comp; /* target component */ 195 int cb_pflags; /* mirror/parity status flag */ 196 struct ccdbuf *cb_mirror; /* mirror counterpart */ 197 }; 198 199 /* bits in cb_pflags */ 200 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 201 202 static d_open_t ccdopen; 203 static d_close_t ccdclose; 204 static d_strategy_t ccdstrategy; 205 static d_ioctl_t ccdioctl; 206 static d_dump_t ccddump; 207 208 #define NCCDFREEHIWAT 16 209 210 #define CDEV_MAJOR 74 211 212 static struct dev_ops ccd_ops = { 213 { "ccd", CDEV_MAJOR, D_DISK }, 214 .d_open = ccdopen, 215 .d_close = ccdclose, 216 .d_read = physread, 217 .d_write = physwrite, 218 .d_ioctl = ccdioctl, 219 .d_strategy = ccdstrategy, 220 .d_dump = ccddump 221 }; 222 223 /* called during module initialization */ 224 static void ccdattach (void); 225 static int ccddetach (void); 226 static int ccd_modevent (module_t, int, void *); 227 228 /* called by biodone() at interrupt time */ 229 static void ccdiodone (struct bio *bio); 230 231 static void ccdstart (struct ccd_softc *, struct bio *); 232 static void ccdinterleave (struct ccd_softc *, int); 233 static void ccdintr (struct ccd_softc *, struct bio *); 234 static int ccdinit (struct ccddevice *, char **, struct ucred *); 235 static int ccdlookup (char *, struct vnode **); 236 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 237 struct bio *, off_t, caddr_t, long); 238 static int ccdlock (struct ccd_softc *); 239 static void ccdunlock (struct ccd_softc *); 240 241 #ifdef DEBUG 242 static void printiinfo (struct ccdiinfo *); 243 #endif 244 245 /* Non-private for the benefit of libkvm. */ 246 struct ccd_softc *ccd_softc; 247 struct ccddevice *ccddevs; 248 struct ccdbuf *ccdfreebufs; 249 static int numccdfreebufs; 250 static int numccd = 0; 251 252 /* 253 * getccdbuf() - Allocate and zero a ccd buffer. 254 * 255 * This routine is called at splbio(). 256 */ 257 258 static __inline 259 struct ccdbuf * 260 getccdbuf(void) 261 { 262 struct ccdbuf *cbp; 263 264 /* 265 * Allocate from freelist or malloc as necessary 266 */ 267 if ((cbp = ccdfreebufs) != NULL) { 268 ccdfreebufs = cbp->cb_freenext; 269 --numccdfreebufs; 270 reinitbufbio(&cbp->cb_buf); 271 } else { 272 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 273 initbufbio(&cbp->cb_buf); 274 } 275 276 /* 277 * independant struct buf initialization 278 */ 279 buf_dep_init(&cbp->cb_buf); 280 BUF_LOCKINIT(&cbp->cb_buf); 281 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 282 BUF_KERNPROC(&cbp->cb_buf); 283 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 284 285 return(cbp); 286 } 287 288 /* 289 * putccdbuf() - Free a ccd buffer. 290 * 291 * This routine is called at splbio(). 292 */ 293 294 static __inline 295 void 296 putccdbuf(struct ccdbuf *cbp) 297 { 298 BUF_UNLOCK(&cbp->cb_buf); 299 BUF_LOCKFREE(&cbp->cb_buf); 300 301 if (numccdfreebufs < NCCDFREEHIWAT) { 302 cbp->cb_freenext = ccdfreebufs; 303 ccdfreebufs = cbp; 304 ++numccdfreebufs; 305 } else { 306 kfree((caddr_t)cbp, M_DEVBUF); 307 } 308 } 309 310 /* 311 * Called by main() during pseudo-device attachment. All we need 312 * to do is allocate enough space for devices to be configured later, and 313 * add devsw entries. 314 */ 315 static void 316 ccdattach(void) 317 { 318 struct disk_info info; 319 struct ccd_softc *cs; 320 int i; 321 int num = NCCD; 322 323 if (num > 1) 324 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 325 else 326 kprintf("ccd0: Concatenated disk driver\n"); 327 328 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 329 M_WAITOK | M_ZERO); 330 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 331 M_WAITOK | M_ZERO); 332 numccd = num; 333 334 /* 335 * With normal disk devices the open simply fails if the media 336 * is not present. With CCD we have to be able to open the 337 * raw disk to use the ioctl's to set it up, so create a dummy 338 * disk info structure so dscheck() doesn't blow up. 339 */ 340 bzero(&info, sizeof(info)); 341 info.d_media_blksize = DEV_BSIZE; 342 343 for (i = 0; i < numccd; ++i) { 344 cs = &ccd_softc[i]; 345 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 346 cs->sc_dev->si_drv1 = cs; 347 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 348 disk_setdiskinfo(&cs->sc_disk, &info); 349 } 350 } 351 352 static int 353 ccddetach(void) 354 { 355 struct ccd_softc *cs; 356 struct dev_ioctl_args ioctl_args; 357 int i; 358 int error = 0; 359 int eval; 360 361 bzero(&ioctl_args, sizeof(ioctl_args)); 362 363 for (i = 0; i < numccd; ++i) { 364 cs = &ccd_softc[i]; 365 if (cs->sc_dev == NULL) 366 continue; 367 ioctl_args.a_head.a_dev = cs->sc_dev; 368 ioctl_args.a_cmd = CCDIOCCLR; 369 ioctl_args.a_fflag = FWRITE; 370 eval = ccdioctl(&ioctl_args); 371 if (eval && eval != ENXIO) { 372 kprintf("ccd%d: In use, cannot detach\n", i); 373 error = EBUSY; 374 } 375 } 376 if (error == 0) { 377 for (i = 0; i < numccd; ++i) { 378 cs = &ccd_softc[i]; 379 if (cs->sc_dev == NULL) 380 continue; 381 disk_destroy(&cs->sc_disk); 382 cs->sc_dev = NULL; 383 } 384 if (ccd_softc) 385 kfree(ccd_softc, M_DEVBUF); 386 if (ccddevs) 387 kfree(ccddevs, M_DEVBUF); 388 } 389 return (error); 390 } 391 392 static int 393 ccd_modevent(module_t mod, int type, void *data) 394 { 395 int error = 0; 396 397 switch (type) { 398 case MOD_LOAD: 399 ccdattach(); 400 break; 401 402 case MOD_UNLOAD: 403 error = ccddetach(); 404 break; 405 406 default: /* MOD_SHUTDOWN etc */ 407 break; 408 } 409 return (error); 410 } 411 412 DEV_MODULE(ccd, ccd_modevent, NULL); 413 414 static int 415 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 416 { 417 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 418 struct ccdcinfo *ci = NULL; /* XXX */ 419 int ix; 420 struct vnode *vp; 421 u_int64_t skip; 422 u_int64_t size; 423 u_int64_t minsize; 424 int maxsecsize; 425 struct partinfo dpart; 426 struct ccdgeom *ccg = &cs->sc_geom; 427 char tmppath[MAXPATHLEN]; 428 int error = 0; 429 430 #ifdef DEBUG 431 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 432 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 433 #endif 434 435 cs->sc_size = 0; 436 cs->sc_ileave = ccd->ccd_interleave; 437 cs->sc_nccdisks = ccd->ccd_ndev; 438 439 /* Allocate space for the component info. */ 440 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 441 M_DEVBUF, M_WAITOK); 442 cs->sc_maxiosize = MAXPHYS; 443 444 /* 445 * Verify that each component piece exists and record 446 * relevant information about it. 447 */ 448 maxsecsize = 0; 449 minsize = 0; 450 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 451 vp = ccd->ccd_vpp[ix]; 452 ci = &cs->sc_cinfo[ix]; 453 ci->ci_vp = vp; 454 455 /* 456 * Copy in the pathname of the component. 457 */ 458 bzero(tmppath, sizeof(tmppath)); /* sanity */ 459 if ((error = copyinstr(cpaths[ix], tmppath, 460 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 461 #ifdef DEBUG 462 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 463 kprintf("ccd%d: can't copy path, error = %d\n", 464 ccd->ccd_unit, error); 465 #endif 466 goto fail; 467 } 468 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 469 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 470 471 ci->ci_dev = vn_todev(vp); 472 if (ci->ci_dev->si_iosize_max && 473 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 474 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 475 } 476 477 /* 478 * Get partition information for the component. 479 */ 480 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 481 cred, NULL); 482 if (error) { 483 #ifdef DEBUG 484 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 485 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 486 ccd->ccd_unit, ci->ci_path, error); 487 #endif 488 goto fail; 489 } 490 if (dpart.fstype != FS_CCD && 491 !kuuid_is_ccd(&dpart.fstype_uuid)) { 492 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 493 ccd->ccd_unit, ci->ci_path); 494 error = EFTYPE; 495 goto fail; 496 } 497 if (maxsecsize < dpart.media_blksize) 498 maxsecsize = dpart.media_blksize; 499 500 /* 501 * Skip a certain amount of storage at the beginning of 502 * the component to make sure we don't infringe on any 503 * reserved sectors. This is handled entirely by 504 * dpart.reserved_blocks but we also impose a minimum 505 * of 16 sectors for backwards compatibility. 506 */ 507 skip = 16; 508 if (skip < dpart.reserved_blocks) 509 skip = dpart.reserved_blocks; 510 size = dpart.media_blocks - skip; 511 512 /* 513 * Calculate the size, truncating to an interleave 514 * boundary if necessary. 515 */ 516 if (cs->sc_ileave > 1) 517 size -= size % cs->sc_ileave; 518 519 if ((int64_t)size <= 0) { 520 #ifdef DEBUG 521 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 522 kprintf("ccd%d: %s: size == 0\n", 523 ccd->ccd_unit, ci->ci_path); 524 #endif 525 error = ENODEV; 526 goto fail; 527 } 528 529 /* 530 * Calculate the smallest uniform component, used 531 * elsewhere. 532 */ 533 if (minsize == 0 || minsize > size) 534 minsize = size; 535 ci->ci_skip = skip; 536 ci->ci_size = size; 537 cs->sc_size += size; 538 } 539 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 540 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 541 542 /* 543 * Don't allow the interleave to be smaller than 544 * the biggest component sector. 545 */ 546 if ((cs->sc_ileave > 0) && 547 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 548 #ifdef DEBUG 549 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 550 kprintf("ccd%d: interleave must be at least %d\n", 551 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 552 #endif 553 error = EINVAL; 554 goto fail; 555 } 556 557 /* 558 * If uniform interleave is desired set all sizes to that of 559 * the smallest component. This will guarentee that a single 560 * interleave table is generated. 561 * 562 * Lost space must be taken into account when calculating the 563 * overall size. Half the space is lost when CCDF_MIRROR is 564 * specified. One disk is lost when CCDF_PARITY is specified. 565 */ 566 if (ccd->ccd_flags & CCDF_UNIFORM) { 567 for (ci = cs->sc_cinfo; 568 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 569 ci->ci_size = minsize; 570 } 571 if (ccd->ccd_flags & CCDF_MIRROR) { 572 /* 573 * Check to see if an even number of components 574 * have been specified. The interleave must also 575 * be non-zero in order for us to be able to 576 * guarentee the topology. 577 */ 578 if (cs->sc_nccdisks % 2) { 579 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 580 error = EINVAL; 581 goto fail; 582 } 583 if (cs->sc_ileave == 0) { 584 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 585 error = EINVAL; 586 goto fail; 587 } 588 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 589 } else if (ccd->ccd_flags & CCDF_PARITY) { 590 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 591 } else { 592 if (cs->sc_ileave == 0) { 593 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 594 error = EINVAL; 595 goto fail; 596 } 597 cs->sc_size = cs->sc_nccdisks * minsize; 598 } 599 } 600 601 /* 602 * Construct the interleave table. 603 */ 604 ccdinterleave(cs, ccd->ccd_unit); 605 606 /* 607 * Create pseudo-geometry based on 1MB cylinders. It's 608 * pretty close. 609 */ 610 ccg->ccg_secsize = maxsecsize; 611 ccg->ccg_ntracks = 1; 612 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 613 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 614 615 /* 616 * Add an devstat entry for this device. 617 */ 618 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 619 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 620 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 621 DEVSTAT_PRIORITY_ARRAY); 622 623 cs->sc_flags |= CCDF_INITED; 624 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 625 cs->sc_unit = ccd->ccd_unit; 626 return (0); 627 fail: 628 while (ci > cs->sc_cinfo) { 629 ci--; 630 kfree(ci->ci_path, M_DEVBUF); 631 } 632 kfree(cs->sc_cinfo, M_DEVBUF); 633 cs->sc_cinfo = NULL; 634 return (error); 635 } 636 637 static void 638 ccdinterleave(struct ccd_softc *cs, int unit) 639 { 640 struct ccdcinfo *ci, *smallci; 641 struct ccdiinfo *ii; 642 u_int64_t bn; 643 u_int64_t lbn; 644 u_int64_t size; 645 int icount; 646 int ix; 647 648 #ifdef DEBUG 649 if (ccddebug & CCDB_INIT) 650 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 651 #endif 652 653 /* 654 * Allocate an interleave table. The worst case occurs when each 655 * of N disks is of a different size, resulting in N interleave 656 * tables. 657 * 658 * Chances are this is too big, but we don't care. 659 */ 660 icount = cs->sc_nccdisks + 1; 661 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 662 M_DEVBUF, M_WAITOK|M_ZERO); 663 664 /* 665 * Trivial case: no interleave (actually interleave of disk size). 666 * Each table entry represents a single component in its entirety. 667 * 668 * An interleave of 0 may not be used with a mirror or parity setup. 669 */ 670 if (cs->sc_ileave == 0) { 671 bn = 0; 672 ii = cs->sc_itable; 673 674 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 675 /* Allocate space for ii_index. */ 676 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 677 ii->ii_ndisk = 1; 678 ii->ii_startblk = bn; 679 ii->ii_startoff = 0; 680 ii->ii_index[0] = ix; 681 bn += cs->sc_cinfo[ix].ci_size; 682 ii++; 683 } 684 ii->ii_ndisk = 0; 685 #ifdef DEBUG 686 if (ccddebug & CCDB_INIT) 687 printiinfo(cs->sc_itable); 688 #endif 689 return; 690 } 691 692 /* 693 * The following isn't fast or pretty; it doesn't have to be. 694 */ 695 size = 0; 696 bn = lbn = 0; 697 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 698 /* 699 * Allocate space for ii_index. We might allocate more then 700 * we use. 701 */ 702 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 703 M_DEVBUF, M_WAITOK); 704 705 /* 706 * Locate the smallest of the remaining components 707 */ 708 smallci = NULL; 709 ci = cs->sc_cinfo; 710 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 711 if (ci->ci_size > size && 712 (smallci == NULL || 713 ci->ci_size < smallci->ci_size)) { 714 smallci = ci; 715 } 716 ++ci; 717 } 718 719 /* 720 * Nobody left, all done 721 */ 722 if (smallci == NULL) { 723 ii->ii_ndisk = 0; 724 break; 725 } 726 727 /* 728 * Record starting logical block using an sc_ileave blocksize. 729 */ 730 ii->ii_startblk = bn / cs->sc_ileave; 731 732 /* 733 * Record starting component block using an sc_ileave 734 * blocksize. This value is relative to the beginning of 735 * a component disk. 736 */ 737 ii->ii_startoff = lbn; 738 739 /* 740 * Determine how many disks take part in this interleave 741 * and record their indices. 742 */ 743 ix = 0; 744 for (ci = cs->sc_cinfo; 745 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 746 if (ci->ci_size >= smallci->ci_size) { 747 ii->ii_index[ix++] = ci - cs->sc_cinfo; 748 } 749 } 750 ii->ii_ndisk = ix; 751 752 /* 753 * Adjust for loop 754 */ 755 bn += ix * (smallci->ci_size - size); 756 lbn = smallci->ci_size / cs->sc_ileave; 757 size = smallci->ci_size; 758 } 759 if (ii == &cs->sc_itable[icount]) 760 panic("ccdinterlave software bug! table exhausted"); 761 #ifdef DEBUG 762 if (ccddebug & CCDB_INIT) 763 printiinfo(cs->sc_itable); 764 #endif 765 } 766 767 /* ARGSUSED */ 768 static int 769 ccdopen(struct dev_open_args *ap) 770 { 771 cdev_t dev = ap->a_head.a_dev; 772 int unit = ccdunit(dev); 773 struct ccd_softc *cs; 774 int error = 0; 775 776 #ifdef DEBUG 777 if (ccddebug & CCDB_FOLLOW) 778 kprintf("ccdopen(%x, %x)\n", dev, flags); 779 #endif 780 if (unit >= numccd) 781 return (ENXIO); 782 cs = &ccd_softc[unit]; 783 784 if ((error = ccdlock(cs)) == 0) { 785 ccdunlock(cs); 786 } 787 return (error); 788 } 789 790 /* ARGSUSED */ 791 static int 792 ccdclose(struct dev_close_args *ap) 793 { 794 cdev_t dev = ap->a_head.a_dev; 795 int unit = ccdunit(dev); 796 struct ccd_softc *cs; 797 int error = 0; 798 799 #ifdef DEBUG 800 if (ccddebug & CCDB_FOLLOW) 801 kprintf("ccdclose(%x, %x)\n", dev, flags); 802 #endif 803 804 if (unit >= numccd) 805 return (ENXIO); 806 cs = &ccd_softc[unit]; 807 if ((error = ccdlock(cs)) == 0) { 808 ccdunlock(cs); 809 } 810 return (error); 811 } 812 813 static int 814 ccdstrategy(struct dev_strategy_args *ap) 815 { 816 cdev_t dev = ap->a_head.a_dev; 817 struct bio *bio = ap->a_bio; 818 int unit = ccdunit(dev); 819 struct bio *nbio; 820 struct buf *bp = bio->bio_buf; 821 struct ccd_softc *cs = &ccd_softc[unit]; 822 u_int64_t pbn; /* in sc_secsize chunks */ 823 u_int32_t sz; /* in sc_secsize chunks */ 824 825 #ifdef DEBUG 826 if (ccddebug & CCDB_FOLLOW) 827 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 828 #endif 829 if ((cs->sc_flags & CCDF_INITED) == 0) { 830 bp->b_error = ENXIO; 831 goto error; 832 } 833 834 /* If it's a nil transfer, wake up the top half now. */ 835 if (bp->b_bcount == 0) { 836 bp->b_resid = 0; 837 goto done; 838 } 839 840 /* 841 * Do bounds checking and adjust transfer. If there's an 842 * error, the bounds check will flag that for us. 843 */ 844 845 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 846 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 847 848 /* 849 * If out of bounds return an error. If the request goes 850 * past EOF, clip the request as appropriate. If exactly 851 * at EOF, return success (don't clip), but with 0 bytes 852 * of I/O. 853 * 854 * Mark EOF B_INVAL (just like bad), indicating that the 855 * contents of the buffer, if any, is invalid. 856 */ 857 if ((int64_t)pbn < 0) 858 goto bad; 859 if (pbn + sz > cs->sc_size) { 860 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 861 goto bad; 862 if (pbn == cs->sc_size) { 863 bp->b_resid = bp->b_bcount; 864 bp->b_flags |= B_INVAL; 865 goto done; 866 } 867 sz = (long)(cs->sc_size - pbn); 868 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 869 } 870 nbio = bio; 871 872 bp->b_resid = bp->b_bcount; 873 nbio->bio_driver_info = dev; 874 875 /* 876 * "Start" the unit. 877 */ 878 crit_enter(); 879 ccdstart(cs, nbio); 880 crit_exit(); 881 return(0); 882 883 /* 884 * note: bio, not nbio, is valid at the done label. 885 */ 886 bad: 887 bp->b_error = EINVAL; 888 error: 889 bp->b_resid = bp->b_bcount; 890 bp->b_flags |= B_ERROR | B_INVAL; 891 done: 892 biodone(bio); 893 return(0); 894 } 895 896 static void 897 ccdstart(struct ccd_softc *cs, struct bio *bio) 898 { 899 long bcount, rcount; 900 struct ccdbuf *cbp[4]; 901 struct buf *bp = bio->bio_buf; 902 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 903 caddr_t addr; 904 off_t doffset; 905 906 #ifdef DEBUG 907 if (ccddebug & CCDB_FOLLOW) 908 kprintf("ccdstart(%x, %x)\n", cs, bp); 909 #endif 910 911 /* Record the transaction start */ 912 devstat_start_transaction(&cs->device_stats); 913 914 /* 915 * Allocate component buffers and fire off the requests 916 */ 917 doffset = bio->bio_offset; 918 addr = bp->b_data; 919 920 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 921 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 922 rcount = cbp[0]->cb_buf.b_bcount; 923 924 if (cs->sc_cflags & CCDF_MIRROR) { 925 /* 926 * Mirroring. Writes go to both disks, reads are 927 * taken from whichever disk seems most appropriate. 928 * 929 * We attempt to localize reads to the disk whos arm 930 * is nearest the read request. We ignore seeks due 931 * to writes when making this determination and we 932 * also try to avoid hogging. 933 */ 934 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 935 vn_strategy(cbp[0]->cb_vp, 936 &cbp[0]->cb_buf.b_bio1); 937 vn_strategy(cbp[1]->cb_vp, 938 &cbp[1]->cb_buf.b_bio1); 939 } else { 940 int pick = cs->sc_pick; 941 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 942 if (doffset < cs->sc_blk[pick] - range || 943 doffset > cs->sc_blk[pick] + range 944 ) { 945 cs->sc_pick = pick = 1 - pick; 946 } 947 cs->sc_blk[pick] = doffset + rcount; 948 vn_strategy(cbp[pick]->cb_vp, 949 &cbp[pick]->cb_buf.b_bio1); 950 } 951 } else { 952 /* 953 * Not mirroring 954 */ 955 vn_strategy(cbp[0]->cb_vp, 956 &cbp[0]->cb_buf.b_bio1); 957 } 958 doffset += rcount; 959 addr += rcount; 960 } 961 } 962 963 /* 964 * Build a component buffer header. 965 */ 966 static void 967 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 968 off_t doffset, caddr_t addr, long bcount) 969 { 970 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 971 struct ccdbuf *cbp; 972 u_int64_t bn; 973 u_int64_t cbn; 974 u_int64_t cboff; 975 off_t cbc; 976 977 #ifdef DEBUG 978 if (ccddebug & CCDB_IO) 979 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 980 cs, bp, bn, addr, bcount); 981 #endif 982 /* 983 * Determine which component bn falls in. 984 */ 985 bn = doffset / cs->sc_geom.ccg_secsize; 986 cbn = bn; 987 cboff = 0; 988 989 if (cs->sc_ileave == 0) { 990 /* 991 * Serially concatenated and neither a mirror nor a parity 992 * config. This is a special case. 993 */ 994 daddr_t sblk; 995 996 sblk = 0; 997 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 998 sblk += ci->ci_size; 999 cbn -= sblk; 1000 } else { 1001 struct ccdiinfo *ii; 1002 int ccdisk, off; 1003 1004 /* 1005 * Calculate cbn, the logical superblock (sc_ileave chunks), 1006 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1007 * to cbn. 1008 */ 1009 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1010 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1011 1012 /* 1013 * Figure out which interleave table to use. 1014 */ 1015 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1016 if (ii->ii_startblk > cbn) 1017 break; 1018 } 1019 ii--; 1020 1021 /* 1022 * off is the logical superblock relative to the beginning 1023 * of this interleave block. 1024 */ 1025 off = cbn - ii->ii_startblk; 1026 1027 /* 1028 * We must calculate which disk component to use (ccdisk), 1029 * and recalculate cbn to be the superblock relative to 1030 * the beginning of the component. This is typically done by 1031 * adding 'off' and ii->ii_startoff together. However, 'off' 1032 * must typically be divided by the number of components in 1033 * this interleave array to be properly convert it from a 1034 * CCD-relative logical superblock number to a 1035 * component-relative superblock number. 1036 */ 1037 if (ii->ii_ndisk == 1) { 1038 /* 1039 * When we have just one disk, it can't be a mirror 1040 * or a parity config. 1041 */ 1042 ccdisk = ii->ii_index[0]; 1043 cbn = ii->ii_startoff + off; 1044 } else { 1045 if (cs->sc_cflags & CCDF_MIRROR) { 1046 /* 1047 * We have forced a uniform mapping, resulting 1048 * in a single interleave array. We double 1049 * up on the first half of the available 1050 * components and our mirror is in the second 1051 * half. This only works with a single 1052 * interleave array because doubling up 1053 * doubles the number of sectors, so there 1054 * cannot be another interleave array because 1055 * the next interleave array's calculations 1056 * would be off. 1057 */ 1058 int ndisk2 = ii->ii_ndisk / 2; 1059 ccdisk = ii->ii_index[off % ndisk2]; 1060 cbn = ii->ii_startoff + off / ndisk2; 1061 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1062 } else if (cs->sc_cflags & CCDF_PARITY) { 1063 /* 1064 * XXX not implemented yet 1065 */ 1066 int ndisk2 = ii->ii_ndisk - 1; 1067 ccdisk = ii->ii_index[off % ndisk2]; 1068 cbn = ii->ii_startoff + off / ndisk2; 1069 if (cbn % ii->ii_ndisk <= ccdisk) 1070 ccdisk++; 1071 } else { 1072 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1073 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1074 } 1075 } 1076 1077 ci = &cs->sc_cinfo[ccdisk]; 1078 1079 /* 1080 * Convert cbn from a superblock to a normal block so it 1081 * can be used to calculate (along with cboff) the normal 1082 * block index into this particular disk. 1083 */ 1084 cbn *= cs->sc_ileave; 1085 } 1086 1087 /* 1088 * Fill in the component buf structure. 1089 * 1090 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1091 * will be truncated on device EOF so we use b_bufsize to detect 1092 * the case. 1093 */ 1094 cbp = getccdbuf(); 1095 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1096 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1097 cbp->cb_buf.b_data = addr; 1098 cbp->cb_vp = ci->ci_vp; 1099 if (cs->sc_ileave == 0) 1100 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1101 else 1102 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1103 if (cbc > cs->sc_maxiosize) 1104 cbc = cs->sc_maxiosize; 1105 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1106 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1107 1108 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1109 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1110 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1111 1112 /* 1113 * context for ccdiodone 1114 */ 1115 cbp->cb_obio = bio; 1116 cbp->cb_unit = cs - ccd_softc; 1117 cbp->cb_comp = ci - cs->sc_cinfo; 1118 1119 #ifdef DEBUG 1120 if (ccddebug & CCDB_IO) 1121 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1122 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1123 cbp->cb_buf.b_bio1.bio_offset, 1124 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1125 #endif 1126 cb[0] = cbp; 1127 1128 /* 1129 * Note: both I/O's setup when reading from mirror, but only one 1130 * will be executed. 1131 */ 1132 if (cs->sc_cflags & CCDF_MIRROR) { 1133 /* mirror, setup second I/O */ 1134 cbp = getccdbuf(); 1135 1136 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1137 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1138 cbp->cb_buf.b_data = addr; 1139 cbp->cb_vp = ci2->ci_vp; 1140 if (cs->sc_ileave == 0) 1141 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1142 else 1143 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1144 if (cbc > cs->sc_maxiosize) 1145 cbc = cs->sc_maxiosize; 1146 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1147 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1148 1149 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1150 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1151 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1152 1153 /* 1154 * context for ccdiodone 1155 */ 1156 cbp->cb_obio = bio; 1157 cbp->cb_unit = cs - ccd_softc; 1158 cbp->cb_comp = ci2 - cs->sc_cinfo; 1159 cb[1] = cbp; 1160 /* link together the ccdbuf's and clear "mirror done" flag */ 1161 cb[0]->cb_mirror = cb[1]; 1162 cb[1]->cb_mirror = cb[0]; 1163 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1164 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1165 } 1166 } 1167 1168 static void 1169 ccdintr(struct ccd_softc *cs, struct bio *bio) 1170 { 1171 struct buf *bp = bio->bio_buf; 1172 1173 #ifdef DEBUG 1174 if (ccddebug & CCDB_FOLLOW) 1175 kprintf("ccdintr(%x, %x)\n", cs, bp); 1176 #endif 1177 /* 1178 * Request is done for better or worse, wakeup the top half. 1179 */ 1180 if (bp->b_flags & B_ERROR) 1181 bp->b_resid = bp->b_bcount; 1182 devstat_end_transaction_buf(&cs->device_stats, bp); 1183 biodone(bio); 1184 } 1185 1186 /* 1187 * Called at interrupt time. 1188 * 1189 * Mark the component as done and if all components are done, 1190 * take a ccd interrupt. 1191 */ 1192 static void 1193 ccdiodone(struct bio *bio) 1194 { 1195 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1196 struct bio *obio = cbp->cb_obio; 1197 struct buf *obp = obio->bio_buf; 1198 int unit = cbp->cb_unit; 1199 int count; 1200 1201 /* 1202 * Since we do not have exclusive access to underlying devices, 1203 * we can't keep cache translations around. 1204 */ 1205 clearbiocache(bio->bio_next); 1206 1207 get_mplock(); 1208 crit_enter(); 1209 #ifdef DEBUG 1210 if (ccddebug & CCDB_FOLLOW) 1211 kprintf("ccdiodone(%x)\n", cbp); 1212 if (ccddebug & CCDB_IO) { 1213 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1214 obp, obp->b_bcount, obp->b_resid); 1215 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1216 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1217 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1218 cbp->cb_buf.b_bcount); 1219 } 1220 #endif 1221 1222 /* 1223 * If an error occured, report it. If this is a mirrored 1224 * configuration and the first of two possible reads, do not 1225 * set the error in the bp yet because the second read may 1226 * succeed. 1227 */ 1228 if (cbp->cb_buf.b_flags & B_ERROR) { 1229 const char *msg = ""; 1230 1231 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1232 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1233 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1234 /* 1235 * We will try our read on the other disk down 1236 * below, also reverse the default pick so if we 1237 * are doing a scan we do not keep hitting the 1238 * bad disk first. 1239 */ 1240 struct ccd_softc *cs = &ccd_softc[unit]; 1241 1242 msg = ", trying other disk"; 1243 cs->sc_pick = 1 - cs->sc_pick; 1244 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1245 } else { 1246 obp->b_flags |= B_ERROR; 1247 obp->b_error = cbp->cb_buf.b_error ? 1248 cbp->cb_buf.b_error : EIO; 1249 } 1250 kprintf("ccd%d: error %d on component %d " 1251 "offset %jd (ccd offset %jd)%s\n", 1252 unit, obp->b_error, cbp->cb_comp, 1253 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1254 (intmax_t)obio->bio_offset, 1255 msg); 1256 } 1257 1258 /* 1259 * Process mirror. If we are writing, I/O has been initiated on both 1260 * buffers and we fall through only after both are finished. 1261 * 1262 * If we are reading only one I/O is initiated at a time. If an 1263 * error occurs we initiate the second I/O and return, otherwise 1264 * we free the second I/O without initiating it. 1265 */ 1266 1267 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1268 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1269 /* 1270 * When writing, handshake with the second buffer 1271 * to determine when both are done. If both are not 1272 * done, return here. 1273 */ 1274 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1275 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1276 putccdbuf(cbp); 1277 crit_exit(); 1278 rel_mplock(); 1279 return; 1280 } 1281 } else { 1282 /* 1283 * When reading, either dispose of the second buffer 1284 * or initiate I/O on the second buffer if an error 1285 * occured with this one. 1286 */ 1287 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1288 if (cbp->cb_buf.b_flags & B_ERROR) { 1289 cbp->cb_mirror->cb_pflags |= 1290 CCDPF_MIRROR_DONE; 1291 vn_strategy( 1292 cbp->cb_mirror->cb_vp, 1293 &cbp->cb_mirror->cb_buf.b_bio1 1294 ); 1295 putccdbuf(cbp); 1296 crit_exit(); 1297 rel_mplock(); 1298 return; 1299 } else { 1300 putccdbuf(cbp->cb_mirror); 1301 /* fall through */ 1302 } 1303 } 1304 } 1305 } 1306 1307 /* 1308 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1309 */ 1310 count = cbp->cb_buf.b_bufsize; 1311 putccdbuf(cbp); 1312 1313 /* 1314 * If all done, "interrupt". 1315 */ 1316 obp->b_resid -= count; 1317 if (obp->b_resid < 0) 1318 panic("ccdiodone: count"); 1319 if (obp->b_resid == 0) 1320 ccdintr(&ccd_softc[unit], obio); 1321 crit_exit(); 1322 rel_mplock(); 1323 } 1324 1325 static int 1326 ccdioctl(struct dev_ioctl_args *ap) 1327 { 1328 cdev_t dev = ap->a_head.a_dev; 1329 int unit = ccdunit(dev); 1330 int i, j, lookedup = 0, error = 0; 1331 struct ccd_softc *cs; 1332 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1333 struct ccddevice ccd; 1334 struct disk_info info; 1335 char **cpp; 1336 struct vnode **vpp; 1337 1338 if (unit >= numccd) 1339 return (ENXIO); 1340 cs = &ccd_softc[unit]; 1341 1342 bzero(&ccd, sizeof(ccd)); 1343 1344 switch (ap->a_cmd) { 1345 case CCDIOCSET: 1346 if (cs->sc_flags & CCDF_INITED) 1347 return (EBUSY); 1348 1349 if ((ap->a_fflag & FWRITE) == 0) 1350 return (EBADF); 1351 1352 if ((error = ccdlock(cs)) != 0) 1353 return (error); 1354 1355 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1356 ccdunlock(cs); 1357 return (EINVAL); 1358 } 1359 1360 /* Fill in some important bits. */ 1361 ccd.ccd_unit = unit; 1362 ccd.ccd_interleave = ccio->ccio_ileave; 1363 if (ccd.ccd_interleave == 0 && 1364 ((ccio->ccio_flags & CCDF_MIRROR) || 1365 (ccio->ccio_flags & CCDF_PARITY))) { 1366 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1367 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1368 } 1369 if ((ccio->ccio_flags & CCDF_MIRROR) && 1370 (ccio->ccio_flags & CCDF_PARITY)) { 1371 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1372 ccio->ccio_flags &= ~CCDF_PARITY; 1373 } 1374 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1375 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1376 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1377 unit); 1378 ccio->ccio_flags |= CCDF_UNIFORM; 1379 } 1380 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1381 1382 /* 1383 * Allocate space for and copy in the array of 1384 * componet pathnames and device numbers. 1385 */ 1386 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1387 M_DEVBUF, M_WAITOK); 1388 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1389 M_DEVBUF, M_WAITOK); 1390 1391 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1392 ccio->ccio_ndisks * sizeof(char **)); 1393 if (error) { 1394 kfree(vpp, M_DEVBUF); 1395 kfree(cpp, M_DEVBUF); 1396 ccdunlock(cs); 1397 return (error); 1398 } 1399 1400 #ifdef DEBUG 1401 if (ccddebug & CCDB_INIT) { 1402 for (i = 0; i < ccio->ccio_ndisks; ++i) 1403 kprintf("ccdioctl: component %d: 0x%x\n", 1404 i, cpp[i]); 1405 } 1406 #endif 1407 1408 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1409 #ifdef DEBUG 1410 if (ccddebug & CCDB_INIT) 1411 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1412 #endif 1413 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1414 for (j = 0; j < lookedup; ++j) 1415 (void)vn_close(vpp[j], FREAD|FWRITE); 1416 kfree(vpp, M_DEVBUF); 1417 kfree(cpp, M_DEVBUF); 1418 ccdunlock(cs); 1419 return (error); 1420 } 1421 ++lookedup; 1422 } 1423 ccd.ccd_cpp = cpp; 1424 ccd.ccd_vpp = vpp; 1425 ccd.ccd_ndev = ccio->ccio_ndisks; 1426 1427 /* 1428 * Initialize the ccd. Fills in the softc for us. 1429 */ 1430 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1431 for (j = 0; j < lookedup; ++j) 1432 (void)vn_close(vpp[j], FREAD|FWRITE); 1433 kfree(vpp, M_DEVBUF); 1434 kfree(cpp, M_DEVBUF); 1435 ccdunlock(cs); 1436 return (error); 1437 } 1438 1439 /* 1440 * The ccd has been successfully initialized, so 1441 * we can place it into the array and read the disklabel. 1442 */ 1443 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1444 ccio->ccio_unit = unit; 1445 ccio->ccio_size = cs->sc_size; 1446 1447 bzero(&info, sizeof(info)); 1448 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1449 info.d_media_blocks = cs->sc_size; 1450 info.d_nheads = cs->sc_geom.ccg_ntracks; 1451 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1452 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1453 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1454 1455 /* 1456 * For cases where a label is directly applied to the ccd, 1457 * without slices, DSO_COMPATMBR forces one sector be 1458 * reserved for backwards compatibility. 1459 */ 1460 info.d_dsflags = DSO_COMPATMBR; 1461 disk_setdiskinfo(&cs->sc_disk, &info); 1462 1463 ccdunlock(cs); 1464 1465 break; 1466 1467 case CCDIOCCLR: 1468 if ((cs->sc_flags & CCDF_INITED) == 0) 1469 return (ENXIO); 1470 1471 if ((ap->a_fflag & FWRITE) == 0) 1472 return (EBADF); 1473 1474 if ((error = ccdlock(cs)) != 0) 1475 return (error); 1476 1477 if (dev_drefs(cs->sc_dev) > 1) { 1478 ccdunlock(cs); 1479 return (EBUSY); 1480 } 1481 1482 /* 1483 * Free ccd_softc information and clear entry. 1484 */ 1485 1486 /* Close the components and free their pathnames. */ 1487 for (i = 0; i < cs->sc_nccdisks; ++i) { 1488 /* 1489 * XXX: this close could potentially fail and 1490 * cause Bad Things. Maybe we need to force 1491 * the close to happen? 1492 */ 1493 #ifdef DEBUG 1494 if (ccddebug & CCDB_VNODE) 1495 vprint("CCDIOCCLR: vnode info", 1496 cs->sc_cinfo[i].ci_vp); 1497 #endif 1498 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1499 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1500 } 1501 1502 /* Free interleave index. */ 1503 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1504 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1505 1506 /* Free component info and interleave table. */ 1507 kfree(cs->sc_cinfo, M_DEVBUF); 1508 kfree(cs->sc_itable, M_DEVBUF); 1509 cs->sc_cinfo = NULL; 1510 cs->sc_itable = NULL; 1511 cs->sc_flags &= ~CCDF_INITED; 1512 1513 /* 1514 * Free ccddevice information and clear entry. 1515 */ 1516 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1517 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1518 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1519 1520 /* 1521 * And remove the devstat entry. 1522 */ 1523 devstat_remove_entry(&cs->device_stats); 1524 1525 /* This must be atomic. */ 1526 crit_enter(); 1527 ccdunlock(cs); 1528 crit_exit(); 1529 1530 break; 1531 1532 default: 1533 return (ENOTTY); 1534 } 1535 1536 return (0); 1537 } 1538 1539 static int 1540 ccddump(struct dev_dump_args *ap) 1541 { 1542 /* Not implemented. */ 1543 return ENXIO; 1544 } 1545 1546 /* 1547 * Lookup the provided name in the filesystem. If the file exists, 1548 * is a valid block device, and isn't being used by anyone else, 1549 * set *vpp to the file's vnode. 1550 */ 1551 static int 1552 ccdlookup(char *path, struct vnode **vpp) 1553 { 1554 struct nlookupdata nd; 1555 struct vnode *vp; 1556 int error; 1557 1558 *vpp = NULL; 1559 1560 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1561 if (error) 1562 return (error); 1563 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1564 #ifdef DEBUG 1565 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1566 kprintf("ccdlookup: vn_open error = %d\n", error); 1567 #endif 1568 goto done; 1569 } 1570 vp = nd.nl_open_vp; 1571 1572 if (vp->v_opencount > 1) { 1573 error = EBUSY; 1574 goto done; 1575 } 1576 1577 if (!vn_isdisk(vp, &error)) 1578 goto done; 1579 1580 #ifdef DEBUG 1581 if (ccddebug & CCDB_VNODE) 1582 vprint("ccdlookup: vnode info", vp); 1583 #endif 1584 1585 vn_unlock(vp); 1586 nd.nl_open_vp = NULL; 1587 nlookup_done(&nd); 1588 *vpp = vp; /* leave ref intact */ 1589 return (0); 1590 done: 1591 nlookup_done(&nd); 1592 return (error); 1593 } 1594 1595 /* 1596 * Wait interruptibly for an exclusive lock. 1597 * 1598 * XXX 1599 * Several drivers do this; it should be abstracted and made MP-safe. 1600 */ 1601 static int 1602 ccdlock(struct ccd_softc *cs) 1603 { 1604 int error; 1605 1606 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1607 cs->sc_flags |= CCDF_WANTED; 1608 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1609 return (error); 1610 } 1611 cs->sc_flags |= CCDF_LOCKED; 1612 return (0); 1613 } 1614 1615 /* 1616 * Unlock and wake up any waiters. 1617 */ 1618 static void 1619 ccdunlock(struct ccd_softc *cs) 1620 { 1621 1622 cs->sc_flags &= ~CCDF_LOCKED; 1623 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1624 cs->sc_flags &= ~CCDF_WANTED; 1625 wakeup(cs); 1626 } 1627 } 1628 1629 #ifdef DEBUG 1630 static void 1631 printiinfo(struct ccdiinfo *ii) 1632 { 1633 int ix, i; 1634 1635 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1636 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1637 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1638 for (i = 0; i < ii->ii_ndisk; i++) 1639 kprintf(" %d", ii->ii_index[i]); 1640 kprintf("\n"); 1641 } 1642 } 1643 #endif 1644 1645 1646 /* Local Variables: */ 1647 /* c-argdecl-indent: 8 */ 1648 /* c-continued-statement-offset: 8 */ 1649 /* c-indent-level: 8 */ 1650 /* End: */ 1651