/*	$NetBSD: vnd.c,v 1.44 1997/06/26 16:28:37 kleink Exp $	*/

/*-
 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	b_cylin	b_resid

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};

#define	getvndxfer()	\
	((struct vndxfer *)malloc(sizeof(struct vndxfer), M_DEVBUF, M_WAITOK))
#define	putvndxfer(vnx)	\
	free((caddr_t)(vnx), M_DEVBUF)
#define	getvndbuf()	\
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define	putvndbuf(vbp)	\
	free((caddr_t)(vbp), M_DEVBUF)

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach __P((int));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));

void	vndgetdisklabel __P((dev_t));

static int	vndlock __P((struct vnd_softc *));
static void	vndunlock __P((struct vnd_softc *));

void
vndattach(num)
	int num;
{
	char *mem;
	register u_long size;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	bzero(mem, size);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;
}

int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.
	 */
	if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
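 * Each piece is described by a vndbuf and accounted for in a shared vndxfer
 * header; vndiodone() completes the original buffer once all of the pieces
 * have finished.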
 */
void
vndstrategy(bp)
	register struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndbuf *nbp;
	struct vndxfer *vnx;
	int bn, bsize, resid;
	caddr_t addr;
	int sz, flags, error, wlabel;
	struct disklabel *lp;
	struct partition *pp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = vnd->sc_dkdev.dk_label;

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
			goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		bn += pp->p_offset;
	}

	/* ...and convert to a byte offset within the file. */
	bn *= lp->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;

	/* Allocate a header for this transfer and link it to the buffer */
	vnx = getvndxfer();
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr_t nbn;
		int off, s, nra;

		nra = 0;
		VOP_LOCK(vnd->sc_vp);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
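		 * A hole shows up as VOP_BMAP returning a block number of
		 * -1, which is mapped to EIO just above.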
		 */
		if (error) {
			vnx->vx_error = error;
			s = splbio();
			if (vnx->vx_pending == 0) {
				bp->b_error = error;
				bp->b_flags |= B_ERROR;
				putvndxfer(vnx);
				goto done;
			}
			splx(s);
			return;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n",
			    vnd->sc_vp, vp, bn, nbn, sz);
#endif

		nbp = getvndbuf();
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->vb_buf.b_dev = vp->v_rdev;
		else
			nbp->vb_buf.b_dev = NODEV;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = vp;
		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
		if (bp->b_dirtyend == 0) {
			nbp->vb_buf.b_dirtyoff = 0;
			nbp->vb_buf.b_dirtyend = sz;
		} else {
			nbp->vb_buf.b_dirtyoff =
			    max(0, bp->b_dirtyoff - (bp->b_bcount - resid));
			nbp->vb_buf.b_dirtyend =
			    min(sz,
				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
		}
		if (bp->b_validend == 0) {
			nbp->vb_buf.b_validoff = 0;
			nbp->vb_buf.b_validend = sz;
		} else {
			nbp->vb_buf.b_validoff =
			    max(0, bp->b_validoff - (bp->b_bcount - resid));
			nbp->vb_buf.b_validend =
			    min(sz,
				max(0, bp->b_validend - (bp->b_bcount-resid)));
		}

		nbp->vb_xfer = vnx;

		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
		s = splbio();
		vnx->vx_pending++;
		disksort(&vnd->sc_tab, &nbp->vb_buf);
		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
			vnd->sc_tab.b_active++;
			vndstart(vnd);
		}
		splx(s);
		bn += sz;
		addr += sz;
	}
	return;

 done:
	biodone(bp);
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	register struct vnd_softc *vnd;
{
	register struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */
	bp = vnd->sc_tab.b_actf;
	vnd->sc_tab.b_actf = bp->b_actf;
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndstart(%ld): bp %p vp %p blkno %x addr %p cnt %lx\n",
		    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
		    bp->b_data, bp->b_bcount);
#endif

	/* Instrumentation. */
	disk_busy(&vnd->sc_dkdev);

	if ((bp->b_flags & B_READ) == 0)
		bp->b_vp->v_numoutput++;
	VOP_STRATEGY(bp);
}

void
vndiodone(bp)
	struct buf *bp;
{
	register struct vndbuf *vbp = (struct vndbuf *) bp;
	register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	register struct buf *pbp = vnx->vx_bp;
	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno %x addr %p cnt %lx\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid);
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}
	putvndbuf(vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (pbp->b_resid == 0 || (vnx->vx_error && vnx->vx_pending == 0)) {

		if (vnx->vx_error != 0) {
			pbp->b_flags |= B_ERROR;
			pbp->b_error = vnx->vx_error;
		}
		putvndxfer(vnx);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: pbp %p iodone\n", pbp);
#endif
		biodone(pbp);
	}

	if (vnd->sc_tab.b_actf)
		vndstart(vnd);
	else
		vnd->sc_tab.b_active--;
	splx(s);
}

/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return (error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			bcopy(&vio->vnd_geom, &vnd->sc_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}
		} else {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			if (vnd->sc_size < (32 * 64)) {
				/* Clean up as in the failure paths above. */
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);

			/*
			 * Compute the actual size allowed by this geometry.
			 */
			geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
		}

		/*
		 * Truncate the size to that specified by
		 * the geometry.
		 * XXX Should we even bother with this?
		 */
		vnd->sc_size = geomsize;

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size %lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		bzero(vnd->sc_xname, sizeof(vnd->sc_xname));	/* XXX */
		sprintf(vnd->sc_xname, "vnd%d", unit);		/* XXX */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		disk_attach(&vnd->sc_dkdev);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);

		vndunlock(vnd);

		break;

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    (struct disklabel *)data, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	register struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	VOP_LOCK(vnd->sc_vp);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	register struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

void
vndshutdown()
{
	register struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(vnd)
	register struct vnd_softc *vnd;
{
	register struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

int
vndsize(dev)
	dev_t dev;
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
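 * The in-core label is initialized from the configured vnd geometry before
 * the read is attempted, so a usable label exists even if the read fails.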
 */
void
vndgetdisklabel(dev)
	dev_t dev;
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	int i;

	bzero(lp, sizeof(*lp));
	bzero(clp, sizeof(*clp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}