/*	$NetBSD: vnd.c,v 1.77 2002/01/13 19:28:08 tsutsui Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
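
/*
 * Typical userland usage, as a rough sketch (the exact device node names
 * and partition letters are port-dependent assumptions; vnconfig(8) is
 * the usual front end for the configuration ioctls implemented below):
 *
 *	vnconfig vnd0 /tmp/diskimage	# map the file to vnd0
 *	newfs /dev/rvnd0a		# build a filesystem in it
 *	mount /dev/vnd0a /mnt		# and mount it
 *	...
 *	umount /mnt
 *	vnconfig -u vnd0		# unconfigure
 */
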
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.77 2002/01/13 19:28:08 tsutsui Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define	VX_BUSY		1
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};
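
/*
 * Each request handed to vndstrategy() gets one vndxfer header, which
 * tracks the parent buffer, the first error seen and the number of
 * outstanding child buffers; each bsize-sized chunk of the request is
 * carried by a vndbuf.  Both are taken from the per-unit pools below
 * with PR_NOWAIT at splbio.
 */
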
#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_NOWAIT)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VND_GETBUF(vnd)		pool_get(&(vnd)->sc_vbpool, PR_NOWAIT)
#define	VND_PUTBUF(vnd, vb)	pool_put(&(vnd)->sc_vbpool, (vb))

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach __P((int));
void	vnddetach __P((void));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));

void	vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *));
void	vndgetdisklabel __P((dev_t));

static	int vndlock __P((struct vnd_softc *));
static	void vndunlock __P((struct vnd_softc *));

void
vndattach(num)
	int num;
{
	int i;
	char *mem;

	if (num <= 0)
		return;
	i = num * sizeof(struct vnd_softc);
	mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;

	for (i = 0; i < numvnd; i++)
		BUFQ_INIT(&vnd_softc[i].sc_tab);
}

void
vnddetach()
{

	free(vnd_softc, M_DEVBUF);
}

int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.
	 */
	if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}
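
/*
 * Note on the bookkeeping above: dk_copenmask and dk_bopenmask carry one
 * bit per partition for the character and block flavors of the device,
 * and dk_openmask is their union.  For example, opening the block device
 * for partition 'a' sets bit 0 of dk_bopenmask.  VNDIOCCLR below uses
 * these masks to refuse unconfiguration while other partitions are still
 * open.
 */
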
/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 */
void
vndstrategy(bp)
	struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndxfer *vnx;
	int s, bsize, resid;
	off_t bn;
	caddr_t addr;
	int sz, flags, error, wlabel;
	struct disklabel *lp;
	struct partition *pp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = vnd->sc_dkdev.dk_label;

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
			goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		bn += pp->p_offset;
	}

	/* ...and convert to a byte offset within the file. */
	bn *= lp->d_secsize;
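
	/*
	 * Illustrative arithmetic (the numbers are assumptions, not taken
	 * from any particular configuration): with d_secsize = 2048 and
	 * b_blkno = 100 (in DEV_BSIZE = 512 byte units), bn becomes
	 * 100 / 4 = 25 device sectors; adding the partition offset and
	 * multiplying by d_secsize then yields the byte offset into the
	 * backing file at which this transfer starts.
	 */
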
	if (vnd->sc_vp->v_mount == NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;

	/* Allocate a header for this transfer and link it to the buffer */
	s = splbio();
	vnx = VND_GETXFER(vnd);
	splx(s);
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf *nbp;
		struct vnode *vp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			s = splbio();
			vnx->vx_error = error;
			goto out;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n",
			    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

		s = splbio();
		nbp = VND_GETBUF(vnd);
		splx(s);
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = NULLVP;
		LIST_INIT(&nbp->vb_buf.b_dep);

		nbp->vb_xfer = vnx;

		/*
		 * Just sort by block number
		 */
		s = splbio();
		if (vnx->vx_error != 0) {
			VND_PUTBUF(vnd, nbp);
			goto out;
		}
		vnx->vx_pending++;
		bgetvp(vp, &nbp->vb_buf);
		disksort_blkno(&vnd->sc_tab, &nbp->vb_buf);
		vndstart(vnd);
		splx(s);
		bn += sz;
		addr += sz;
	}

	s = splbio();

 out: /* Arrive here at splbio */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		VND_PUTXFER(vnd, vnx);
		biodone(bp);
	}
	splx(s);
	return;

 done:
	biodone(bp);
}
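
/*
 * Completion protocol between vndstrategy() and vndiodone(): while the
 * submission loop above is still running, VX_BUSY is set on the vndxfer,
 * so vndiodone() never completes the parent buffer out from under it.
 * Whoever observes both VX_BUSY clear and vx_pending == 0 (either the
 * "out:" path above or the last child buffer in vndiodone()) releases
 * the vndxfer and calls biodone() on the parent.
 */
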
/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	struct vnd_softc *vnd;
{
	struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */

	if ((vnd->sc_flags & VNF_BUSY) != 0)
		return;

	vnd->sc_flags |= VNF_BUSY;

	while (vnd->sc_active < vnd->sc_maxactive) {
		bp = BUFQ_FIRST(&vnd->sc_tab);
		if (bp == NULL)
			break;
		BUFQ_REMOVE(&vnd->sc_tab, bp);
		vnd->sc_active++;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno 0x%x"
			    " flags %lx addr %p cnt 0x%lx\n",
			    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
			    bp->b_flags, bp->b_data, bp->b_bcount);
#endif

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		if ((bp->b_flags & B_READ) == 0)
			bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
	vnd->sc_flags &= ~VNF_BUSY;
}

void
vndiodone(bp)
	struct buf *bp;
{
	struct vndbuf *vbp = (struct vndbuf *) bp;
	struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	struct buf *pbp = vnx->vx_bp;
	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid);
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	if (vbp->vb_buf.b_vp != NULLVP)
		brelvp(&vbp->vb_buf);

	VND_PUTBUF(vnd, vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {

#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone: error %d\n",
				    pbp, vnx->vx_error);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {

#ifdef DIAGNOSTIC
		if (vnx->vx_pending != 0)
			panic("vndiodone: vnx pending: %d", vnx->vx_pending);
#endif

		if ((vnx->vx_flags & VX_BUSY) == 0) {
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone\n", pbp);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	}

	vnd->sc_active--;
	vndstart(vnd);
	splx(s);
}

/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}
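
/*
 * How the configuration ioctls below are reached from userland, as an
 * illustrative sketch only (the device node name is an assumption, and
 * headers and error handling are omitted); vnconfig(8) does essentially
 * this on the user's behalf:
 *
 *	struct vnd_ioctl vio;
 *	int fd;
 *
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/tmp/diskimage";
 *	fd = open("/dev/rvnd0d", O_RDWR);
 *	ioctl(fd, VNDIOCSET, &vio);
 *	...
 *	ioctl(fd, VNDIOCCLR, &vio);
 *
 * The open must be for writing, since VNDIOCSET and VNDIOCCLR require
 * FWRITE (see the first switch below); on success, VNDIOCSET returns the
 * usable size in bytes in vio.vnd_size.
 */
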
/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return (error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}
		} else {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			if (vnd->sc_size < (32 * 64)) {
				vndunlock(vnd);
				return (EINVAL);
			}

			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);

			/*
			 * Compute the actual size allowed by this geometry.
			 */
			geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
		}
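
		/*
		 * For example, with the default fictitious geometry above
		 * (DEV_BSIZE sectors, 32 sectors/track, 64 tracks/cylinder)
		 * one cylinder is 32 * 64 = 2048 DEV_BSIZE blocks, i.e.
		 * 1 MB, so geomsize is sc_size rounded down to a whole
		 * number of megabytes.
		 */
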
		/*
		 * Truncate the size to that specified by
		 * the geometry.
		 * XXX Should we even bother with this?
		 */
		vnd->sc_size = geomsize;

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname));	/* XXX */
		sprintf(vnd->sc_xname, "vnd%d", unit);			/* XXX */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", 0, NULL, NULL, M_DEVBUF);
		pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0,
		    0, 0, "vndbpl", 0, NULL, NULL, M_DEVBUF);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);
		pool_destroy(&vnd->sc_vbpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);

		vndunlock(vnd);

		break;

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}
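
/*
 * Note that writedisklabel() above is handed vndstrategy as its strategy
 * routine, so DIOCWDINFO writes the new label through the vnd device's
 * raw partition and hence into the backing file itself, rather than to
 * any underlying hardware.
 */
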
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curproc, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}
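
/*
 * sc_maxactive, chosen below, bounds how many child buffers vndstart()
 * will have outstanding via VOP_STRATEGY at any one time (tracked in
 * sc_active and decremented in vndiodone()); NFS-backed files get a much
 * smaller limit so that a busy vnd does not flood the server.
 */
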
/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

void
vndshutdown()
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(vnd)
	struct vnd_softc *vnd;
{
	struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

int
vndsize(dev)
	dev_t dev;
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}

void
vndgetdefaultlabel(sc, lp)
	struct vnd_softc *sc;
	struct disklabel *lp;
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}
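
/*
 * As a worked example (the file size is an arbitrary assumption): for a
 * 10 MB backing file with the default geometry, sc_size is 20480
 * DEV_BSIZE blocks, so the label above advertises 20480 sectors of 512
 * bytes, 32 sectors/track, 64 tracks/cylinder and 10 cylinders, with the
 * raw partition spanning the whole unit.
 */
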
/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
void
vndgetdisklabel(dev)
	dev_t dev;
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port-specific hacks (such as
			 * the DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_fstype != FS_UNUSED)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}