/*	$NetBSD: vnd.c,v 1.75 2001/11/13 05:32:50 lukem Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.75 2001/11/13 05:32:50 lukem Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define	VX_BUSY		1
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_NOWAIT)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VND_GETBUF(vnd)		pool_get(&(vnd)->sc_vbpool, PR_NOWAIT)
#define	VND_PUTBUF(vnd, vb)	pool_put(&(vnd)->sc_vbpool, (vb))

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach __P((int));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));

void	vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *));
void	vndgetdisklabel __P((dev_t));

static	int vndlock __P((struct vnd_softc *));
static	void vndunlock __P((struct vnd_softc *));
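
/*
 * Illustrative sketch (not part of the driver): a vnd unit is normally
 * configured from userland with vnconfig(8), which issues the VNDIOCSET
 * ioctl handled in vndioctl() below.  Roughly, that looks like the
 * following; the device node name "/dev/rvnd0d" and the letter of the
 * raw partition are port-dependent assumptions.
 *
 *	#include <sys/ioctl.h>
 *	#include <dev/vndvar.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	int
 *	configure_vnd0(const char *image_path)
 *	{
 *		struct vnd_ioctl vio;
 *		int fd;
 *
 *		fd = open("/dev/rvnd0d", O_RDWR);	// raw partition
 *		if (fd == -1)
 *			return (-1);
 *		memset(&vio, 0, sizeof(vio));
 *		vio.vnd_file = (char *)image_path;	// backing file
 *		// Optionally: vio.vnd_flags |= VNDIOF_HASGEOM and fill
 *		// in vio.vnd_geom to override the default geometry.
 *		if (ioctl(fd, VNDIOCSET, &vio) == -1)
 *			return (-1);
 *		// On success vio.vnd_size holds the usable size in bytes.
 *		return (fd);
 *	}
 *
 * VNDIOCCLR on the same device unconfigures the unit again.
 */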
void
vndattach(num)
	int num;
{
	int i;
	char *mem;

	if (num <= 0)
		return;
	i = num * sizeof(struct vnd_softc);
	mem = malloc(i, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	memset(mem, 0, i);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;

	for (i = 0; i < numvnd; i++)
		BUFQ_INIT(&vnd_softc[i].sc_tab);
}

int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.
	 */
	if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 */
void
vndstrategy(bp)
	struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndxfer *vnx;
	int s, bsize, resid;
	off_t bn;
	caddr_t addr;
	int sz, flags, error, wlabel;
	struct disklabel *lp;
	struct partition *pp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = vnd->sc_dkdev.dk_label;

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
			goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		bn += pp->p_offset;
	}

	/* ...and convert to a byte offset within the file. */
	bn *= lp->d_secsize;

	if (vnd->sc_vp->v_mount == NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;

	/* Allocate a header for this transfer and link it to the buffer */
	s = splbio();
	vnx = VND_GETXFER(vnd);
	splx(s);
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf *nbp;
		struct vnode *vp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			s = splbio();
			vnx->vx_error = error;
			goto out;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n",
			    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

		s = splbio();
		nbp = VND_GETBUF(vnd);
		splx(s);
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = NULLVP;
		LIST_INIT(&nbp->vb_buf.b_dep);

		nbp->vb_xfer = vnx;

		/*
		 * Just sort by block number
		 */
		s = splbio();
		if (vnx->vx_error != 0) {
			VND_PUTBUF(vnd, nbp);
			goto out;
		}
		vnx->vx_pending++;
		bgetvp(vp, &nbp->vb_buf);
		disksort_blkno(&vnd->sc_tab, &nbp->vb_buf);
		vndstart(vnd);
		splx(s);
		bn += sz;
		addr += sz;
	}

	s = splbio();

 out:			/* Arrive here at splbio */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		VND_PUTXFER(vnd, vnx);
		biodone(bp);
	}
	splx(s);
	return;

 done:
	biodone(bp);
}
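
/*
 * Worked example for the arithmetic in vndstrategy() above (illustrative
 * figures only): assume DEV_BSIZE and d_secsize are both 512 and the
 * backing file lives on a filesystem with f_iosize (bsize) of 8192.
 * A transfer starting at block 35 of a partition whose p_offset is 63
 * gives bn = 35 + 63 = 98 sectors, i.e. a byte offset of 98 * 512 = 50176.
 * VOP_BMAP is then asked for file block 50176 / 8192 = 6; with
 * off = 50176 % 8192 = 1024, the first chunk is sz = 8192 - 1024 = 7168
 * bytes (capped at resid), and subsequent chunks start on f_iosize
 * boundaries so the read-ahead count (nra) can be used to cluster them.
 */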

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	struct vnd_softc *vnd;
{
	struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */

	if ((vnd->sc_flags & VNF_BUSY) != 0)
		return;

	vnd->sc_flags |= VNF_BUSY;

	while (vnd->sc_active < vnd->sc_maxactive) {
		bp = BUFQ_FIRST(&vnd->sc_tab);
		if (bp == NULL)
			break;
		BUFQ_REMOVE(&vnd->sc_tab, bp);
		vnd->sc_active++;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno 0x%x"
			    " flags %lx addr %p cnt 0x%lx\n",
			    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
			    bp->b_flags, bp->b_data, bp->b_bcount);
#endif

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		if ((bp->b_flags & B_READ) == 0)
			bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
	vnd->sc_flags &= ~VNF_BUSY;
}

void
vndiodone(bp)
	struct buf *bp;
{
	struct vndbuf *vbp = (struct vndbuf *) bp;
	struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	struct buf *pbp = vnx->vx_bp;
	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid);
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	if (vbp->vb_buf.b_vp != NULLVP)
		brelvp(&vbp->vb_buf);

	VND_PUTBUF(vnd, vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {

#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone: error %d\n",
				    pbp, vnx->vx_error);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {

#ifdef DIAGNOSTIC
		if (vnx->vx_pending != 0)
			panic("vndiodone: vnx pending: %d", vnx->vx_pending);
#endif

		if ((vnx->vx_flags & VX_BUSY) == 0) {
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone\n", pbp);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	}

	vnd->sc_active--;
	vndstart(vnd);
	splx(s);
}

/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return (error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use the "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}
		} else {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			if (vnd->sc_size < (32 * 64)) {
				vndunlock(vnd);
				return (EINVAL);
			}

			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);

			/*
			 * Compute the actual size allowed by this geometry.
			 */
			geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
		}

		/*
		 * Truncate the size to that specified by
		 * the geometry.
		 * XXX Should we even bother with this?
		 */
		vnd->sc_size = geomsize;

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname));	/* XXX */
		sprintf(vnd->sc_xname, "vnd%d", unit);			/* XXX */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", 0, NULL, NULL, M_DEVBUF);
		pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0,
		    0, 0, "vndbpl", 0, NULL, NULL, M_DEVBUF);

		/* Try to read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;
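
		/*
		 * Worked example for the default geometry computed in the
		 * VNDIOCSET case above (illustrative figures, assuming
		 * DEV_BSIZE is 512): a 10 MB backing file yields
		 * sc_size = btodb(10485760) = 20480 blocks, so
		 * vng_ncylinders = 20480 / 2048 = 10 and
		 * geomsize = 32 * 64 * 10 = 20480; nothing is lost.
		 * A 10.5 MB file still gives 10 cylinders, and the trailing
		 * 1024 blocks go unused once sc_size is truncated to
		 * geomsize.
		 */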

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);
		pool_destroy(&vnd->sc_vbpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);

		vndunlock(vnd);

		break;

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curproc, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

void
vndshutdown()
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(vnd)
	struct vnd_softc *vnd;
{
	struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

int
vndsize(dev)
	dev_t dev;
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}

void
vndgetdefaultlabel(sc, lp)
	struct vnd_softc *sc;
	struct disklabel *lp;
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
void
vndgetdisklabel(dev)
	dev_t dev;
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port-specific hacks (such as the
			 * DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_fstype != FS_UNUSED)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}