/*	$NetBSD: vnd.c,v 1.53 1997/12/31 02:46:51 enami Exp $	*/

/*-
 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	b_cylin	b_resid

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define	VX_BUSY		1
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};

#define	getvndxfer()	\
	((struct vndxfer *)malloc(sizeof(struct vndxfer), M_DEVBUF, M_WAITOK))
#define	putvndxfer(vnx)	\
	free((caddr_t)(vnx), M_DEVBUF)
#define	getvndbuf()	\
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define	putvndbuf(vbp)	\
	free((caddr_t)(vbp), M_DEVBUF)

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach __P((int));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));

void	vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *));
void	vndgetdisklabel __P((dev_t));

static	int vndlock __P((struct vnd_softc *));
static	void vndunlock __P((struct vnd_softc *));

void
vndattach(num)
	int num;
{
	char *mem;
	register u_long size;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	bzero(mem, size);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;
}

int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.
	 */
	if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
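 *
 * Each piece is issued as a child "vndbuf" against the underlying file's
 * vnode; the "vndxfer" header tracks the outstanding children, and the
 * parent buffer is completed (biodone) once the last child has returned.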
 */
void
vndstrategy(bp)
	register struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndxfer *vnx;
	int s, bn, bsize, resid;
	caddr_t addr;
	int sz, flags, error, wlabel;
	struct disklabel *lp;
	struct partition *pp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = vnd->sc_dkdev.dk_label;

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
			goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		bn += pp->p_offset;
	}

	/* ...and convert to a byte offset within the file. */
	bn *= lp->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;

	/* Allocate a header for this transfer and link it to the buffer */
	vnx = getvndxfer();
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf *nbp;
		struct vnode *vp;
		daddr_t nbn;
		int off, nra, dolock = 0;

		nra = 0;
		/*
		 * XXX check if vnode is already locked, to avoid
		 * recursive locking.  The real solution is to
		 * allow recursive locks here, but the interface
		 * doesn't allow it.
		 */
		if (!VOP_ISLOCKED(vnd->sc_vp)) {
			dolock = 1;
			VOP_LOCK(vnd->sc_vp);
		}
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		if (dolock)
			VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			s = splbio();
			vnx->vx_error = error;
			goto out;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%x/0x%x sz 0x%x\n",
			    vnd->sc_vp, vp, bn, nbn, sz);
#endif

		nbp = getvndbuf();
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = NULLVP;
		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
		if (bp->b_dirtyend == 0) {
			nbp->vb_buf.b_dirtyoff = 0;
			nbp->vb_buf.b_dirtyend = sz;
		} else {
			nbp->vb_buf.b_dirtyoff =
			    max(0, bp->b_dirtyoff - (bp->b_bcount - resid));
			nbp->vb_buf.b_dirtyend =
			    min(sz,
				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
		}
		if (bp->b_validend == 0) {
			nbp->vb_buf.b_validoff = 0;
			nbp->vb_buf.b_validend = sz;
		} else {
			nbp->vb_buf.b_validoff =
			    max(0, bp->b_validoff - (bp->b_bcount - resid));
			nbp->vb_buf.b_validend =
			    min(sz,
				max(0, bp->b_validend - (bp->b_bcount-resid)));
		}

		nbp->vb_xfer = vnx;

		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
		s = splbio();
		if (vnx->vx_error != 0) {
			putvndbuf(nbp);
			goto out;
		}
		vnx->vx_pending++;
		bgetvp(vp, &nbp->vb_buf);
		disksort(&vnd->sc_tab, &nbp->vb_buf);
		vndstart(vnd);
		splx(s);
		bn += sz;
		addr += sz;
	}

	s = splbio();

 out:	/* Arrive here at splbio */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		putvndxfer(vnx);
		biodone(bp);
	}
	splx(s);
	return;

 done:
	biodone(bp);
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	register struct vnd_softc *vnd;
{
	struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */

	if ((vnd->sc_flags & VNF_BUSY) != 0)
		return;

	vnd->sc_flags |= VNF_BUSY;

	while (vnd->sc_tab.b_active < vnd->sc_maxactive) {
		bp = vnd->sc_tab.b_actf;
		if (bp == NULL)
			break;
		vnd->sc_tab.b_actf = bp->b_actf;
		vnd->sc_tab.b_active++;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
			    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
			    bp->b_data, bp->b_bcount);
#endif

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		if ((bp->b_flags & B_READ) == 0)
			bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
	vnd->sc_flags &= ~VNF_BUSY;
}

void
vndiodone(bp)
	struct buf *bp;
{
	register struct vndbuf *vbp = (struct vndbuf *) bp;
	register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	register struct buf *pbp = vnx->vx_bp;
	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid);
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	if (vbp->vb_buf.b_vp != NULLVP)
		brelvp(&vbp->vb_buf);

	putvndbuf(vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {

#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone: error %d\n",
				    pbp, vnx->vx_error);
#endif
			putvndxfer(vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {

#ifdef DIAGNOSTIC
		if (vnx->vx_pending != 0)
			panic("vndiodone: vnx pending: %d", vnx->vx_pending);
#endif

		if ((vnx->vx_flags & VX_BUSY) == 0) {
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone\n", pbp);
#endif
			putvndxfer(vnx);
			biodone(pbp);
		}
	}

	vnd->sc_tab.b_active--;
	vndstart(vnd);
	splx(s);
}

/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return(error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return(error);
		}
		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			bcopy(&vio->vnd_geom, &vnd->sc_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}
		} else {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			if (vnd->sc_size < (32 * 64)) {
				vndunlock(vnd);
				return (EINVAL);
			}

			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);

			/*
			 * Compute the actual size allowed by this geometry.
			 */
			geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
		}

		/*
		 * Truncate the size to that specified by
		 * the geometry.
		 * XXX Should we even bother with this?
		 */
		vnd->sc_size = geomsize;

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return(error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		bzero(vnd->sc_xname, sizeof(vnd->sc_xname));	/* XXX */
		sprintf(vnd->sc_xname, "vnd%d", unit);		/* XXX */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		disk_attach(&vnd->sc_dkdev);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);

		vndunlock(vnd);

		break;

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    (struct disklabel *)data, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
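 *
 * A single block is read below chiefly to establish those credentials
 * with NFS before any real I/O is issued (see the XXX kludge note).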
 */
int
vndsetcred(vnd, cred)
	register struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	VOP_LOCK(vnd->sc_vp);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	register struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

void
vndshutdown()
{
	register struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(vnd)
	register struct vnd_softc *vnd;
{
	register struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

int
vndsize(dev)
	dev_t dev;
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}

void
vndgetdefaultlabel(sc, lp)
	struct vnd_softc *sc;
	struct disklabel *lp;
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	bzero(lp, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
void
vndgetdisklabel(dev)
	dev_t dev;
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	bzero(clp, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_fstype != FS_UNUSED)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}