/*	$NetBSD: vnd.c,v 1.73 2001/09/30 12:32:09 chs Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
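/*
 * Illustrative usage from userland (the exact commands and file names here
 * are examples only; see vnconfig(8) for the authoritative interface):
 *
 *	vnconfig vnd0 /var/tmp/diskimage	# associate the file with vnd0
 *	newfs /dev/rvnd0a			# build a filesystem in it
 *	mount /dev/vnd0a /mnt			# mount it like a real disk
 *	...
 *	umount /mnt && vnconfig -u vnd0		# tear it down again
 *
 * If the image carries no disklabel, vndgetdisklabel() below fakes one up
 * whose partitions span the whole file, so the 'a' partition is usable
 * immediately.
 */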
#include "fs_nfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define	VX_BUSY		1
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_NOWAIT)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VND_GETBUF(vnd)		pool_get(&(vnd)->sc_vbpool, PR_NOWAIT)
#define	VND_PUTBUF(vnd, vb)	pool_put(&(vnd)->sc_vbpool, (vb))
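/*
 * In outline (this is a summary of the code below, not additional
 * machinery): vndstrategy() allocates one struct vndxfer per incoming
 * request and carves the request into bsize-aligned pieces, each carried
 * by a struct vndbuf whose vb_buf is handed to the underlying vnode's
 * strategy routine.  As the pieces complete, vndiodone() decrements
 * vx_pending and finishes the original buffer once the count reaches zero
 * and the top half has cleared VX_BUSY.
 */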
struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach __P((int));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));

void	vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *));
void	vndgetdisklabel __P((dev_t));

static	int vndlock __P((struct vnd_softc *));
static	void vndunlock __P((struct vnd_softc *));

void
vndattach(num)
	int num;
{
	int i;
	char *mem;

	if (num <= 0)
		return;
	i = num * sizeof(struct vnd_softc);
	mem = malloc(i, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	memset(mem, 0, i);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;

	for (i = 0; i < numvnd; i++)
		BUFQ_INIT(&vnd_softc[i].sc_tab);
}

int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.
	 */
	if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}
/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 */
void
vndstrategy(bp)
	struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndxfer *vnx;
	int s, bsize, resid;
	off_t bn;
	caddr_t addr;
	int sz, flags, error, wlabel;
	struct disklabel *lp;
	struct partition *pp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = vnd->sc_dkdev.dk_label;

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
			goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		bn += pp->p_offset;
	}

	/* ...and convert to a byte offset within the file. */
	bn *= lp->d_secsize;
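	/*
	 * Worked example of the arithmetic above (the numbers are made up):
	 * with d_secsize = 2048, DEV_BSIZE = 512 and a partition starting
	 * at sector 50, a request for b_blkno 100 becomes sector
	 * 100 / (2048 / 512) = 25, then 25 + 50 = 75 absolute sectors,
	 * i.e. byte offset 75 * 2048 = 153600 within the backing file.
	 */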
	if (vnd->sc_vp->v_mount == NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;

	/* Allocate a header for this transfer and link it to the buffer */
	s = splbio();
	vnx = VND_GETXFER(vnd);
	splx(s);
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf *nbp;
		struct vnode *vp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			s = splbio();
			vnx->vx_error = error;
			goto out;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n",
			    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

		s = splbio();
		nbp = VND_GETBUF(vnd);
		splx(s);
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = NULLVP;
		LIST_INIT(&nbp->vb_buf.b_dep);

		nbp->vb_xfer = vnx;

		/*
		 * Just sort by block number
		 */
		s = splbio();
		if (vnx->vx_error != 0) {
			VND_PUTBUF(vnd, nbp);
			goto out;
		}
		vnx->vx_pending++;
		bgetvp(vp, &nbp->vb_buf);
		disksort_blkno(&vnd->sc_tab, &nbp->vb_buf);
		vndstart(vnd);
		splx(s);
		bn += sz;
		addr += sz;
	}

	s = splbio();

 out: /* Arrive here at splbio */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		VND_PUTXFER(vnd, vnx);
		biodone(bp);
	}
	splx(s);
	return;

 done:
	biodone(bp);
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	struct vnd_softc *vnd;
{
	struct buf	*bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */

	if ((vnd->sc_flags & VNF_BUSY) != 0)
		return;

	vnd->sc_flags |= VNF_BUSY;

	while (vnd->sc_active < vnd->sc_maxactive) {
		bp = BUFQ_FIRST(&vnd->sc_tab);
		if (bp == NULL)
			break;
		BUFQ_REMOVE(&vnd->sc_tab, bp);
		vnd->sc_active++;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno 0x%x"
			    " flags %lx addr %p cnt 0x%lx\n",
			    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
			    bp->b_flags, bp->b_data, bp->b_bcount);
#endif

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		if ((bp->b_flags & B_READ) == 0)
			bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
	vnd->sc_flags &= ~VNF_BUSY;
}
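/*
 * The sc_active < sc_maxactive bound enforced in vndstart() above is what
 * actually implements the "don't flood the server" policy described in its
 * header comment: vndthrottle() below sets sc_maxactive to 2 when the
 * backing vnode lives on NFS and to 8 otherwise.
 */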
void
vndiodone(bp)
	struct buf *bp;
{
	struct vndbuf *vbp = (struct vndbuf *) bp;
	struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	struct buf *pbp = vnx->vx_bp;
	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid);
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	if (vbp->vb_buf.b_vp != NULLVP)
		brelvp(&vbp->vb_buf);

	VND_PUTBUF(vnd, vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {

#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone: error %d\n",
				    pbp, vnx->vx_error);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {

#ifdef DIAGNOSTIC
		if (vnx->vx_pending != 0)
			panic("vndiodone: vnx pending: %d", vnx->vx_pending);
#endif

		if ((vnx->vx_flags & VX_BUSY) == 0) {
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone\n", pbp);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	}

	vnd->sc_active--;
	vndstart(vnd);
	splx(s);
}

/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}
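/*
 * The raw (character) device is serviced through physio(9) above, which
 * maps the user's buffer, limits each chunk with minphys() and hands the
 * pieces to vndstrategy(), so reads and writes of /dev/rvnd* take the same
 * path through the driver as block I/O does.
 */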
/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return (error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);
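			/*
			 * For example (made-up numbers, DEV_BSIZE of 512):
			 * a geometry of 32 sectors, 16 tracks, 100 cylinders
			 * and 2048-byte sectors yields
			 * 32 * 16 * 100 * (2048 / 512) = 204800 DEV_BSIZE
			 * blocks, i.e. a 100 MB pseudo-disk, and the backing
			 * file must be at least that large.
			 */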
			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (EINVAL);
			}
		} else {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			if (vnd->sc_size < (32 * 64)) {
				vndunlock(vnd);
				return (EINVAL);
			}

			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);

			/*
			 * Compute the actual size allowed by this geometry.
			 */
			geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
		}

		/*
		 * Truncate the size to that specified by
		 * the geometry.
		 * XXX Should we even bother with this?
		 */
		vnd->sc_size = geomsize;

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname));	/* XXX */
		sprintf(vnd->sc_xname, "vnd%d", unit);			/* XXX */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", 0, NULL, NULL, M_DEVBUF);
		pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0,
		    0, 0, "vndbpl", 0, NULL, NULL, M_DEVBUF);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;
	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);
		pool_destroy(&vnd->sc_vbpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);

		vndunlock(vnd);

		break;

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curproc, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

void
vndshutdown()
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(vnd)
	struct vnd_softc *vnd;
{
	struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

int
vndsize(dev)
	dev_t dev;
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}

void
vndgetdefaultlabel(sc, lp)
	struct vnd_softc *sc;
	struct disklabel *lp;
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}
/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
void
vndgetdisklabel(dev)
	dev_t dev;
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out a port-specific hack (such as
			 * the DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_fstype != FS_UNUSED)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}