/*	$NetBSD: spec_vnops.c,v 1.39 1998/03/01 02:21:34 fvdl Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char devopn[] = "devopn";
const char devio[] = "devio";
const char devwait[] = "devwait";
const char devin[] = "devin";
const char devout[] = "devout";
const char devioc[] = "devioc";
const char devcls[] = "devcls";

int (**spec_vnodeop_p) __P((void *));
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
        { &vop_default_desc, vn_default_error },
        { &vop_lookup_desc, spec_lookup },              /* lookup */
        { &vop_create_desc, spec_create },              /* create */
        { &vop_mknod_desc, spec_mknod },                /* mknod */
        { &vop_open_desc, spec_open },                  /* open */
        { &vop_close_desc, spec_close },                /* close */
        { &vop_access_desc, spec_access },              /* access */
        { &vop_getattr_desc, spec_getattr },            /* getattr */
        { &vop_setattr_desc, spec_setattr },            /* setattr */
        { &vop_read_desc, spec_read },                  /* read */
        { &vop_write_desc, spec_write },                /* write */
        { &vop_lease_desc, spec_lease_check },          /* lease */
        { &vop_ioctl_desc, spec_ioctl },                /* ioctl */
        { &vop_poll_desc, spec_poll },                  /* poll */
        { &vop_revoke_desc, spec_revoke },              /* revoke */
        { &vop_mmap_desc, spec_mmap },                  /* mmap */
        { &vop_fsync_desc, spec_fsync },                /* fsync */
        { &vop_seek_desc, spec_seek },                  /* seek */
        { &vop_remove_desc, spec_remove },              /* remove */
        { &vop_link_desc, spec_link },                  /* link */
        { &vop_rename_desc, spec_rename },              /* rename */
        { &vop_mkdir_desc, spec_mkdir },                /* mkdir */
        { &vop_rmdir_desc, spec_rmdir },                /* rmdir */
        { &vop_symlink_desc, spec_symlink },            /* symlink */
        { &vop_readdir_desc, spec_readdir },            /* readdir */
        { &vop_readlink_desc, spec_readlink },          /* readlink */
        { &vop_abortop_desc, spec_abortop },            /* abortop */
        { &vop_inactive_desc, spec_inactive },          /* inactive */
        { &vop_reclaim_desc, spec_reclaim },            /* reclaim */
        { &vop_lock_desc, spec_lock },                  /* lock */
        { &vop_unlock_desc, spec_unlock },              /* unlock */
        { &vop_bmap_desc, spec_bmap },                  /* bmap */
        { &vop_strategy_desc, spec_strategy },          /* strategy */
        { &vop_print_desc, spec_print },                /* print */
        { &vop_islocked_desc, spec_islocked },          /* islocked */
        { &vop_pathconf_desc, spec_pathconf },          /* pathconf */
        { &vop_advlock_desc, spec_advlock },            /* advlock */
        { &vop_blkatoff_desc, spec_blkatoff },          /* blkatoff */
        { &vop_valloc_desc, spec_valloc },              /* valloc */
        { &vop_vfree_desc, spec_vfree },                /* vfree */
        { &vop_truncate_desc, spec_truncate },          /* truncate */
        { &vop_update_desc, spec_update },              /* update */
        { &vop_bwrite_desc, spec_bwrite },              /* bwrite */
        { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
        { &spec_vnodeop_p, spec_vnodeop_entries };
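
/*
 * Illustrative note: VOP_*() calls on a vnode whose v_op is
 * spec_vnodeop_p dispatch through the table above, so for example
 * VOP_OPEN(vp, mode, cred, p) on a device vnode lands in spec_open()
 * below.  The spec_* entries that are not defined in this file are
 * declared in miscfs/specfs/specdev.h, which maps operations that make
 * no sense for a device node to generic routines.
 */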

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(v)
        void *v;
{
        struct vop_lookup_args /* {
                struct vnode *a_dvp;
                struct vnode **a_vpp;
                struct componentname *a_cnp;
        } */ *ap = v;

        *ap->a_vpp = NULL;
        return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(v)
        void *v;
{
        struct vop_open_args /* {
                struct vnode *a_vp;
                int a_mode;
                struct ucred *a_cred;
                struct proc *a_p;
        } */ *ap = v;
        struct proc *p = ap->a_p;
        struct vnode *bvp, *vp = ap->a_vp;
        dev_t bdev, dev = (dev_t)vp->v_rdev;
        register int maj = major(dev);
        int error;

        /*
         * Don't allow open if fs is mounted -nodev.
         */
        if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
                return (ENXIO);

        switch (vp->v_type) {

        case VCHR:
                if ((u_int)maj >= nchrdev)
                        return (ENXIO);
                if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
                        /*
                         * When running in very secure mode, do not allow
                         * opens for writing of any disk character devices.
                         */
                        if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
                                return (EPERM);
                        /*
                         * When running in secure mode, do not allow opens
                         * for writing of /dev/mem, /dev/kmem, or character
                         * devices whose corresponding block devices are
                         * currently mounted.
                         */
                        if (securelevel >= 1) {
                                if ((bdev = chrtoblk(dev)) != (dev_t)NODEV &&
                                    vfinddev(bdev, VBLK, &bvp) &&
                                    bvp->v_usecount > 0 &&
                                    (error = vfs_mountedon(bvp)))
                                        return (error);
                                if (iskmemdev(dev))
                                        return (EPERM);
                        }
                }
                if (cdevsw[maj].d_type == D_TTY)
                        vp->v_flag |= VISTTY;
                VOP_UNLOCK(vp, 0);
                error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                return (error);

        case VBLK:
                if ((u_int)maj >= nblkdev)
                        return (ENXIO);
                /*
                 * When running in very secure mode, do not allow
                 * opens for writing of any disk block devices.
                 */
                if (securelevel >= 2 && ap->a_cred != FSCRED &&
                    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
                        return (EPERM);
                /*
                 * Do not allow opens of block devices that are
                 * currently mounted.
                 */
                if ((error = vfs_mountedon(vp)) != 0)
                        return (error);
                return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p));
        case VNON:
        case VLNK:
        case VDIR:
        case VREG:
        case VBAD:
        case VFIFO:
        case VSOCK:
                break;
        }
        return (0);
}
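
/*
 * Illustrative note: spec_open() drops the vnode lock around the
 * character driver's d_open because an open may sleep for a long time
 * (a dial-in tty waiting for carrier, say) and retakes it with
 * vn_lock() before returning.  The securelevel policy above amounts
 * to: at securelevel 1, refuse writable opens of the kernel memory
 * devices and of character disks whose block alias is mounted; at
 * securelevel 2, additionally refuse writable opens of any disk
 * device, character or block.  Opens made with FSCRED (by the kernel
 * itself) are exempt.
 */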

/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(v)
        void *v;
{
        struct vop_read_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                struct ucred *a_cred;
        } */ *ap = v;
        register struct vnode *vp = ap->a_vp;
        register struct uio *uio = ap->a_uio;
        struct proc *p = uio->uio_procp;
        struct buf *bp;
        daddr_t bn, nextbn;
        long bsize, bscale, ssize;
        struct partinfo dpart;
        int n, on, majordev;
        int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
        int error = 0;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_READ)
                panic("spec_read mode");
        if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
                panic("spec_read proc");
#endif
        if (uio->uio_resid == 0)
                return (0);

        switch (vp->v_type) {

        case VCHR:
                VOP_UNLOCK(vp, 0);
                error = (*cdevsw[major(vp->v_rdev)].d_read)
                        (vp->v_rdev, uio, ap->a_ioflag);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                return (error);

        case VBLK:
                if (uio->uio_offset < 0)
                        return (EINVAL);
                bsize = BLKDEV_IOSIZE;
                ssize = DEV_BSIZE;
                if ((majordev = major(vp->v_rdev)) < nblkdev &&
                    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
                    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
                        if (dpart.part->p_fstype == FS_BSDFFS &&
                            dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
                                bsize = dpart.part->p_frag *
                                    dpart.part->p_fsize;
                        if (dpart.disklab->d_secsize != 0)
                                ssize = dpart.disklab->d_secsize;
                }
                bscale = bsize / ssize;
                do {
                        bn = (uio->uio_offset / ssize) &~ (bscale - 1);
                        on = uio->uio_offset % bsize;
                        n = min((unsigned)(bsize - on), uio->uio_resid);
                        if (vp->v_lastr + bscale == bn) {
                                nextbn = bn + bscale;
                                error = breadn(vp, bn, (int)bsize, &nextbn,
                                    (int *)&bsize, 1, NOCRED, &bp);
                        } else
                                error = bread(vp, bn, (int)bsize, NOCRED, &bp);
                        vp->v_lastr = bn;
                        n = min(n, bsize - bp->b_resid);
                        if (error) {
                                brelse(bp);
                                return (error);
                        }
                        error = uiomove((char *)bp->b_data + on, n, uio);
                        brelse(bp);
                } while (error == 0 && uio->uio_resid > 0 && n != 0);
                return (error);

        default:
                panic("spec_read type");
        }
        /* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(v)
        void *v;
{
        struct vop_write_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                struct ucred *a_cred;
        } */ *ap = v;
        register struct vnode *vp = ap->a_vp;
        register struct uio *uio = ap->a_uio;
        struct proc *p = uio->uio_procp;
        struct buf *bp;
        daddr_t bn;
        long bsize, bscale, ssize;
        struct partinfo dpart;
        int n, on, majordev;
        int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
        int error = 0;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("spec_write mode");
        if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
                panic("spec_write proc");
#endif

        switch (vp->v_type) {

        case VCHR:
                VOP_UNLOCK(vp, 0);
                error = (*cdevsw[major(vp->v_rdev)].d_write)
                        (vp->v_rdev, uio, ap->a_ioflag);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                return (error);

        case VBLK:
                if (uio->uio_resid == 0)
                        return (0);
                if (uio->uio_offset < 0)
                        return (EINVAL);
                bsize = BLKDEV_IOSIZE;
                ssize = DEV_BSIZE;
                if ((majordev = major(vp->v_rdev)) < nblkdev &&
                    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
                    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
                        if (dpart.part->p_fstype == FS_BSDFFS &&
                            dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
                                bsize = dpart.part->p_frag *
                                    dpart.part->p_fsize;
                        if (dpart.disklab->d_secsize != 0)
                                ssize = dpart.disklab->d_secsize;
                }
                bscale = bsize / ssize;
                do {
                        bn = (uio->uio_offset / ssize) &~ (bscale - 1);
                        on = uio->uio_offset % bsize;
                        n = min((unsigned)(bsize - on), uio->uio_resid);
                        if (n == bsize)
                                bp = getblk(vp, bn, bsize, 0, 0);
                        else
                                error = bread(vp, bn, bsize, NOCRED, &bp);
                        n = min(n, bsize - bp->b_resid);
                        if (error) {
                                brelse(bp);
                                return (error);
                        }
                        error = uiomove((char *)bp->b_data + on, n, uio);
                        if (n + on == bsize)
                                bawrite(bp);
                        else
                                bdwrite(bp);
                } while (error == 0 && uio->uio_resid > 0 && n != 0);
                return (error);

        default:
                panic("spec_write type");
        }
        /* NOTREACHED */
}
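
/*
 * Worked example (illustrative numbers): in the VBLK paths above,
 * suppose bsize = 8192 and ssize = 512, so bscale = 16.  For
 * uio_offset = 70000:
 *
 *      bn = (70000 / 512) & ~(16 - 1) = 136 & ~15 = 128
 *      on = 70000 % 8192              = 4464
 *      n  = min(8192 - 4464, uio_resid)
 *
 * i.e. the transfer uses the 8192-byte buffer whose first sector is
 * 128 and copies starting 4464 bytes into it.  spec_read() reads ahead
 * one block when the access looks sequential (v_lastr + bscale == bn);
 * spec_write() skips the prior read entirely (getblk) when a whole
 * block is being overwritten, and uses bawrite() for full blocks but
 * bdwrite() for partial ones so a later write may fill in the rest of
 * the block before it goes to disk.
 */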

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
int
spec_ioctl(v)
        void *v;
{
        struct vop_ioctl_args /* {
                struct vnode *a_vp;
                u_long a_command;
                caddr_t a_data;
                int a_fflag;
                struct ucred *a_cred;
                struct proc *a_p;
        } */ *ap = v;
        dev_t dev = ap->a_vp->v_rdev;
        int maj = major(dev);

        switch (ap->a_vp->v_type) {

        case VCHR:
                return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
                    ap->a_fflag, ap->a_p));

        case VBLK:
                if (ap->a_command == 0 && (long)ap->a_data == B_TAPE)
                        if (bdevsw[maj].d_type == D_TAPE)
                                return (0);
                        else
                                return (1);
                return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
                    ap->a_fflag, ap->a_p));

        default:
                panic("spec_ioctl");
                /* NOTREACHED */
        }
}

/* ARGSUSED */
int
spec_poll(v)
        void *v;
{
        struct vop_poll_args /* {
                struct vnode *a_vp;
                int a_events;
                struct proc *a_p;
        } */ *ap = v;
        register dev_t dev;

        switch (ap->a_vp->v_type) {

        case VCHR:
                dev = ap->a_vp->v_rdev;
                return (*cdevsw[major(dev)].d_poll)(dev, ap->a_events, ap->a_p);

        default:
                return (genfs_poll(v));
        }
}

/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(v)
        void *v;
{
        struct vop_fsync_args /* {
                struct vnode *a_vp;
                struct ucred *a_cred;
                int a_waitfor;
                struct proc *a_p;
        } */ *ap = v;
        register struct vnode *vp = ap->a_vp;

        if (vp->v_type == VBLK)
                vflushbuf(vp, ap->a_waitfor == MNT_WAIT);
        return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(v)
        void *v;
{
        struct vop_strategy_args /* {
                struct buf *a_bp;
        } */ *ap = v;

        (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
        return (0);
}
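
/*
 * Illustrative note: spec_strategy() is normally reached through
 * VOP_STRATEGY(bp) when the buffer cache starts I/O on a buffer that
 * belongs to a device vnode -- for instance the bread()/bawrite()
 * calls in the VBLK paths of spec_read() and spec_write() above end up
 * here, and the routine simply hands the buffer to the block driver's
 * d_strategy entry point.
 */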

int
spec_inactive(v)
        void *v;
{
        struct vop_inactive_args /* {
                struct vnode *a_vp;
                struct proc *a_p;
        } */ *ap = v;

        VOP_UNLOCK(ap->a_vp, 0);
        return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(v)
        void *v;
{
        struct vop_bmap_args /* {
                struct vnode *a_vp;
                daddr_t a_bn;
                struct vnode **a_vpp;
                daddr_t *a_bnp;
                int *a_runp;
        } */ *ap = v;

        if (ap->a_vpp != NULL)
                *ap->a_vpp = ap->a_vp;
        if (ap->a_bnp != NULL)
                *ap->a_bnp = ap->a_bn;
        if (ap->a_runp != NULL)
                *ap->a_runp = 0;
        return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(v)
        void *v;
{
        struct vop_close_args /* {
                struct vnode *a_vp;
                int a_fflag;
                struct ucred *a_cred;
                struct proc *a_p;
        } */ *ap = v;
        register struct vnode *vp = ap->a_vp;
        dev_t dev = vp->v_rdev;
        int (*devclose) __P((dev_t, int, int, struct proc *));
        int mode, error;

        switch (vp->v_type) {

        case VCHR:
                /*
                 * Hack: a tty device that is a controlling terminal
                 * has a reference from the session structure.
                 * We cannot easily tell that a character device is
                 * a controlling terminal, unless it is the closing
                 * process' controlling terminal.  In that case,
                 * if the reference count is 2 (this last descriptor
                 * plus the session), release the reference from the session.
                 */
                if (vcount(vp) == 2 && ap->a_p &&
                    vp == ap->a_p->p_session->s_ttyvp) {
                        vrele(vp);
                        ap->a_p->p_session->s_ttyvp = NULL;
                }
                /*
                 * If the vnode is locked, then we are in the midst
                 * of forcibly closing the device, otherwise we only
                 * close on last reference.
                 */
                if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
                        return (0);
                devclose = cdevsw[major(dev)].d_close;
                mode = S_IFCHR;
                break;

        case VBLK:
                /*
                 * On last close of a block device (that isn't mounted)
                 * we must invalidate any in core blocks, so that
                 * we can, for instance, change floppy disks.
                 */
                error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
                if (error)
                        return (error);
                /*
                 * We do not want to really close the device if it
                 * is still in use unless we are trying to close it
                 * forcibly.  Since every use (buffer, vnode, swap, cmap)
                 * holds a reference to the vnode, and because we mark
                 * any other vnodes that alias this device, when the
                 * sum of the reference counts on all the aliased
                 * vnodes descends to one, we are on last close.
                 */
                if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
                        return (0);
                devclose = bdevsw[major(dev)].d_close;
                mode = S_IFBLK;
                break;

        default:
                panic("spec_close: not special");
        }

        return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}
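
/*
 * Illustrative note: vcount() counts references across every vnode
 * aliasing this device, not just this one.  A tty that is open once
 * and is also the session's controlling terminal therefore shows a
 * count of 2; dropping the session reference first lets the later test
 * see a genuine last close and call the driver's d_close.  A block
 * device still referenced by buffers or a mount keeps its count above
 * 1, so d_close is deferred until the final reference goes away or the
 * vnode is being revoked (VXLOCK).
 */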

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(v)
        void *v;
{
        struct vop_print_args /* {
                struct vnode *a_vp;
        } */ *ap = v;

        printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
            minor(ap->a_vp->v_rdev));
        return 0;
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(v)
        void *v;
{
        struct vop_pathconf_args /* {
                struct vnode *a_vp;
                int a_name;
                register_t *a_retval;
        } */ *ap = v;

        switch (ap->a_name) {
        case _PC_LINK_MAX:
                *ap->a_retval = LINK_MAX;
                return (0);
        case _PC_MAX_CANON:
                *ap->a_retval = MAX_CANON;
                return (0);
        case _PC_MAX_INPUT:
                *ap->a_retval = MAX_INPUT;
                return (0);
        case _PC_PIPE_BUF:
                *ap->a_retval = PIPE_BUF;
                return (0);
        case _PC_CHOWN_RESTRICTED:
                *ap->a_retval = 1;
                return (0);
        case _PC_VDISABLE:
                *ap->a_retval = _POSIX_VDISABLE;
                return (0);
        default:
                return (EINVAL);
        }
        /* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(v)
        void *v;
{
        struct vop_advlock_args /* {
                struct vnode *a_vp;
                caddr_t a_id;
                int a_op;
                struct flock *a_fl;
                int a_flags;
        } */ *ap = v;
        register struct vnode *vp = ap->a_vp;

        return (lf_advlock(&vp->v_speclockf, (off_t)0, ap->a_id, ap->a_op,
            ap->a_fl, ap->a_flags));
}