1 /* $NetBSD: spec_vnops.c,v 1.17 1994/07/16 11:40:56 paulus Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94 36 */ 37 38 #include <sys/param.h> 39 #include <sys/proc.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/conf.h> 43 #include <sys/buf.h> 44 #include <sys/mount.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/stat.h> 48 #include <sys/errno.h> 49 #include <sys/ioctl.h> 50 #include <sys/file.h> 51 #include <sys/disklabel.h> 52 #include <miscfs/specfs/specdev.h> 53 54 /* symbolic sleep message strings for devices */ 55 char devopn[] = "devopn"; 56 char devio[] = "devio"; 57 char devwait[] = "devwait"; 58 char devin[] = "devin"; 59 char devout[] = "devout"; 60 char devioc[] = "devioc"; 61 char devcls[] = "devcls"; 62 63 int (**spec_vnodeop_p)(); 64 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 65 { &vop_default_desc, vn_default_error }, 66 { &vop_lookup_desc, spec_lookup }, /* lookup */ 67 { &vop_create_desc, spec_create }, /* create */ 68 { &vop_mknod_desc, spec_mknod }, /* mknod */ 69 { &vop_open_desc, spec_open }, /* open */ 70 { &vop_close_desc, spec_close }, /* close */ 71 { &vop_access_desc, spec_access }, /* access */ 72 { &vop_getattr_desc, spec_getattr }, /* getattr */ 73 { &vop_setattr_desc, spec_setattr }, /* setattr */ 74 { &vop_read_desc, spec_read }, /* read */ 75 { &vop_write_desc, spec_write }, /* write */ 76 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 77 { &vop_select_desc, spec_select }, /* select */ 78 { &vop_mmap_desc, spec_mmap }, /* mmap */ 79 { &vop_fsync_desc, spec_fsync }, /* fsync */ 80 { &vop_seek_desc, spec_seek }, /* seek */ 81 { &vop_remove_desc, spec_remove }, /* remove */ 82 { &vop_link_desc, spec_link }, /* link */ 83 { &vop_rename_desc, spec_rename }, /* rename */ 84 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 85 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 86 { &vop_symlink_desc, spec_symlink }, /* symlink */ 87 { &vop_readdir_desc, spec_readdir }, /* readdir */ 88 { &vop_readlink_desc, spec_readlink }, /* readlink */ 89 { &vop_abortop_desc, spec_abortop }, /* abortop */ 90 { &vop_inactive_desc, spec_inactive }, /* inactive */ 91 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 92 { &vop_lock_desc, spec_lock }, /* lock */ 93 { &vop_unlock_desc, spec_unlock }, /* unlock */ 94 { &vop_bmap_desc, spec_bmap }, /* bmap */ 95 { &vop_strategy_desc, spec_strategy }, /* strategy */ 96 { &vop_print_desc, spec_print }, /* print */ 97 { &vop_islocked_desc, spec_islocked }, /* islocked */ 98 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 99 { &vop_advlock_desc, spec_advlock }, /* advlock */ 100 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 101 { &vop_valloc_desc, spec_valloc }, /* valloc */ 102 { &vop_vfree_desc, spec_vfree }, /* vfree */ 103 { &vop_truncate_desc, spec_truncate }, /* truncate */ 104 { &vop_update_desc, spec_update }, /* update */ 105 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 106 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 107 }; 108 struct vnodeopv_desc spec_vnodeop_opv_desc = 109 { &spec_vnodeop_p, spec_vnodeop_entries }; 110 111 /* 112 * Trivial lookup routine that always fails. 113 */ 114 int 115 spec_lookup(ap) 116 struct vop_lookup_args /* { 117 struct vnode *a_dvp; 118 struct vnode **a_vpp; 119 struct componentname *a_cnp; 120 } */ *ap; 121 { 122 123 *ap->a_vpp = NULL; 124 return (ENOTDIR); 125 } 126 127 /* 128 * Open a special file. 129 */ 130 /* ARGSUSED */ 131 spec_open(ap) 132 struct vop_open_args /* { 133 struct vnode *a_vp; 134 int a_mode; 135 struct ucred *a_cred; 136 struct proc *a_p; 137 } */ *ap; 138 { 139 struct vnode *bvp, *vp = ap->a_vp; 140 dev_t bdev, dev = (dev_t)vp->v_rdev; 141 register int maj = major(dev); 142 int error; 143 144 /* 145 * Don't allow open if fs is mounted -nodev. 146 */ 147 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 148 return (ENXIO); 149 150 switch (vp->v_type) { 151 152 case VCHR: 153 if ((u_int)maj >= nchrdev) 154 return (ENXIO); 155 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 156 /* 157 * When running in very secure mode, do not allow 158 * opens for writing of any disk character devices. 159 */ 160 if (securelevel >= 2 && isdisk(dev, VCHR)) 161 return (EPERM); 162 /* 163 * When running in secure mode, do not allow opens 164 * for writing of /dev/mem, /dev/kmem, or character 165 * devices whose corresponding block devices are 166 * currently mounted. 167 */ 168 if (securelevel >= 1) { 169 if ((bdev = chrtoblk(dev)) != NODEV && 170 vfinddev(bdev, VBLK, &bvp) && 171 bvp->v_usecount > 0 && 172 (error = vfs_mountedon(bvp))) 173 return (error); 174 if (iskmemdev(dev)) 175 return (EPERM); 176 } 177 } 178 VOP_UNLOCK(vp); 179 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 180 VOP_LOCK(vp); 181 return (error); 182 183 case VBLK: 184 if ((u_int)maj >= nblkdev) 185 return (ENXIO); 186 /* 187 * When running in very secure mode, do not allow 188 * opens for writing of any disk block devices. 189 */ 190 if (securelevel >= 2 && ap->a_cred != FSCRED && 191 (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) 192 return (EPERM); 193 /* 194 * Do not allow opens of block devices that are 195 * currently mounted. 196 */ 197 if (error = vfs_mountedon(vp)) 198 return (error); 199 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 200 } 201 return (0); 202 } 203 204 /* 205 * Vnode op for read 206 */ 207 /* ARGSUSED */ 208 spec_read(ap) 209 struct vop_read_args /* { 210 struct vnode *a_vp; 211 struct uio *a_uio; 212 int a_ioflag; 213 struct ucred *a_cred; 214 } */ *ap; 215 { 216 register struct vnode *vp = ap->a_vp; 217 register struct uio *uio = ap->a_uio; 218 struct proc *p = uio->uio_procp; 219 struct buf *bp; 220 daddr_t bn, nextbn; 221 long bsize, bscale, ssize; 222 struct partinfo dpart; 223 int n, on, majordev, (*ioctl)(); 224 int error = 0; 225 dev_t dev; 226 227 #ifdef DIAGNOSTIC 228 if (uio->uio_rw != UIO_READ) 229 panic("spec_read mode"); 230 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 231 panic("spec_read proc"); 232 #endif 233 if (uio->uio_resid == 0) 234 return (0); 235 236 switch (vp->v_type) { 237 238 case VCHR: 239 VOP_UNLOCK(vp); 240 error = (*cdevsw[major(vp->v_rdev)].d_read) 241 (vp->v_rdev, uio, ap->a_ioflag); 242 VOP_LOCK(vp); 243 return (error); 244 245 case VBLK: 246 if (uio->uio_offset < 0) 247 return (EINVAL); 248 bsize = BLKDEV_IOSIZE; 249 ssize = DEV_BSIZE; 250 dev = vp->v_rdev; 251 if ((majordev = major(dev)) < nblkdev && 252 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 253 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { 254 if (dpart.part->p_fstype == FS_BSDFFS && 255 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 256 bsize = dpart.part->p_frag * 257 dpart.part->p_fsize; 258 if (dpart.disklab->d_secsize != 0) 259 ssize = dpart.disklab->d_secsize; 260 } 261 bscale = bsize / ssize; 262 do { 263 bn = (uio->uio_offset / ssize) &~ (bscale - 1); 264 on = uio->uio_offset % bsize; 265 n = min((unsigned)(bsize - on), uio->uio_resid); 266 if (vp->v_lastr + bscale == bn) { 267 nextbn = bn + bscale; 268 error = breadn(vp, bn, (int)bsize, &nextbn, 269 (int *)&bsize, 1, NOCRED, &bp); 270 } else 271 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 272 vp->v_lastr = bn; 273 n = min(n, bsize - bp->b_resid); 274 if (error) { 275 brelse(bp); 276 return (error); 277 } 278 error = uiomove((char *)bp->b_data + on, n, uio); 279 if (n + on == bsize) 280 bp->b_flags |= B_AGE; 281 brelse(bp); 282 } while (error == 0 && uio->uio_resid > 0 && n != 0); 283 return (error); 284 285 default: 286 panic("spec_read type"); 287 } 288 /* NOTREACHED */ 289 } 290 291 /* 292 * Vnode op for write 293 */ 294 /* ARGSUSED */ 295 spec_write(ap) 296 struct vop_write_args /* { 297 struct vnode *a_vp; 298 struct uio *a_uio; 299 int a_ioflag; 300 struct ucred *a_cred; 301 } */ *ap; 302 { 303 register struct vnode *vp = ap->a_vp; 304 register struct uio *uio = ap->a_uio; 305 struct proc *p = uio->uio_procp; 306 struct buf *bp; 307 daddr_t bn; 308 int bsize, blkmask, ssize; 309 struct partinfo dpart; 310 register int n, on; 311 int error = 0; 312 313 #ifdef DIAGNOSTIC 314 if (uio->uio_rw != UIO_WRITE) 315 panic("spec_write mode"); 316 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 317 panic("spec_write proc"); 318 #endif 319 320 switch (vp->v_type) { 321 322 case VCHR: 323 VOP_UNLOCK(vp); 324 error = (*cdevsw[major(vp->v_rdev)].d_write) 325 (vp->v_rdev, uio, ap->a_ioflag); 326 VOP_LOCK(vp); 327 return (error); 328 329 case VBLK: 330 if (uio->uio_resid == 0) 331 return (0); 332 if (uio->uio_offset < 0) 333 return (EINVAL); 334 bsize = BLKDEV_IOSIZE; 335 ssize = DEV_BSIZE; 336 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 337 (caddr_t)&dpart, FREAD, p) == 0) { 338 if (dpart.part->p_fstype == FS_BSDFFS && 339 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 340 bsize = dpart.part->p_frag * 341 dpart.part->p_fsize; 342 if (dpart.disklab->d_secsize != 0) 343 ssize = dpart.disklab->d_secsize; 344 } 345 blkmask = (bsize / ssize) - 1; 346 do { 347 bn = (uio->uio_offset / ssize) &~ blkmask; 348 on = uio->uio_offset % bsize; 349 n = min((unsigned)(bsize - on), uio->uio_resid); 350 if (n == bsize) 351 bp = getblk(vp, bn, bsize, 0, 0); 352 else 353 error = bread(vp, bn, bsize, NOCRED, &bp); 354 n = min(n, bsize - bp->b_resid); 355 if (error) { 356 brelse(bp); 357 return (error); 358 } 359 error = uiomove((char *)bp->b_data + on, n, uio); 360 if (n + on == bsize) { 361 bp->b_flags |= B_AGE; 362 bawrite(bp); 363 } else 364 bdwrite(bp); 365 } while (error == 0 && uio->uio_resid > 0 && n != 0); 366 return (error); 367 368 default: 369 panic("spec_write type"); 370 } 371 /* NOTREACHED */ 372 } 373 374 /* 375 * Device ioctl operation. 376 */ 377 /* ARGSUSED */ 378 spec_ioctl(ap) 379 struct vop_ioctl_args /* { 380 struct vnode *a_vp; 381 int a_command; 382 caddr_t a_data; 383 int a_fflag; 384 struct ucred *a_cred; 385 struct proc *a_p; 386 } */ *ap; 387 { 388 dev_t dev = ap->a_vp->v_rdev; 389 390 switch (ap->a_vp->v_type) { 391 392 case VCHR: 393 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 394 ap->a_fflag, ap->a_p)); 395 396 case VBLK: 397 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 398 if (bdevsw[major(dev)].d_flags & B_TAPE) 399 return (0); 400 else 401 return (1); 402 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 403 ap->a_fflag, ap->a_p)); 404 405 default: 406 panic("spec_ioctl"); 407 /* NOTREACHED */ 408 } 409 } 410 411 /* ARGSUSED */ 412 spec_select(ap) 413 struct vop_select_args /* { 414 struct vnode *a_vp; 415 int a_which; 416 int a_fflags; 417 struct ucred *a_cred; 418 struct proc *a_p; 419 } */ *ap; 420 { 421 register dev_t dev; 422 423 switch (ap->a_vp->v_type) { 424 425 default: 426 return (1); /* XXX */ 427 428 case VCHR: 429 dev = ap->a_vp->v_rdev; 430 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 431 } 432 } 433 /* 434 * Synch buffers associated with a block device 435 */ 436 /* ARGSUSED */ 437 int 438 spec_fsync(ap) 439 struct vop_fsync_args /* { 440 struct vnode *a_vp; 441 struct ucred *a_cred; 442 int a_waitfor; 443 struct proc *a_p; 444 } */ *ap; 445 { 446 register struct vnode *vp = ap->a_vp; 447 register struct buf *bp; 448 struct buf *nbp; 449 int s; 450 451 if (vp->v_type == VCHR) 452 return (0); 453 /* 454 * Flush all dirty buffers associated with a block device. 455 */ 456 loop: 457 s = splbio(); 458 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 459 nbp = bp->b_vnbufs.le_next; 460 if ((bp->b_flags & B_BUSY)) 461 continue; 462 if ((bp->b_flags & B_DELWRI) == 0) 463 panic("spec_fsync: not dirty"); 464 bremfree(bp); 465 bp->b_flags |= B_BUSY; 466 splx(s); 467 bawrite(bp); 468 goto loop; 469 } 470 if (ap->a_waitfor == MNT_WAIT) { 471 while (vp->v_numoutput) { 472 vp->v_flag |= VBWAIT; 473 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 474 } 475 #ifdef DIAGNOSTIC 476 if (vp->v_dirtyblkhd.lh_first) { 477 vprint("spec_fsync: dirty", vp); 478 goto loop; 479 } 480 #endif 481 } 482 splx(s); 483 return (0); 484 } 485 486 /* 487 * Just call the device strategy routine 488 */ 489 spec_strategy(ap) 490 struct vop_strategy_args /* { 491 struct buf *a_bp; 492 } */ *ap; 493 { 494 495 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 496 return (0); 497 } 498 499 /* 500 * This is a noop, simply returning what one has been given. 501 */ 502 spec_bmap(ap) 503 struct vop_bmap_args /* { 504 struct vnode *a_vp; 505 daddr_t a_bn; 506 struct vnode **a_vpp; 507 daddr_t *a_bnp; 508 } */ *ap; 509 { 510 511 if (ap->a_vpp != NULL) 512 *ap->a_vpp = ap->a_vp; 513 if (ap->a_bnp != NULL) 514 *ap->a_bnp = ap->a_bn; 515 return (0); 516 } 517 518 /* 519 * At the moment we do not do any locking. 520 */ 521 /* ARGSUSED */ 522 spec_lock(ap) 523 struct vop_lock_args /* { 524 struct vnode *a_vp; 525 } */ *ap; 526 { 527 528 return (0); 529 } 530 531 /* ARGSUSED */ 532 spec_unlock(ap) 533 struct vop_unlock_args /* { 534 struct vnode *a_vp; 535 } */ *ap; 536 { 537 538 return (0); 539 } 540 541 /* 542 * Device close routine 543 */ 544 /* ARGSUSED */ 545 spec_close(ap) 546 struct vop_close_args /* { 547 struct vnode *a_vp; 548 int a_fflag; 549 struct ucred *a_cred; 550 struct proc *a_p; 551 } */ *ap; 552 { 553 register struct vnode *vp = ap->a_vp; 554 dev_t dev = vp->v_rdev; 555 int (*devclose) __P((dev_t, int, int, struct proc *)); 556 int mode, error; 557 558 switch (vp->v_type) { 559 560 case VCHR: 561 /* 562 * Hack: a tty device that is a controlling terminal 563 * has a reference from the session structure. 564 * We cannot easily tell that a character device is 565 * a controlling terminal, unless it is the closing 566 * process' controlling terminal. In that case, 567 * if the reference count is 2 (this last descriptor 568 * plus the session), release the reference from the session. 569 */ 570 if (vcount(vp) == 2 && ap->a_p && 571 vp == ap->a_p->p_session->s_ttyvp) { 572 vrele(vp); 573 ap->a_p->p_session->s_ttyvp = NULL; 574 } 575 /* 576 * If the vnode is locked, then we are in the midst 577 * of forcably closing the device, otherwise we only 578 * close on last reference. 579 */ 580 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 581 return (0); 582 devclose = cdevsw[major(dev)].d_close; 583 mode = S_IFCHR; 584 break; 585 586 case VBLK: 587 /* 588 * On last close of a block device (that isn't mounted) 589 * we must invalidate any in core blocks, so that 590 * we can, for instance, change floppy disks. 591 */ 592 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 593 return (error); 594 /* 595 * We do not want to really close the device if it 596 * is still in use unless we are trying to close it 597 * forcibly. Since every use (buffer, vnode, swap, cmap) 598 * holds a reference to the vnode, and because we mark 599 * any other vnodes that alias this device, when the 600 * sum of the reference counts on all the aliased 601 * vnodes descends to one, we are on last close. 602 */ 603 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 604 return (0); 605 devclose = bdevsw[major(dev)].d_close; 606 mode = S_IFBLK; 607 break; 608 609 default: 610 panic("spec_close: not special"); 611 } 612 613 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 614 } 615 616 /* 617 * Print out the contents of a special device vnode. 618 */ 619 spec_print(ap) 620 struct vop_print_args /* { 621 struct vnode *a_vp; 622 } */ *ap; 623 { 624 625 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 626 minor(ap->a_vp->v_rdev)); 627 } 628 629 /* 630 * Return POSIX pathconf information applicable to special devices. 631 */ 632 spec_pathconf(ap) 633 struct vop_pathconf_args /* { 634 struct vnode *a_vp; 635 int a_name; 636 int *a_retval; 637 } */ *ap; 638 { 639 640 switch (ap->a_name) { 641 case _PC_LINK_MAX: 642 *ap->a_retval = LINK_MAX; 643 return (0); 644 case _PC_MAX_CANON: 645 *ap->a_retval = MAX_CANON; 646 return (0); 647 case _PC_MAX_INPUT: 648 *ap->a_retval = MAX_INPUT; 649 return (0); 650 case _PC_PIPE_BUF: 651 *ap->a_retval = PIPE_BUF; 652 return (0); 653 case _PC_CHOWN_RESTRICTED: 654 *ap->a_retval = 1; 655 return (0); 656 case _PC_VDISABLE: 657 *ap->a_retval = _POSIX_VDISABLE; 658 return (0); 659 default: 660 return (EINVAL); 661 } 662 /* NOTREACHED */ 663 } 664 665 /* 666 * Special device advisory byte-level locks. 667 */ 668 /* ARGSUSED */ 669 spec_advlock(ap) 670 struct vop_advlock_args /* { 671 struct vnode *a_vp; 672 caddr_t a_id; 673 int a_op; 674 struct flock *a_fl; 675 int a_flags; 676 } */ *ap; 677 { 678 679 return (EOPNOTSUPP); 680 } 681 682 /* 683 * Special device failed operation 684 */ 685 spec_ebadf() 686 { 687 688 return (EBADF); 689 } 690 691 /* 692 * Special device bad operation 693 */ 694 spec_badop() 695 { 696 697 panic("spec_badop called"); 698 /* NOTREACHED */ 699 } 700