1 /* $NetBSD: spec_vnops.c,v 1.121 2009/01/11 02:45:54 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 
56 * 57 * @(#)spec_vnops.c 8.15 (Berkeley) 7/14/95 58 */ 59 60 #include <sys/cdefs.h> 61 __KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.121 2009/01/11 02:45:54 christos Exp $"); 62 63 #include <sys/param.h> 64 #include <sys/proc.h> 65 #include <sys/systm.h> 66 #include <sys/kernel.h> 67 #include <sys/conf.h> 68 #include <sys/buf.h> 69 #include <sys/mount.h> 70 #include <sys/namei.h> 71 #include <sys/vnode.h> 72 #include <sys/stat.h> 73 #include <sys/errno.h> 74 #include <sys/ioctl.h> 75 #include <sys/poll.h> 76 #include <sys/file.h> 77 #include <sys/disklabel.h> 78 #include <sys/lockf.h> 79 #include <sys/tty.h> 80 #include <sys/kauth.h> 81 #include <sys/fstrans.h> 82 83 #include <miscfs/genfs/genfs.h> 84 #include <miscfs/specfs/specdev.h> 85 86 /* symbolic sleep message strings for devices */ 87 const char devopn[] = "devopn"; 88 const char devio[] = "devio"; 89 const char devwait[] = "devwait"; 90 const char devin[] = "devin"; 91 const char devout[] = "devout"; 92 const char devioc[] = "devioc"; 93 const char devcls[] = "devcls"; 94 /* Hash of device vnodes: buckets are indexed by SPECHASH(rdev) and chained through v_specnext; this file walks and updates it with device_lock held. */ 95 vnode_t *specfs_hash[SPECHSZ]; 96 97 /* 98 * This vnode operations vector is used for special device nodes 99 * created from whole cloth by the kernel. For the ops vector for 100 * vnodes built from special devices found in a filesystem, see (e.g.) 101 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other 102 * filesystems. 
103 */ 104 105 int (**spec_vnodeop_p)(void *); 106 const struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 107 { &vop_default_desc, vn_default_error }, 108 { &vop_lookup_desc, spec_lookup }, /* lookup */ 109 { &vop_create_desc, spec_create }, /* create */ 110 { &vop_mknod_desc, spec_mknod }, /* mknod */ 111 { &vop_open_desc, spec_open }, /* open */ 112 { &vop_close_desc, spec_close }, /* close */ 113 { &vop_access_desc, spec_access }, /* access */ 114 { &vop_getattr_desc, spec_getattr }, /* getattr */ 115 { &vop_setattr_desc, spec_setattr }, /* setattr */ 116 { &vop_read_desc, spec_read }, /* read */ 117 { &vop_write_desc, spec_write }, /* write */ 118 { &vop_fcntl_desc, spec_fcntl }, /* fcntl */ 119 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 120 { &vop_poll_desc, spec_poll }, /* poll */ 121 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 122 { &vop_revoke_desc, spec_revoke }, /* revoke */ 123 { &vop_mmap_desc, spec_mmap }, /* mmap */ 124 { &vop_fsync_desc, spec_fsync }, /* fsync */ 125 { &vop_seek_desc, spec_seek }, /* seek */ 126 { &vop_remove_desc, spec_remove }, /* remove */ 127 { &vop_link_desc, spec_link }, /* link */ 128 { &vop_rename_desc, spec_rename }, /* rename */ 129 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 130 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 131 { &vop_symlink_desc, spec_symlink }, /* symlink */ 132 { &vop_readdir_desc, spec_readdir }, /* readdir */ 133 { &vop_readlink_desc, spec_readlink }, /* readlink */ 134 { &vop_abortop_desc, spec_abortop }, /* abortop */ 135 { &vop_inactive_desc, spec_inactive }, /* inactive */ 136 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 137 { &vop_lock_desc, spec_lock }, /* lock */ 138 { &vop_unlock_desc, spec_unlock }, /* unlock */ 139 { &vop_bmap_desc, spec_bmap }, /* bmap */ 140 { &vop_strategy_desc, spec_strategy }, /* strategy */ 141 { &vop_print_desc, spec_print }, /* print */ 142 { &vop_islocked_desc, spec_islocked }, /* islocked */ 143 { &vop_pathconf_desc, spec_pathconf }, /* 
pathconf */ 144 { &vop_advlock_desc, spec_advlock }, /* advlock */ 145 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 146 { &vop_getpages_desc, spec_getpages }, /* getpages */ 147 { &vop_putpages_desc, spec_putpages }, /* putpages */ 148 { NULL, NULL } 149 }; 150 const struct vnodeopv_desc spec_vnodeop_opv_desc = 151 { &spec_vnodeop_p, spec_vnodeop_entries }; 152 153 /* 154 * Returns true if dev is /dev/mem or /dev/kmem. 155 */ 156 int 157 iskmemdev(dev_t dev) 158 { 159 /* mem_no is emitted by config(8) to generated devsw.c */ 160 extern const int mem_no; 161 162 /* minor 14 is /dev/io on i386 with COMPAT_10 */ 163 return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14)); 164 } 165 166 /* 167 * Initialize a vnode that represents a device. 168 */ 169 void 170 spec_node_init(vnode_t *vp, dev_t rdev) 171 { 172 specnode_t *sn; 173 specdev_t *sd; 174 vnode_t *vp2; 175 vnode_t **vpp; 176 177 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 178 KASSERT(vp->v_specnode == NULL); 179 180 /* 181 * Search the hash table for this device. If known, add a 182 * reference to the device structure. If not known, create 183 * a new entry to represent the device. In all cases add 184 * the vnode to the hash table. 185 */ 186 sn = kmem_alloc(sizeof(*sn), KM_SLEEP); 187 if (sn == NULL) { 188 /* XXX */ 189 panic("spec_node_init: unable to allocate memory"); 190 } 191 sd = kmem_alloc(sizeof(*sd), KM_SLEEP); 192 if (sd == NULL) { 193 /* XXX */ 194 panic("spec_node_init: unable to allocate memory"); 195 } 196 mutex_enter(&device_lock); 197 vpp = &specfs_hash[SPECHASH(rdev)]; 198 for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) { 199 KASSERT(vp2->v_specnode != NULL); 200 if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) { 201 break; 202 } 203 } 204 if (vp2 == NULL) { 205 /* No existing record, create a new one. 
*/ 206 sd->sd_rdev = rdev; 207 sd->sd_mountpoint = NULL; 208 sd->sd_lockf = NULL; 209 sd->sd_refcnt = 1; 210 sd->sd_opencnt = 0; 211 sd->sd_bdevvp = NULL; 212 sn->sn_dev = sd; 213 sd = NULL; 214 } else { 215 /* Use the existing record. */ 216 sn->sn_dev = vp2->v_specnode->sn_dev; 217 sn->sn_dev->sd_refcnt++; 218 } 219 /* Insert vnode into the hash chain. */ 220 sn->sn_opencnt = 0; 221 sn->sn_rdev = rdev; 222 sn->sn_gone = false; 223 vp->v_specnode = sn; 224 vp->v_specnext = *vpp; 225 *vpp = vp; 226 mutex_exit(&device_lock); 227 228 /* Free the record we allocated if unused. */ 229 if (sd != NULL) { 230 kmem_free(sd, sizeof(*sd)); 231 } 232 } 233 234 /* 235 * A vnode representing a special device is going away. Close 236 * the device if the vnode holds it open. 237 */ 238 void 239 spec_node_revoke(vnode_t *vp) 240 { 241 specnode_t *sn; 242 specdev_t *sd; 243 244 sn = vp->v_specnode; 245 sd = sn->sn_dev; 246 247 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 248 KASSERT(vp->v_specnode != NULL); 249 KASSERT((vp->v_iflag & VI_XLOCK) != 0); 250 KASSERT(sn->sn_gone == false); 251 252 mutex_enter(&device_lock); 253 KASSERT(sn->sn_opencnt <= sd->sd_opencnt); 254 if (sn->sn_opencnt != 0) { 255 sd->sd_opencnt -= (sn->sn_opencnt - 1); 256 sn->sn_opencnt = 1; 257 sn->sn_gone = true; 258 mutex_exit(&device_lock); 259 260 VOP_CLOSE(vp, FNONBLOCK, NOCRED); 261 262 mutex_enter(&device_lock); 263 KASSERT(sn->sn_opencnt == 0); 264 } 265 mutex_exit(&device_lock); 266 } 267 268 /* 269 * A vnode representing a special device is being recycled. 270 * Destroy the specfs component. 271 */ 272 void 273 spec_node_destroy(vnode_t *vp) 274 { 275 specnode_t *sn; 276 specdev_t *sd; 277 vnode_t **vpp, *vp2; 278 int refcnt; 279 280 sn = vp->v_specnode; 281 sd = sn->sn_dev; 282 283 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 284 KASSERT(vp->v_specnode != NULL); 285 KASSERT(sn->sn_opencnt == 0); 286 287 mutex_enter(&device_lock); 288 /* Remove from the hash and destroy the node. 
*/ 289 vpp = &specfs_hash[SPECHASH(vp->v_rdev)]; 290 for (vp2 = *vpp;; vp2 = vp2->v_specnext) { 291 if (vp2 == NULL) { 292 panic("spec_node_destroy: corrupt hash"); 293 } 294 if (vp2 == vp) { 295 KASSERT(vp == *vpp); 296 *vpp = vp->v_specnext; 297 break; 298 } 299 if (vp2->v_specnext == vp) { 300 vp2->v_specnext = vp->v_specnext; 301 break; 302 } 303 } 304 sn = vp->v_specnode; 305 vp->v_specnode = NULL; 306 refcnt = sd->sd_refcnt--; 307 KASSERT(refcnt > 0); 308 mutex_exit(&device_lock); 309 310 /* If the device is no longer in use, destroy our record. */ 311 if (refcnt == 1) { 312 KASSERT(sd->sd_opencnt == 0); 313 KASSERT(sd->sd_bdevvp == NULL); 314 kmem_free(sd, sizeof(*sd)); 315 } 316 kmem_free(sn, sizeof(*sn)); 317 } 318 319 /* 320 * Trivial lookup routine that always fails. 321 */ 322 int 323 spec_lookup(void *v) 324 { 325 struct vop_lookup_args /* { 326 struct vnode *a_dvp; 327 struct vnode **a_vpp; 328 struct componentname *a_cnp; 329 } */ *ap = v; 330 331 *ap->a_vpp = NULL; 332 return (ENOTDIR); 333 } 334 335 /* 336 * Open a special file. 337 */ 338 /* ARGSUSED */ 339 int 340 spec_open(void *v) 341 { 342 struct vop_open_args /* { 343 struct vnode *a_vp; 344 int a_mode; 345 kauth_cred_t a_cred; 346 } */ *ap = v; 347 struct lwp *l; 348 struct vnode *vp; 349 dev_t dev; 350 int error; 351 struct partinfo pi; 352 enum kauth_device_req req; 353 specnode_t *sn; 354 specdev_t *sd; 355 356 l = curlwp; 357 vp = ap->a_vp; 358 dev = vp->v_rdev; 359 sn = vp->v_specnode; 360 sd = sn->sn_dev; 361 362 /* 363 * Don't allow open if fs is mounted -nodev. 
364 */ 365 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 366 return (ENXIO); 367 368 switch (ap->a_mode & (FREAD | FWRITE)) { 369 case FREAD | FWRITE: 370 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW; 371 break; 372 case FWRITE: 373 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE; 374 break; 375 default: 376 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ; 377 break; 378 } 379 380 switch (vp->v_type) { 381 case VCHR: 382 error = kauth_authorize_device_spec(ap->a_cred, req, vp); 383 if (error != 0) 384 return (error); 385 386 /* 387 * Character devices can accept opens from multiple 388 * vnodes. 389 */ 390 mutex_enter(&device_lock); 391 if (sn->sn_gone) { 392 mutex_exit(&device_lock); 393 return (EBADF); 394 } 395 sd->sd_opencnt++; 396 sn->sn_opencnt++; 397 mutex_exit(&device_lock); 398 if (cdev_type(dev) == D_TTY) 399 vp->v_vflag |= VV_ISTTY; 400 VOP_UNLOCK(vp, 0); 401 error = cdev_open(dev, ap->a_mode, S_IFCHR, l); 402 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 403 break; 404 405 case VBLK: 406 error = kauth_authorize_device_spec(ap->a_cred, req, vp); 407 if (error != 0) 408 return (error); 409 410 /* 411 * For block devices, permit only one open. The buffer 412 * cache cannot remain self-consistent with multiple 413 * vnodes holding a block device open. 
414 */ 415 mutex_enter(&device_lock); 416 if (sn->sn_gone) { 417 mutex_exit(&device_lock); 418 return (EBADF); 419 } 420 if (sd->sd_opencnt != 0) { 421 mutex_exit(&device_lock); 422 return EBUSY; 423 } 424 sn->sn_opencnt = 1; 425 sd->sd_opencnt = 1; 426 sd->sd_bdevvp = vp; 427 mutex_exit(&device_lock); 428 429 error = bdev_open(dev, ap->a_mode, S_IFBLK, l); 430 break; 431 432 case VNON: 433 case VLNK: 434 case VDIR: 435 case VREG: 436 case VBAD: 437 case VFIFO: 438 case VSOCK: 439 default: 440 return 0; 441 } 442 443 mutex_enter(&device_lock); 444 if (sn->sn_gone) { 445 if (error == 0) 446 error = EBADF; 447 } else if (error != 0) { 448 sd->sd_opencnt--; 449 sn->sn_opencnt--; 450 if (vp->v_type == VBLK) 451 sd->sd_bdevvp = NULL; 452 453 } 454 mutex_exit(&device_lock); 455 456 if (cdev_type(dev) != D_DISK || error != 0) 457 return error; 458 459 if (vp->v_type == VCHR) 460 error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp); 461 else 462 error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp); 463 if (error == 0) 464 uvm_vnp_setsize(vp, 465 (voff_t)pi.disklab->d_secsize * pi.part->p_size); 466 return 0; 467 } 468 469 /* 470 * Vnode op for read 471 */ 472 /* ARGSUSED */ 473 int 474 spec_read(void *v) 475 { 476 struct vop_read_args /* { 477 struct vnode *a_vp; 478 struct uio *a_uio; 479 int a_ioflag; 480 kauth_cred_t a_cred; 481 } */ *ap = v; 482 struct vnode *vp = ap->a_vp; 483 struct uio *uio = ap->a_uio; 484 struct lwp *l = curlwp; 485 struct buf *bp; 486 daddr_t bn; 487 int bsize, bscale; 488 struct partinfo dpart; 489 int n, on; 490 int error = 0; 491 492 #ifdef DIAGNOSTIC 493 if (uio->uio_rw != UIO_READ) 494 panic("spec_read mode"); 495 if (&uio->uio_vmspace->vm_map != kernel_map && 496 uio->uio_vmspace != curproc->p_vmspace) 497 panic("spec_read proc"); 498 #endif 499 if (uio->uio_resid == 0) 500 return (0); 501 502 switch (vp->v_type) { 503 504 case VCHR: 505 VOP_UNLOCK(vp, 0); 506 error = cdev_read(vp->v_rdev, uio, ap->a_ioflag); 507 
vn_lock(vp, LK_SHARED | LK_RETRY); 508 return (error); 509 510 case VBLK: 511 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 512 if (uio->uio_offset < 0) 513 return (EINVAL); 514 bsize = BLKDEV_IOSIZE; 515 if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) { 516 if (dpart.part->p_fstype == FS_BSDFFS && 517 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 518 bsize = dpart.part->p_frag * 519 dpart.part->p_fsize; 520 } 521 bscale = bsize >> DEV_BSHIFT; 522 do { 523 bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); 524 on = uio->uio_offset % bsize; 525 n = min((unsigned)(bsize - on), uio->uio_resid); 526 error = bread(vp, bn, bsize, NOCRED, 0, &bp); 527 n = min(n, bsize - bp->b_resid); 528 if (error) { 529 brelse(bp, 0); 530 return (error); 531 } 532 error = uiomove((char *)bp->b_data + on, n, uio); 533 brelse(bp, 0); 534 } while (error == 0 && uio->uio_resid > 0 && n != 0); 535 return (error); 536 537 default: 538 panic("spec_read type"); 539 } 540 /* NOTREACHED */ 541 } 542 543 /* 544 * Vnode op for write 545 */ 546 /* ARGSUSED */ 547 int 548 spec_write(void *v) 549 { 550 struct vop_write_args /* { 551 struct vnode *a_vp; 552 struct uio *a_uio; 553 int a_ioflag; 554 kauth_cred_t a_cred; 555 } */ *ap = v; 556 struct vnode *vp = ap->a_vp; 557 struct uio *uio = ap->a_uio; 558 struct lwp *l = curlwp; 559 struct buf *bp; 560 daddr_t bn; 561 int bsize, bscale; 562 struct partinfo dpart; 563 int n, on; 564 int error = 0; 565 566 #ifdef DIAGNOSTIC 567 if (uio->uio_rw != UIO_WRITE) 568 panic("spec_write mode"); 569 if (&uio->uio_vmspace->vm_map != kernel_map && 570 uio->uio_vmspace != curproc->p_vmspace) 571 panic("spec_write proc"); 572 #endif 573 574 switch (vp->v_type) { 575 576 case VCHR: 577 VOP_UNLOCK(vp, 0); 578 error = cdev_write(vp->v_rdev, uio, ap->a_ioflag); 579 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 580 return (error); 581 582 case VBLK: 583 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 584 if (uio->uio_resid == 0) 585 return (0); 586 if 
(uio->uio_offset < 0) 587 return (EINVAL); 588 bsize = BLKDEV_IOSIZE; 589 if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) { 590 if (dpart.part->p_fstype == FS_BSDFFS && 591 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 592 bsize = dpart.part->p_frag * 593 dpart.part->p_fsize; 594 } 595 bscale = bsize >> DEV_BSHIFT; 596 do { 597 bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); 598 on = uio->uio_offset % bsize; 599 n = min((unsigned)(bsize - on), uio->uio_resid); 600 if (n == bsize) 601 bp = getblk(vp, bn, bsize, 0, 0); 602 else 603 error = bread(vp, bn, bsize, NOCRED, 604 B_MODIFY, &bp); 605 if (error) { 606 brelse(bp, 0); 607 return (error); 608 } 609 n = min(n, bsize - bp->b_resid); 610 error = uiomove((char *)bp->b_data + on, n, uio); 611 if (error) 612 brelse(bp, 0); 613 else { 614 if (n + on == bsize) 615 bawrite(bp); 616 else 617 bdwrite(bp); 618 error = bp->b_error; 619 } 620 } while (error == 0 && uio->uio_resid > 0 && n != 0); 621 return (error); 622 623 default: 624 panic("spec_write type"); 625 } 626 /* NOTREACHED */ 627 } 628 629 /* 630 * Device ioctl operation. 631 */ 632 /* ARGSUSED */ 633 int 634 spec_ioctl(void *v) 635 { 636 struct vop_ioctl_args /* { 637 struct vnode *a_vp; 638 u_long a_command; 639 void *a_data; 640 int a_fflag; 641 kauth_cred_t a_cred; 642 } */ *ap = v; 643 struct vnode *vp; 644 dev_t dev; 645 646 /* 647 * Extract all the info we need from the vnode, taking care to 648 * avoid a race with VOP_REVOKE(). 
649 */ 650 651 vp = ap->a_vp; 652 dev = NODEV; 653 mutex_enter(&vp->v_interlock); 654 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) { 655 dev = vp->v_rdev; 656 } 657 mutex_exit(&vp->v_interlock); 658 if (dev == NODEV) { 659 return ENXIO; 660 } 661 662 switch (vp->v_type) { 663 664 case VCHR: 665 return cdev_ioctl(dev, ap->a_command, ap->a_data, 666 ap->a_fflag, curlwp); 667 668 case VBLK: 669 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 670 return bdev_ioctl(dev, ap->a_command, ap->a_data, 671 ap->a_fflag, curlwp); 672 673 default: 674 panic("spec_ioctl"); 675 /* NOTREACHED */ 676 } 677 } 678 679 /* ARGSUSED */ 680 int 681 spec_poll(void *v) 682 { 683 struct vop_poll_args /* { 684 struct vnode *a_vp; 685 int a_events; 686 } */ *ap = v; 687 struct vnode *vp; 688 dev_t dev; 689 690 /* 691 * Extract all the info we need from the vnode, taking care to 692 * avoid a race with VOP_REVOKE(). 693 */ 694 695 vp = ap->a_vp; 696 dev = NODEV; 697 mutex_enter(&vp->v_interlock); 698 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) { 699 dev = vp->v_rdev; 700 } 701 mutex_exit(&vp->v_interlock); 702 if (dev == NODEV) { 703 return POLLERR; 704 } 705 706 switch (vp->v_type) { 707 708 case VCHR: 709 return cdev_poll(dev, ap->a_events, curlwp); 710 711 default: 712 return (genfs_poll(v)); 713 } 714 } 715 716 /* ARGSUSED */ 717 int 718 spec_kqfilter(void *v) 719 { 720 struct vop_kqfilter_args /* { 721 struct vnode *a_vp; 722 struct proc *a_kn; 723 } */ *ap = v; 724 dev_t dev; 725 726 switch (ap->a_vp->v_type) { 727 728 case VCHR: 729 dev = ap->a_vp->v_rdev; 730 return cdev_kqfilter(dev, ap->a_kn); 731 default: 732 /* 733 * Block devices don't support kqfilter, and refuse it 734 * for any other files (like those vflush()ed) too. 735 */ 736 return (EOPNOTSUPP); 737 } 738 } 739 740 /* 741 * Allow mapping of only D_DISK. This is called only for VBLK. 
742 */ 743 int 744 spec_mmap(void *v) 745 { 746 struct vop_mmap_args /* { 747 struct vnode *a_vp; 748 vm_prot_t a_prot; 749 kauth_cred_t a_cred; 750 } */ *ap = v; 751 struct vnode *vp = ap->a_vp; 752 753 KASSERT(vp->v_type == VBLK); 754 if (bdev_type(vp->v_rdev) != D_DISK) 755 return EINVAL; 756 757 return 0; 758 } 759 760 /* 761 * Synch buffers associated with a block device 762 */ 763 /* ARGSUSED */ 764 int 765 spec_fsync(void *v) 766 { 767 struct vop_fsync_args /* { 768 struct vnode *a_vp; 769 kauth_cred_t a_cred; 770 int a_flags; 771 off_t offlo; 772 off_t offhi; 773 } */ *ap = v; 774 struct vnode *vp = ap->a_vp; 775 struct mount *mp; 776 int error; 777 778 if (vp->v_type == VBLK) { 779 if ((mp = vp->v_specmountpoint) != NULL) { 780 error = VFS_FSYNC(mp, vp, ap->a_flags | FSYNC_VFS); 781 if (error != EOPNOTSUPP) 782 return error; 783 } 784 vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0); 785 } 786 return (0); 787 } 788 789 /* 790 * Just call the device strategy routine 791 */ 792 int 793 spec_strategy(void *v) 794 { 795 struct vop_strategy_args /* { 796 struct vnode *a_vp; 797 struct buf *a_bp; 798 } */ *ap = v; 799 struct vnode *vp = ap->a_vp; 800 struct buf *bp = ap->a_bp; 801 int error; 802 803 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 804 805 error = 0; 806 bp->b_dev = vp->v_rdev; 807 if (!(bp->b_flags & B_READ) && 808 (LIST_FIRST(&bp->b_dep)) != NULL && bioopsp) 809 bioopsp->io_start(bp); 810 811 if (!(bp->b_flags & B_READ)) 812 error = fscow_run(bp, false); 813 814 if (error) { 815 bp->b_error = error; 816 biodone(bp); 817 return (error); 818 } 819 820 bdev_strategy(bp); 821 822 return (0); 823 } 824 825 int 826 spec_inactive(void *v) 827 { 828 struct vop_inactive_args /* { 829 struct vnode *a_vp; 830 struct proc *a_l; 831 } */ *ap = v; 832 833 VOP_UNLOCK(ap->a_vp, 0); 834 return (0); 835 } 836 837 /* 838 * This is a noop, simply returning what one has been given. 
839 */ 840 int 841 spec_bmap(void *v) 842 { 843 struct vop_bmap_args /* { 844 struct vnode *a_vp; 845 daddr_t a_bn; 846 struct vnode **a_vpp; 847 daddr_t *a_bnp; 848 int *a_runp; 849 } */ *ap = v; 850 851 if (ap->a_vpp != NULL) 852 *ap->a_vpp = ap->a_vp; 853 if (ap->a_bnp != NULL) 854 *ap->a_bnp = ap->a_bn; 855 if (ap->a_runp != NULL) 856 *ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1; 857 return (0); 858 } 859 860 /* 861 * Device close routine 862 */ 863 /* ARGSUSED */ 864 int 865 spec_close(void *v) 866 { 867 struct vop_close_args /* { 868 struct vnode *a_vp; 869 int a_fflag; 870 kauth_cred_t a_cred; 871 } */ *ap = v; 872 struct vnode *vp = ap->a_vp; 873 struct session *sess; 874 dev_t dev = vp->v_rdev; 875 int mode, error, flags, flags1, count; 876 specnode_t *sn; 877 specdev_t *sd; 878 879 flags = vp->v_iflag; 880 sn = vp->v_specnode; 881 sd = sn->sn_dev; 882 883 switch (vp->v_type) { 884 885 case VCHR: 886 /* 887 * Hack: a tty device that is a controlling terminal 888 * has a reference from the session structure. We 889 * cannot easily tell that a character device is a 890 * controlling terminal, unless it is the closing 891 * process' controlling terminal. In that case, if the 892 * open count is 1 release the reference from the 893 * session. Also, remove the link from the tty back to 894 * the session and pgrp. 895 * 896 * XXX V. fishy. 
897 */ 898 mutex_enter(proc_lock); 899 sess = curlwp->l_proc->p_session; 900 if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) { 901 mutex_spin_enter(&tty_lock); 902 sess->s_ttyvp = NULL; 903 if (sess->s_ttyp->t_session != NULL) { 904 sess->s_ttyp->t_pgrp = NULL; 905 sess->s_ttyp->t_session = NULL; 906 mutex_spin_exit(&tty_lock); 907 SESSRELE(sess); 908 mutex_exit(proc_lock); 909 } else { 910 mutex_spin_exit(&tty_lock); 911 if (sess->s_ttyp->t_pgrp != NULL) 912 panic("spec_close: spurious pgrp ref"); 913 mutex_exit(proc_lock); 914 } 915 vrele(vp); 916 } else 917 mutex_exit(proc_lock); 918 919 /* 920 * If the vnode is locked, then we are in the midst 921 * of forcably closing the device, otherwise we only 922 * close on last reference. 923 */ 924 mode = S_IFCHR; 925 break; 926 927 case VBLK: 928 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 929 /* 930 * On last close of a block device (that isn't mounted) 931 * we must invalidate any in core blocks, so that 932 * we can, for instance, change floppy disks. 933 */ 934 error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0); 935 if (error) 936 return (error); 937 /* 938 * We do not want to really close the device if it 939 * is still in use unless we are trying to close it 940 * forcibly. Since every use (buffer, vnode, swap, cmap) 941 * holds a reference to the vnode, and because we mark 942 * any other vnodes that alias this device, when the 943 * sum of the reference counts on all the aliased 944 * vnodes descends to one, we are on last close. 945 */ 946 mode = S_IFBLK; 947 break; 948 949 default: 950 panic("spec_close: not special"); 951 } 952 953 mutex_enter(&device_lock); 954 sn->sn_opencnt--; 955 count = --sd->sd_opencnt; 956 if (vp->v_type == VBLK) 957 sd->sd_bdevvp = NULL; 958 mutex_exit(&device_lock); 959 960 if (count != 0) 961 return 0; 962 963 flags1 = ap->a_fflag; 964 965 /* 966 * if VI_XLOCK is set, then we're going away soon, so make this 967 * non-blocking. 
Also ensures that we won't wedge in vn_lock below. 968 */ 969 if (flags & VI_XLOCK) 970 flags1 |= FNONBLOCK; 971 972 /* 973 * If we're able to block, release the vnode lock & reacquire. We 974 * might end up sleeping for someone else who wants our queues. They 975 * won't get them if we hold the vnode locked. Also, if VI_XLOCK is 976 * set, don't release the lock as we won't be able to regain it. 977 */ 978 if (!(flags1 & FNONBLOCK)) 979 VOP_UNLOCK(vp, 0); 980 981 if (vp->v_type == VBLK) 982 error = bdev_close(dev, flags1, mode, curlwp); 983 else 984 error = cdev_close(dev, flags1, mode, curlwp); 985 986 if (!(flags1 & FNONBLOCK)) 987 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 988 989 return (error); 990 } 991 992 /* 993 * Print out the contents of a special device vnode. 994 */ 995 int 996 spec_print(void *v) 997 { 998 struct vop_print_args /* { 999 struct vnode *a_vp; 1000 } */ *ap = v; 1001 1002 printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev), 1003 (unsigned long long)minor(ap->a_vp->v_rdev)); 1004 return 0; 1005 } 1006 1007 /* 1008 * Return POSIX pathconf information applicable to special devices. 1009 */ 1010 int 1011 spec_pathconf(void *v) 1012 { 1013 struct vop_pathconf_args /* { 1014 struct vnode *a_vp; 1015 int a_name; 1016 register_t *a_retval; 1017 } */ *ap = v; 1018 1019 switch (ap->a_name) { 1020 case _PC_LINK_MAX: 1021 *ap->a_retval = LINK_MAX; 1022 return (0); 1023 case _PC_MAX_CANON: 1024 *ap->a_retval = MAX_CANON; 1025 return (0); 1026 case _PC_MAX_INPUT: 1027 *ap->a_retval = MAX_INPUT; 1028 return (0); 1029 case _PC_PIPE_BUF: 1030 *ap->a_retval = PIPE_BUF; 1031 return (0); 1032 case _PC_CHOWN_RESTRICTED: 1033 *ap->a_retval = 1; 1034 return (0); 1035 case _PC_VDISABLE: 1036 *ap->a_retval = _POSIX_VDISABLE; 1037 return (0); 1038 case _PC_SYNC_IO: 1039 *ap->a_retval = 1; 1040 return (0); 1041 default: 1042 return (EINVAL); 1043 } 1044 /* NOTREACHED */ 1045 } 1046 1047 /* 1048 * Advisory record locking support. 
1049 */ 1050 int 1051 spec_advlock(void *v) 1052 { 1053 struct vop_advlock_args /* { 1054 struct vnode *a_vp; 1055 void *a_id; 1056 int a_op; 1057 struct flock *a_fl; 1058 int a_flags; 1059 } */ *ap = v; 1060 struct vnode *vp = ap->a_vp; 1061 1062 return lf_advlock(ap, &vp->v_speclockf, (off_t)0); 1063 } 1064