/*	$NetBSD: spec_vnops.c,v 1.126 2009/10/06 04:28:10 elad Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.126 2009/10/06 04:28:10 elad Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>
#include <sys/tty.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/module.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char	devopn[] = "devopn";
const char	devio[] = "devio";
const char	devwait[] = "devwait";
const char	devin[] = "devin";
const char	devout[] = "devout";
const char	devioc[] = "devioc";
const char	devcls[] = "devcls";

vnode_t	*specfs_hash[SPECHSZ];

/*
 * This vnode operations vector is used for special device nodes
 * created from whole cloth by the kernel.  For the ops vector for
 * vnodes built from special devices found in a filesystem, see (e.g.)
 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other
 * filesystems.
 */

int (**spec_vnodeop_p)(void *);
const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_fcntl_desc, spec_fcntl },		/* fcntl */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

/* Returns true if vnode is /dev/mem or /dev/kmem. */
bool
iskmemvp(struct vnode *vp)
{
	return ((vp->v_type == VCHR) && iskmemdev(vp->v_rdev));
}

/*
 * Returns true if dev is /dev/mem or /dev/kmem.
 */
int
iskmemdev(dev_t dev)
{
	/* mem_no is emitted by config(8) to generated devsw.c */
	extern const int mem_no;

	/* minor 14 is /dev/io on i386 with COMPAT_10 */
	return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14));
}

/*
 * Initialize a vnode that represents a device.
 */
void
spec_node_init(vnode_t *vp, dev_t rdev)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t *vp2;
	vnode_t **vpp;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode == NULL);

	/*
	 * Search the hash table for this device.  If known, add a
	 * reference to the device structure.  If not known, create
	 * a new entry to represent the device.  In all cases add
	 * the vnode to the hash table.
	 */
	sn = kmem_alloc(sizeof(*sn), KM_SLEEP);
	if (sn == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	sd = kmem_alloc(sizeof(*sd), KM_SLEEP);
	if (sd == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	mutex_enter(&device_lock);
	vpp = &specfs_hash[SPECHASH(rdev)];
	for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) {
		KASSERT(vp2->v_specnode != NULL);
		if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) {
			break;
		}
	}
	if (vp2 == NULL) {
		/* No existing record, create a new one. */
		sd->sd_rdev = rdev;
		sd->sd_mountpoint = NULL;
		sd->sd_lockf = NULL;
		sd->sd_refcnt = 1;
		sd->sd_opencnt = 0;
		sd->sd_bdevvp = NULL;
		sn->sn_dev = sd;
		sd = NULL;
	} else {
		/* Use the existing record. */
		sn->sn_dev = vp2->v_specnode->sn_dev;
		sn->sn_dev->sd_refcnt++;
	}
	/* Insert vnode into the hash chain. */
	sn->sn_opencnt = 0;
	sn->sn_rdev = rdev;
	sn->sn_gone = false;
	vp->v_specnode = sn;
	vp->v_specnext = *vpp;
	*vpp = vp;
	mutex_exit(&device_lock);

	/* Free the record we allocated if unused. */
	if (sd != NULL) {
		kmem_free(sd, sizeof(*sd));
	}
}

/*
 * A vnode representing a special device is going away.  Close
 * the device if the vnode holds it open.
 */
void
spec_node_revoke(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT((vp->v_iflag & VI_XLOCK) != 0);
	KASSERT(sn->sn_gone == false);

	mutex_enter(&device_lock);
	KASSERT(sn->sn_opencnt <= sd->sd_opencnt);
	if (sn->sn_opencnt != 0) {
		sd->sd_opencnt -= (sn->sn_opencnt - 1);
		sn->sn_opencnt = 1;
		sn->sn_gone = true;
		mutex_exit(&device_lock);

		VOP_CLOSE(vp, FNONBLOCK, NOCRED);

		mutex_enter(&device_lock);
		KASSERT(sn->sn_opencnt == 0);
	}
	mutex_exit(&device_lock);
}

/*
 * A vnode representing a special device is being recycled.
 * Destroy the specfs component.
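 * The per-vnode specnode_t is always freed here; the shared specdev_t
 * is freed only when the last specnode_t referencing it goes away.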
 */
void
spec_node_destroy(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t **vpp, *vp2;
	int refcnt;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT(sn->sn_opencnt == 0);

	mutex_enter(&device_lock);
	/* Remove from the hash and destroy the node. */
	vpp = &specfs_hash[SPECHASH(vp->v_rdev)];
	for (vp2 = *vpp;; vp2 = vp2->v_specnext) {
		if (vp2 == NULL) {
			panic("spec_node_destroy: corrupt hash");
		}
		if (vp2 == vp) {
			KASSERT(vp == *vpp);
			*vpp = vp->v_specnext;
			break;
		}
		if (vp2->v_specnext == vp) {
			vp2->v_specnext = vp->v_specnext;
			break;
		}
	}
	sn = vp->v_specnode;
	vp->v_specnode = NULL;
	refcnt = sd->sd_refcnt--;
	KASSERT(refcnt > 0);
	mutex_exit(&device_lock);

	/* If the device is no longer in use, destroy our record. */
	if (refcnt == 1) {
		KASSERT(sd->sd_opencnt == 0);
		KASSERT(sd->sd_bdevvp == NULL);
		kmem_free(sd, sizeof(*sd));
	}
	kmem_free(sn, sizeof(*sn));
}

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct lwp *l;
	struct vnode *vp;
	dev_t dev;
	int error;
	struct partinfo pi;
	enum kauth_device_req req;
	specnode_t *sn;
	specdev_t *sd;

	u_int gen;
	const char *name;

	l = curlwp;
	vp = ap->a_vp;
	dev = vp->v_rdev;
	sn = vp->v_specnode;
	sd = sn->sn_dev;
	name = NULL;
	gen = 0;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (ap->a_mode & (FREAD | FWRITE)) {
	case FREAD | FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW;
		break;
	case FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE;
		break;
	default:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ;
		break;
	}

	switch (vp->v_type) {
	case VCHR:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * Character devices can accept opens from multiple
		 * vnodes.
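		 * Both the per-vnode open count (sn_opencnt) and the
		 * per-device count (sd_opencnt) are bumped under
		 * device_lock before the driver's open routine runs,
		 * and rolled back below if the open fails.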
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		sd->sd_opencnt++;
		sn->sn_opencnt++;
		mutex_exit(&device_lock);
		if (cdev_type(dev) == D_TTY)
			vp->v_vflag |= VV_ISTTY;
		VOP_UNLOCK(vp, 0);
		do {
			const struct cdevsw *cdev;

			gen = module_gen;
			error = cdev_open(dev, ap->a_mode, S_IFCHR, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			cdev = cdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (cdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = cdevsw_getname(major(dev))) == NULL)
				break;

			/* Try to autoload device module */
			mutex_enter(&module_lock);
			(void) module_autoload(name, MODULE_CLASS_DRIVER);
			mutex_exit(&module_lock);
		} while (gen != module_gen);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		break;

	case VBLK:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * For block devices, permit only one open.  The buffer
		 * cache cannot remain self-consistent with multiple
		 * vnodes holding a block device open.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		if (sd->sd_opencnt != 0) {
			mutex_exit(&device_lock);
			return EBUSY;
		}
		sn->sn_opencnt = 1;
		sd->sd_opencnt = 1;
		sd->sd_bdevvp = vp;
		mutex_exit(&device_lock);
		do {
			const struct bdevsw *bdev;

			gen = module_gen;
			error = bdev_open(dev, ap->a_mode, S_IFBLK, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			bdev = bdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (bdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = bdevsw_getname(major(dev))) == NULL)
				break;

			VOP_UNLOCK(vp, 0);

			/* Try to autoload device module */
			mutex_enter(&module_lock);
			(void) module_autoload(name, MODULE_CLASS_DRIVER);
			mutex_exit(&module_lock);

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		} while (gen != module_gen);

		break;

	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
	default:
		return 0;
	}

	mutex_enter(&device_lock);
	if (sn->sn_gone) {
		if (error == 0)
			error = EBADF;
	} else if (error != 0) {
		sd->sd_opencnt--;
		sn->sn_opencnt--;
		if (vp->v_type == VBLK)
			sd->sd_bdevvp = NULL;
	}
	mutex_exit(&device_lock);

	if (cdev_type(dev) != D_DISK || error != 0)
		return error;

	if (vp->v_type == VCHR)
		error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	else
		error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	if (error == 0)
		uvm_vnp_setsize(vp,
		    (voff_t)pi.disklab->d_secsize * pi.part->p_size);
	return 0;
}

/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0);
		error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			error = bread(vp, bn, bsize, NOCRED, 0, &bp);
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp, 0);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0);
		error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED,
				    B_MODIFY, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp, 0);
			else {
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
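 * The dev_t is sampled under v_interlock so that a concurrent
 * VOP_REVOKE() cannot tear down the specnode mid-call; a revoked
 * vnode fails with ENXIO instead.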
 */
/* ARGSUSED */
int
spec_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(&vp->v_interlock);
	if (dev == NODEV) {
		return ENXIO;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		return bdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/* ARGSUSED */
int
spec_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(&vp->v_interlock);
	if (dev == NODEV) {
		return POLLERR;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_poll(dev, ap->a_events, curlwp);

	default:
		return (genfs_poll(v));
	}
}

/* ARGSUSED */
int
spec_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode *a_vp;
		struct knote *a_kn;
	} */ *ap = v;
	dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return cdev_kqfilter(dev, ap->a_kn);
	default:
		/*
		 * Block devices don't support kqfilter, and refuse it
		 * for any other files (like those vflush()ed) too.
		 */
		return (EOPNOTSUPP);
	}
}

/*
 * Allow mapping of only D_DISK.  This is called only for VBLK.
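 * Disks are the only block devices with a well-defined size and
 * stable, randomly accessible blocks, which a memory mapping requires.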
 */
int
spec_mmap(void *v)
{
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_type == VBLK);
	if (bdev_type(vp->v_rdev) != D_DISK)
		return EINVAL;

	return 0;
}

/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct mount *mp;
	int error;

	if (vp->v_type == VBLK) {
		if ((mp = vp->v_specmountpoint) != NULL) {
			error = VFS_FSYNC(mp, vp, ap->a_flags | FSYNC_VFS);
			if (error != EOPNOTSUPP)
				return error;
		}
		vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0);
	}
	return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	int error;

	KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);

	error = 0;
	bp->b_dev = vp->v_rdev;

	if (!(bp->b_flags & B_READ))
		error = fscow_run(bp, false);

	if (error) {
		bp->b_error = error;
		biodone(bp);
		return (error);
	}

	bdev_strategy(bp);

	return (0);
}

int
spec_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_l;
	} */ *ap = v;

	VOP_UNLOCK(ap->a_vp, 0);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct session *sess;
	dev_t dev = vp->v_rdev;
	int mode, error, flags, flags1, count;
	specnode_t *sn;
	specdev_t *sd;

	flags = vp->v_iflag;
	sn = vp->v_specnode;
	sd = sn->sn_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.  We
		 * cannot easily tell that a character device is a
		 * controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case, if the
		 * open count is 1 release the reference from the
		 * session.  Also, remove the link from the tty back to
		 * the session and pgrp.
		 *
		 * XXX V. fishy.
		 */
		mutex_enter(proc_lock);
		sess = curlwp->l_proc->p_session;
		if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) {
			mutex_spin_enter(&tty_lock);
			sess->s_ttyvp = NULL;
			if (sess->s_ttyp->t_session != NULL) {
				sess->s_ttyp->t_pgrp = NULL;
				sess->s_ttyp->t_session = NULL;
				mutex_spin_exit(&tty_lock);
				/* Releases proc_lock. */
				proc_sessrele(sess);
			} else {
				mutex_spin_exit(&tty_lock);
				if (sess->s_ttyp->t_pgrp != NULL)
					panic("spec_close: spurious pgrp ref");
				mutex_exit(proc_lock);
			}
			vrele(vp);
		} else
			mutex_exit(proc_lock);

		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		mode = S_IFCHR;
		break;

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly.  Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	mutex_enter(&device_lock);
	sn->sn_opencnt--;
	count = --sd->sd_opencnt;
	if (vp->v_type == VBLK)
		sd->sd_bdevvp = NULL;
	mutex_exit(&device_lock);

	if (count != 0)
		return 0;

	flags1 = ap->a_fflag;

	/*
	 * if VI_XLOCK is set, then we're going away soon, so make this
	 * non-blocking.  Also ensures that we won't wedge in vn_lock below.
	 */
	if (flags & VI_XLOCK)
		flags1 |= FNONBLOCK;

	/*
	 * If we're able to block, release the vnode lock & reacquire.  We
	 * might end up sleeping for someone else who wants our queues.  They
	 * won't get them if we hold the vnode locked.  Also, if VI_XLOCK is
	 * set, don't release the lock as we won't be able to regain it.
	 */
	if (!(flags1 & FNONBLOCK))
		VOP_UNLOCK(vp, 0);

	if (vp->v_type == VBLK)
		error = bdev_close(dev, flags1, mode, curlwp);
	else
		error = cdev_close(dev, flags1, mode, curlwp);

	if (!(flags1 & FNONBLOCK))
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	return (error);
}

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev),
	    (unsigned long long)minor(ap->a_vp->v_rdev));
	return 0;
}

/*
 * Return POSIX pathconf information applicable to special devices.
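 * The terminal-related values (MAX_CANON, MAX_INPUT, _POSIX_VDISABLE)
 * are meaningful only when the device is a tty.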
 */
int
spec_pathconf(void *v)
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap = v;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	case _PC_SYNC_IO:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
}