1 /* $NetBSD: spec_vnops.c,v 1.124 2009/04/25 15:06:32 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * @(#)spec_vnops.c 8.15 (Berkeley) 7/14/95 58 */ 59 60 #include <sys/cdefs.h> 61 __KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.124 2009/04/25 15:06:32 rmind Exp $"); 62 63 #include <sys/param.h> 64 #include <sys/proc.h> 65 #include <sys/systm.h> 66 #include <sys/kernel.h> 67 #include <sys/conf.h> 68 #include <sys/buf.h> 69 #include <sys/mount.h> 70 #include <sys/namei.h> 71 #include <sys/vnode.h> 72 #include <sys/stat.h> 73 #include <sys/errno.h> 74 #include <sys/ioctl.h> 75 #include <sys/poll.h> 76 #include <sys/file.h> 77 #include <sys/disklabel.h> 78 #include <sys/lockf.h> 79 #include <sys/tty.h> 80 #include <sys/kauth.h> 81 #include <sys/fstrans.h> 82 #include <sys/module.h> 83 84 #include <miscfs/genfs/genfs.h> 85 #include <miscfs/specfs/specdev.h> 86 87 /* symbolic sleep message strings for devices */ 88 const char devopn[] = "devopn"; 89 const char devio[] = "devio"; 90 const char devwait[] = "devwait"; 91 const char devin[] = "devin"; 92 const char devout[] = "devout"; 93 const char devioc[] = "devioc"; 94 const char devcls[] = "devcls"; 95 96 vnode_t *specfs_hash[SPECHSZ]; 97 98 /* 99 * This vnode operations vector is used for special device nodes 100 * created from whole cloth by the kernel. For the ops vector for 101 * vnodes built from special devices found in a filesystem, see (e.g) 102 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other 103 * filesystems. 104 */ 105 106 int (**spec_vnodeop_p)(void *); 107 const struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 108 { &vop_default_desc, vn_default_error }, 109 { &vop_lookup_desc, spec_lookup }, /* lookup */ 110 { &vop_create_desc, spec_create }, /* create */ 111 { &vop_mknod_desc, spec_mknod }, /* mknod */ 112 { &vop_open_desc, spec_open }, /* open */ 113 { &vop_close_desc, spec_close }, /* close */ 114 { &vop_access_desc, spec_access }, /* access */ 115 { &vop_getattr_desc, spec_getattr }, /* getattr */ 116 { &vop_setattr_desc, spec_setattr }, /* setattr */ 117 { &vop_read_desc, spec_read }, /* read */ 118 { &vop_write_desc, spec_write }, /* write */ 119 { &vop_fcntl_desc, spec_fcntl }, /* fcntl */ 120 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 121 { &vop_poll_desc, spec_poll }, /* poll */ 122 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 123 { &vop_revoke_desc, spec_revoke }, /* revoke */ 124 { &vop_mmap_desc, spec_mmap }, /* mmap */ 125 { &vop_fsync_desc, spec_fsync }, /* fsync */ 126 { &vop_seek_desc, spec_seek }, /* seek */ 127 { &vop_remove_desc, spec_remove }, /* remove */ 128 { &vop_link_desc, spec_link }, /* link */ 129 { &vop_rename_desc, spec_rename }, /* rename */ 130 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 131 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 132 { &vop_symlink_desc, spec_symlink }, /* symlink */ 133 { &vop_readdir_desc, spec_readdir }, /* readdir */ 134 { &vop_readlink_desc, spec_readlink }, /* readlink */ 135 { &vop_abortop_desc, spec_abortop }, /* abortop */ 136 { &vop_inactive_desc, spec_inactive }, /* inactive */ 137 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 138 { &vop_lock_desc, spec_lock }, /* lock */ 139 { &vop_unlock_desc, spec_unlock }, /* unlock */ 140 { &vop_bmap_desc, spec_bmap }, /* bmap */ 141 { &vop_strategy_desc, spec_strategy }, /* strategy */ 142 { &vop_print_desc, spec_print }, /* print */ 143 { &vop_islocked_desc, spec_islocked }, /* islocked */ 144 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 145 { &vop_advlock_desc, spec_advlock }, /* advlock */ 146 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 147 { &vop_getpages_desc, spec_getpages }, /* getpages */ 148 { &vop_putpages_desc, spec_putpages }, /* putpages */ 149 { NULL, NULL } 150 }; 151 const struct vnodeopv_desc spec_vnodeop_opv_desc = 152 { &spec_vnodeop_p, spec_vnodeop_entries }; 153 154 /* 155 * Returns true if dev is /dev/mem or /dev/kmem. 156 */ 157 int 158 iskmemdev(dev_t dev) 159 { 160 /* mem_no is emitted by config(8) to generated devsw.c */ 161 extern const int mem_no; 162 163 /* minor 14 is /dev/io on i386 with COMPAT_10 */ 164 return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14)); 165 } 166 167 /* 168 * Initialize a vnode that represents a device. 169 */ 170 void 171 spec_node_init(vnode_t *vp, dev_t rdev) 172 { 173 specnode_t *sn; 174 specdev_t *sd; 175 vnode_t *vp2; 176 vnode_t **vpp; 177 178 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 179 KASSERT(vp->v_specnode == NULL); 180 181 /* 182 * Search the hash table for this device. If known, add a 183 * reference to the device structure. If not known, create 184 * a new entry to represent the device. In all cases add 185 * the vnode to the hash table. 186 */ 187 sn = kmem_alloc(sizeof(*sn), KM_SLEEP); 188 if (sn == NULL) { 189 /* XXX */ 190 panic("spec_node_init: unable to allocate memory"); 191 } 192 sd = kmem_alloc(sizeof(*sd), KM_SLEEP); 193 if (sd == NULL) { 194 /* XXX */ 195 panic("spec_node_init: unable to allocate memory"); 196 } 197 mutex_enter(&device_lock); 198 vpp = &specfs_hash[SPECHASH(rdev)]; 199 for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) { 200 KASSERT(vp2->v_specnode != NULL); 201 if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) { 202 break; 203 } 204 } 205 if (vp2 == NULL) { 206 /* No existing record, create a new one. */ 207 sd->sd_rdev = rdev; 208 sd->sd_mountpoint = NULL; 209 sd->sd_lockf = NULL; 210 sd->sd_refcnt = 1; 211 sd->sd_opencnt = 0; 212 sd->sd_bdevvp = NULL; 213 sn->sn_dev = sd; 214 sd = NULL; 215 } else { 216 /* Use the existing record. */ 217 sn->sn_dev = vp2->v_specnode->sn_dev; 218 sn->sn_dev->sd_refcnt++; 219 } 220 /* Insert vnode into the hash chain. */ 221 sn->sn_opencnt = 0; 222 sn->sn_rdev = rdev; 223 sn->sn_gone = false; 224 vp->v_specnode = sn; 225 vp->v_specnext = *vpp; 226 *vpp = vp; 227 mutex_exit(&device_lock); 228 229 /* Free the record we allocated if unused. */ 230 if (sd != NULL) { 231 kmem_free(sd, sizeof(*sd)); 232 } 233 } 234 235 /* 236 * A vnode representing a special device is going away. Close 237 * the device if the vnode holds it open. 238 */ 239 void 240 spec_node_revoke(vnode_t *vp) 241 { 242 specnode_t *sn; 243 specdev_t *sd; 244 245 sn = vp->v_specnode; 246 sd = sn->sn_dev; 247 248 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 249 KASSERT(vp->v_specnode != NULL); 250 KASSERT((vp->v_iflag & VI_XLOCK) != 0); 251 KASSERT(sn->sn_gone == false); 252 253 mutex_enter(&device_lock); 254 KASSERT(sn->sn_opencnt <= sd->sd_opencnt); 255 if (sn->sn_opencnt != 0) { 256 sd->sd_opencnt -= (sn->sn_opencnt - 1); 257 sn->sn_opencnt = 1; 258 sn->sn_gone = true; 259 mutex_exit(&device_lock); 260 261 VOP_CLOSE(vp, FNONBLOCK, NOCRED); 262 263 mutex_enter(&device_lock); 264 KASSERT(sn->sn_opencnt == 0); 265 } 266 mutex_exit(&device_lock); 267 } 268 269 /* 270 * A vnode representing a special device is being recycled. 271 * Destroy the specfs component. 272 */ 273 void 274 spec_node_destroy(vnode_t *vp) 275 { 276 specnode_t *sn; 277 specdev_t *sd; 278 vnode_t **vpp, *vp2; 279 int refcnt; 280 281 sn = vp->v_specnode; 282 sd = sn->sn_dev; 283 284 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 285 KASSERT(vp->v_specnode != NULL); 286 KASSERT(sn->sn_opencnt == 0); 287 288 mutex_enter(&device_lock); 289 /* Remove from the hash and destroy the node. */ 290 vpp = &specfs_hash[SPECHASH(vp->v_rdev)]; 291 for (vp2 = *vpp;; vp2 = vp2->v_specnext) { 292 if (vp2 == NULL) { 293 panic("spec_node_destroy: corrupt hash"); 294 } 295 if (vp2 == vp) { 296 KASSERT(vp == *vpp); 297 *vpp = vp->v_specnext; 298 break; 299 } 300 if (vp2->v_specnext == vp) { 301 vp2->v_specnext = vp->v_specnext; 302 break; 303 } 304 } 305 sn = vp->v_specnode; 306 vp->v_specnode = NULL; 307 refcnt = sd->sd_refcnt--; 308 KASSERT(refcnt > 0); 309 mutex_exit(&device_lock); 310 311 /* If the device is no longer in use, destroy our record. */ 312 if (refcnt == 1) { 313 KASSERT(sd->sd_opencnt == 0); 314 KASSERT(sd->sd_bdevvp == NULL); 315 kmem_free(sd, sizeof(*sd)); 316 } 317 kmem_free(sn, sizeof(*sn)); 318 } 319 320 /* 321 * Trivial lookup routine that always fails. 322 */ 323 int 324 spec_lookup(void *v) 325 { 326 struct vop_lookup_args /* { 327 struct vnode *a_dvp; 328 struct vnode **a_vpp; 329 struct componentname *a_cnp; 330 } */ *ap = v; 331 332 *ap->a_vpp = NULL; 333 return (ENOTDIR); 334 } 335 336 /* 337 * Open a special file. 338 */ 339 /* ARGSUSED */ 340 int 341 spec_open(void *v) 342 { 343 struct vop_open_args /* { 344 struct vnode *a_vp; 345 int a_mode; 346 kauth_cred_t a_cred; 347 } */ *ap = v; 348 struct lwp *l; 349 struct vnode *vp; 350 dev_t dev; 351 int error; 352 struct partinfo pi; 353 enum kauth_device_req req; 354 specnode_t *sn; 355 specdev_t *sd; 356 357 u_int gen; 358 const char *name; 359 360 l = curlwp; 361 vp = ap->a_vp; 362 dev = vp->v_rdev; 363 sn = vp->v_specnode; 364 sd = sn->sn_dev; 365 name = NULL; 366 gen = 0; 367 368 /* 369 * Don't allow open if fs is mounted -nodev. 370 */ 371 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 372 return (ENXIO); 373 374 switch (ap->a_mode & (FREAD | FWRITE)) { 375 case FREAD | FWRITE: 376 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW; 377 break; 378 case FWRITE: 379 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE; 380 break; 381 default: 382 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ; 383 break; 384 } 385 386 switch (vp->v_type) { 387 case VCHR: 388 error = kauth_authorize_device_spec(ap->a_cred, req, vp); 389 if (error != 0) 390 return (error); 391 392 /* 393 * Character devices can accept opens from multiple 394 * vnodes. 395 */ 396 mutex_enter(&device_lock); 397 if (sn->sn_gone) { 398 mutex_exit(&device_lock); 399 return (EBADF); 400 } 401 sd->sd_opencnt++; 402 sn->sn_opencnt++; 403 mutex_exit(&device_lock); 404 if (cdev_type(dev) == D_TTY) 405 vp->v_vflag |= VV_ISTTY; 406 VOP_UNLOCK(vp, 0); 407 do { 408 gen = module_gen; 409 error = cdev_open(dev, ap->a_mode, S_IFCHR, l); 410 if (error != ENXIO) 411 break; 412 413 /* Get device name from devsw_conv array */ 414 if ((name = cdevsw_getname(major(dev))) == NULL) 415 break; 416 417 /* Try to autoload device module */ 418 mutex_enter(&module_lock); 419 (void) module_autoload(name, MODULE_CLASS_DRIVER); 420 mutex_exit(&module_lock); 421 } while (gen != module_gen); 422 423 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 424 break; 425 426 case VBLK: 427 error = kauth_authorize_device_spec(ap->a_cred, req, vp); 428 if (error != 0) 429 return (error); 430 431 /* 432 * For block devices, permit only one open. The buffer 433 * cache cannot remain self-consistent with multiple 434 * vnodes holding a block device open. 435 */ 436 mutex_enter(&device_lock); 437 if (sn->sn_gone) { 438 mutex_exit(&device_lock); 439 return (EBADF); 440 } 441 if (sd->sd_opencnt != 0) { 442 mutex_exit(&device_lock); 443 return EBUSY; 444 } 445 sn->sn_opencnt = 1; 446 sd->sd_opencnt = 1; 447 sd->sd_bdevvp = vp; 448 mutex_exit(&device_lock); 449 do { 450 gen = module_gen; 451 error = bdev_open(dev, ap->a_mode, S_IFBLK, l); 452 if (error != ENXIO) 453 break; 454 455 /* Get device name from devsw_conv array */ 456 if ((name = bdevsw_getname(major(dev))) == NULL) 457 break; 458 459 VOP_UNLOCK(vp, 0); 460 461 /* Try to autoload device module */ 462 mutex_enter(&module_lock); 463 (void) module_autoload(name, MODULE_CLASS_DRIVER); 464 mutex_exit(&module_lock); 465 466 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 467 } while (gen != module_gen); 468 469 break; 470 471 case VNON: 472 case VLNK: 473 case VDIR: 474 case VREG: 475 case VBAD: 476 case VFIFO: 477 case VSOCK: 478 default: 479 return 0; 480 } 481 482 mutex_enter(&device_lock); 483 if (sn->sn_gone) { 484 if (error == 0) 485 error = EBADF; 486 } else if (error != 0) { 487 sd->sd_opencnt--; 488 sn->sn_opencnt--; 489 if (vp->v_type == VBLK) 490 sd->sd_bdevvp = NULL; 491 492 } 493 mutex_exit(&device_lock); 494 495 if (cdev_type(dev) != D_DISK || error != 0) 496 return error; 497 498 if (vp->v_type == VCHR) 499 error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp); 500 else 501 error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp); 502 if (error == 0) 503 uvm_vnp_setsize(vp, 504 (voff_t)pi.disklab->d_secsize * pi.part->p_size); 505 return 0; 506 } 507 508 /* 509 * Vnode op for read 510 */ 511 /* ARGSUSED */ 512 int 513 spec_read(void *v) 514 { 515 struct vop_read_args /* { 516 struct vnode *a_vp; 517 struct uio *a_uio; 518 int a_ioflag; 519 kauth_cred_t a_cred; 520 } */ *ap = v; 521 struct vnode *vp = ap->a_vp; 522 struct uio *uio = ap->a_uio; 523 struct lwp *l = curlwp; 524 struct buf *bp; 525 daddr_t bn; 526 int bsize, bscale; 527 struct partinfo dpart; 528 int n, on; 529 int error = 0; 530 531 #ifdef DIAGNOSTIC 532 if (uio->uio_rw != UIO_READ) 533 panic("spec_read mode"); 534 if (&uio->uio_vmspace->vm_map != kernel_map && 535 uio->uio_vmspace != curproc->p_vmspace) 536 panic("spec_read proc"); 537 #endif 538 if (uio->uio_resid == 0) 539 return (0); 540 541 switch (vp->v_type) { 542 543 case VCHR: 544 VOP_UNLOCK(vp, 0); 545 error = cdev_read(vp->v_rdev, uio, ap->a_ioflag); 546 vn_lock(vp, LK_SHARED | LK_RETRY); 547 return (error); 548 549 case VBLK: 550 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 551 if (uio->uio_offset < 0) 552 return (EINVAL); 553 bsize = BLKDEV_IOSIZE; 554 if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) { 555 if (dpart.part->p_fstype == FS_BSDFFS && 556 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 557 bsize = dpart.part->p_frag * 558 dpart.part->p_fsize; 559 } 560 bscale = bsize >> DEV_BSHIFT; 561 do { 562 bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); 563 on = uio->uio_offset % bsize; 564 n = min((unsigned)(bsize - on), uio->uio_resid); 565 error = bread(vp, bn, bsize, NOCRED, 0, &bp); 566 n = min(n, bsize - bp->b_resid); 567 if (error) { 568 brelse(bp, 0); 569 return (error); 570 } 571 error = uiomove((char *)bp->b_data + on, n, uio); 572 brelse(bp, 0); 573 } while (error == 0 && uio->uio_resid > 0 && n != 0); 574 return (error); 575 576 default: 577 panic("spec_read type"); 578 } 579 /* NOTREACHED */ 580 } 581 582 /* 583 * Vnode op for write 584 */ 585 /* ARGSUSED */ 586 int 587 spec_write(void *v) 588 { 589 struct vop_write_args /* { 590 struct vnode *a_vp; 591 struct uio *a_uio; 592 int a_ioflag; 593 kauth_cred_t a_cred; 594 } */ *ap = v; 595 struct vnode *vp = ap->a_vp; 596 struct uio *uio = ap->a_uio; 597 struct lwp *l = curlwp; 598 struct buf *bp; 599 daddr_t bn; 600 int bsize, bscale; 601 struct partinfo dpart; 602 int n, on; 603 int error = 0; 604 605 #ifdef DIAGNOSTIC 606 if (uio->uio_rw != UIO_WRITE) 607 panic("spec_write mode"); 608 if (&uio->uio_vmspace->vm_map != kernel_map && 609 uio->uio_vmspace != curproc->p_vmspace) 610 panic("spec_write proc"); 611 #endif 612 613 switch (vp->v_type) { 614 615 case VCHR: 616 VOP_UNLOCK(vp, 0); 617 error = cdev_write(vp->v_rdev, uio, ap->a_ioflag); 618 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 619 return (error); 620 621 case VBLK: 622 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 623 if (uio->uio_resid == 0) 624 return (0); 625 if (uio->uio_offset < 0) 626 return (EINVAL); 627 bsize = BLKDEV_IOSIZE; 628 if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) { 629 if (dpart.part->p_fstype == FS_BSDFFS && 630 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 631 bsize = dpart.part->p_frag * 632 dpart.part->p_fsize; 633 } 634 bscale = bsize >> DEV_BSHIFT; 635 do { 636 bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); 637 on = uio->uio_offset % bsize; 638 n = min((unsigned)(bsize - on), uio->uio_resid); 639 if (n == bsize) 640 bp = getblk(vp, bn, bsize, 0, 0); 641 else 642 error = bread(vp, bn, bsize, NOCRED, 643 B_MODIFY, &bp); 644 if (error) { 645 brelse(bp, 0); 646 return (error); 647 } 648 n = min(n, bsize - bp->b_resid); 649 error = uiomove((char *)bp->b_data + on, n, uio); 650 if (error) 651 brelse(bp, 0); 652 else { 653 if (n + on == bsize) 654 bawrite(bp); 655 else 656 bdwrite(bp); 657 error = bp->b_error; 658 } 659 } while (error == 0 && uio->uio_resid > 0 && n != 0); 660 return (error); 661 662 default: 663 panic("spec_write type"); 664 } 665 /* NOTREACHED */ 666 } 667 668 /* 669 * Device ioctl operation. 670 */ 671 /* ARGSUSED */ 672 int 673 spec_ioctl(void *v) 674 { 675 struct vop_ioctl_args /* { 676 struct vnode *a_vp; 677 u_long a_command; 678 void *a_data; 679 int a_fflag; 680 kauth_cred_t a_cred; 681 } */ *ap = v; 682 struct vnode *vp; 683 dev_t dev; 684 685 /* 686 * Extract all the info we need from the vnode, taking care to 687 * avoid a race with VOP_REVOKE(). 688 */ 689 690 vp = ap->a_vp; 691 dev = NODEV; 692 mutex_enter(&vp->v_interlock); 693 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) { 694 dev = vp->v_rdev; 695 } 696 mutex_exit(&vp->v_interlock); 697 if (dev == NODEV) { 698 return ENXIO; 699 } 700 701 switch (vp->v_type) { 702 703 case VCHR: 704 return cdev_ioctl(dev, ap->a_command, ap->a_data, 705 ap->a_fflag, curlwp); 706 707 case VBLK: 708 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 709 return bdev_ioctl(dev, ap->a_command, ap->a_data, 710 ap->a_fflag, curlwp); 711 712 default: 713 panic("spec_ioctl"); 714 /* NOTREACHED */ 715 } 716 } 717 718 /* ARGSUSED */ 719 int 720 spec_poll(void *v) 721 { 722 struct vop_poll_args /* { 723 struct vnode *a_vp; 724 int a_events; 725 } */ *ap = v; 726 struct vnode *vp; 727 dev_t dev; 728 729 /* 730 * Extract all the info we need from the vnode, taking care to 731 * avoid a race with VOP_REVOKE(). 732 */ 733 734 vp = ap->a_vp; 735 dev = NODEV; 736 mutex_enter(&vp->v_interlock); 737 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) { 738 dev = vp->v_rdev; 739 } 740 mutex_exit(&vp->v_interlock); 741 if (dev == NODEV) { 742 return POLLERR; 743 } 744 745 switch (vp->v_type) { 746 747 case VCHR: 748 return cdev_poll(dev, ap->a_events, curlwp); 749 750 default: 751 return (genfs_poll(v)); 752 } 753 } 754 755 /* ARGSUSED */ 756 int 757 spec_kqfilter(void *v) 758 { 759 struct vop_kqfilter_args /* { 760 struct vnode *a_vp; 761 struct proc *a_kn; 762 } */ *ap = v; 763 dev_t dev; 764 765 switch (ap->a_vp->v_type) { 766 767 case VCHR: 768 dev = ap->a_vp->v_rdev; 769 return cdev_kqfilter(dev, ap->a_kn); 770 default: 771 /* 772 * Block devices don't support kqfilter, and refuse it 773 * for any other files (like those vflush()ed) too. 774 */ 775 return (EOPNOTSUPP); 776 } 777 } 778 779 /* 780 * Allow mapping of only D_DISK. This is called only for VBLK. 781 */ 782 int 783 spec_mmap(void *v) 784 { 785 struct vop_mmap_args /* { 786 struct vnode *a_vp; 787 vm_prot_t a_prot; 788 kauth_cred_t a_cred; 789 } */ *ap = v; 790 struct vnode *vp = ap->a_vp; 791 792 KASSERT(vp->v_type == VBLK); 793 if (bdev_type(vp->v_rdev) != D_DISK) 794 return EINVAL; 795 796 return 0; 797 } 798 799 /* 800 * Synch buffers associated with a block device 801 */ 802 /* ARGSUSED */ 803 int 804 spec_fsync(void *v) 805 { 806 struct vop_fsync_args /* { 807 struct vnode *a_vp; 808 kauth_cred_t a_cred; 809 int a_flags; 810 off_t offlo; 811 off_t offhi; 812 } */ *ap = v; 813 struct vnode *vp = ap->a_vp; 814 struct mount *mp; 815 int error; 816 817 if (vp->v_type == VBLK) { 818 if ((mp = vp->v_specmountpoint) != NULL) { 819 error = VFS_FSYNC(mp, vp, ap->a_flags | FSYNC_VFS); 820 if (error != EOPNOTSUPP) 821 return error; 822 } 823 vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0); 824 } 825 return (0); 826 } 827 828 /* 829 * Just call the device strategy routine 830 */ 831 int 832 spec_strategy(void *v) 833 { 834 struct vop_strategy_args /* { 835 struct vnode *a_vp; 836 struct buf *a_bp; 837 } */ *ap = v; 838 struct vnode *vp = ap->a_vp; 839 struct buf *bp = ap->a_bp; 840 int error; 841 842 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 843 844 error = 0; 845 bp->b_dev = vp->v_rdev; 846 847 if (!(bp->b_flags & B_READ)) 848 error = fscow_run(bp, false); 849 850 if (error) { 851 bp->b_error = error; 852 biodone(bp); 853 return (error); 854 } 855 856 bdev_strategy(bp); 857 858 return (0); 859 } 860 861 int 862 spec_inactive(void *v) 863 { 864 struct vop_inactive_args /* { 865 struct vnode *a_vp; 866 struct proc *a_l; 867 } */ *ap = v; 868 869 VOP_UNLOCK(ap->a_vp, 0); 870 return (0); 871 } 872 873 /* 874 * This is a noop, simply returning what one has been given. 875 */ 876 int 877 spec_bmap(void *v) 878 { 879 struct vop_bmap_args /* { 880 struct vnode *a_vp; 881 daddr_t a_bn; 882 struct vnode **a_vpp; 883 daddr_t *a_bnp; 884 int *a_runp; 885 } */ *ap = v; 886 887 if (ap->a_vpp != NULL) 888 *ap->a_vpp = ap->a_vp; 889 if (ap->a_bnp != NULL) 890 *ap->a_bnp = ap->a_bn; 891 if (ap->a_runp != NULL) 892 *ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1; 893 return (0); 894 } 895 896 /* 897 * Device close routine 898 */ 899 /* ARGSUSED */ 900 int 901 spec_close(void *v) 902 { 903 struct vop_close_args /* { 904 struct vnode *a_vp; 905 int a_fflag; 906 kauth_cred_t a_cred; 907 } */ *ap = v; 908 struct vnode *vp = ap->a_vp; 909 struct session *sess; 910 dev_t dev = vp->v_rdev; 911 int mode, error, flags, flags1, count; 912 specnode_t *sn; 913 specdev_t *sd; 914 915 flags = vp->v_iflag; 916 sn = vp->v_specnode; 917 sd = sn->sn_dev; 918 919 switch (vp->v_type) { 920 921 case VCHR: 922 /* 923 * Hack: a tty device that is a controlling terminal 924 * has a reference from the session structure. We 925 * cannot easily tell that a character device is a 926 * controlling terminal, unless it is the closing 927 * process' controlling terminal. In that case, if the 928 * open count is 1 release the reference from the 929 * session. Also, remove the link from the tty back to 930 * the session and pgrp. 931 * 932 * XXX V. fishy. 933 */ 934 mutex_enter(proc_lock); 935 sess = curlwp->l_proc->p_session; 936 if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) { 937 mutex_spin_enter(&tty_lock); 938 sess->s_ttyvp = NULL; 939 if (sess->s_ttyp->t_session != NULL) { 940 sess->s_ttyp->t_pgrp = NULL; 941 sess->s_ttyp->t_session = NULL; 942 mutex_spin_exit(&tty_lock); 943 /* Releases proc_lock. */ 944 proc_sessrele(sess); 945 } else { 946 mutex_spin_exit(&tty_lock); 947 if (sess->s_ttyp->t_pgrp != NULL) 948 panic("spec_close: spurious pgrp ref"); 949 mutex_exit(proc_lock); 950 } 951 vrele(vp); 952 } else 953 mutex_exit(proc_lock); 954 955 /* 956 * If the vnode is locked, then we are in the midst 957 * of forcably closing the device, otherwise we only 958 * close on last reference. 959 */ 960 mode = S_IFCHR; 961 break; 962 963 case VBLK: 964 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); 965 /* 966 * On last close of a block device (that isn't mounted) 967 * we must invalidate any in core blocks, so that 968 * we can, for instance, change floppy disks. 969 */ 970 error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0); 971 if (error) 972 return (error); 973 /* 974 * We do not want to really close the device if it 975 * is still in use unless we are trying to close it 976 * forcibly. Since every use (buffer, vnode, swap, cmap) 977 * holds a reference to the vnode, and because we mark 978 * any other vnodes that alias this device, when the 979 * sum of the reference counts on all the aliased 980 * vnodes descends to one, we are on last close. 981 */ 982 mode = S_IFBLK; 983 break; 984 985 default: 986 panic("spec_close: not special"); 987 } 988 989 mutex_enter(&device_lock); 990 sn->sn_opencnt--; 991 count = --sd->sd_opencnt; 992 if (vp->v_type == VBLK) 993 sd->sd_bdevvp = NULL; 994 mutex_exit(&device_lock); 995 996 if (count != 0) 997 return 0; 998 999 flags1 = ap->a_fflag; 1000 1001 /* 1002 * if VI_XLOCK is set, then we're going away soon, so make this 1003 * non-blocking. Also ensures that we won't wedge in vn_lock below. 1004 */ 1005 if (flags & VI_XLOCK) 1006 flags1 |= FNONBLOCK; 1007 1008 /* 1009 * If we're able to block, release the vnode lock & reacquire. We 1010 * might end up sleeping for someone else who wants our queues. They 1011 * won't get them if we hold the vnode locked. Also, if VI_XLOCK is 1012 * set, don't release the lock as we won't be able to regain it. 1013 */ 1014 if (!(flags1 & FNONBLOCK)) 1015 VOP_UNLOCK(vp, 0); 1016 1017 if (vp->v_type == VBLK) 1018 error = bdev_close(dev, flags1, mode, curlwp); 1019 else 1020 error = cdev_close(dev, flags1, mode, curlwp); 1021 1022 if (!(flags1 & FNONBLOCK)) 1023 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1024 1025 return (error); 1026 } 1027 1028 /* 1029 * Print out the contents of a special device vnode. 1030 */ 1031 int 1032 spec_print(void *v) 1033 { 1034 struct vop_print_args /* { 1035 struct vnode *a_vp; 1036 } */ *ap = v; 1037 1038 printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev), 1039 (unsigned long long)minor(ap->a_vp->v_rdev)); 1040 return 0; 1041 } 1042 1043 /* 1044 * Return POSIX pathconf information applicable to special devices. 1045 */ 1046 int 1047 spec_pathconf(void *v) 1048 { 1049 struct vop_pathconf_args /* { 1050 struct vnode *a_vp; 1051 int a_name; 1052 register_t *a_retval; 1053 } */ *ap = v; 1054 1055 switch (ap->a_name) { 1056 case _PC_LINK_MAX: 1057 *ap->a_retval = LINK_MAX; 1058 return (0); 1059 case _PC_MAX_CANON: 1060 *ap->a_retval = MAX_CANON; 1061 return (0); 1062 case _PC_MAX_INPUT: 1063 *ap->a_retval = MAX_INPUT; 1064 return (0); 1065 case _PC_PIPE_BUF: 1066 *ap->a_retval = PIPE_BUF; 1067 return (0); 1068 case _PC_CHOWN_RESTRICTED: 1069 *ap->a_retval = 1; 1070 return (0); 1071 case _PC_VDISABLE: 1072 *ap->a_retval = _POSIX_VDISABLE; 1073 return (0); 1074 case _PC_SYNC_IO: 1075 *ap->a_retval = 1; 1076 return (0); 1077 default: 1078 return (EINVAL); 1079 } 1080 /* NOTREACHED */ 1081 } 1082 1083 /* 1084 * Advisory record locking support. 1085 */ 1086 int 1087 spec_advlock(void *v) 1088 { 1089 struct vop_advlock_args /* { 1090 struct vnode *a_vp; 1091 void *a_id; 1092 int a_op; 1093 struct flock *a_fl; 1094 int a_flags; 1095 } */ *ap = v; 1096 struct vnode *vp = ap->a_vp; 1097 1098 return lf_advlock(ap, &vp->v_speclockf, (off_t)0); 1099 } 1100