/*	$NetBSD: spec_vnops.c,v 1.141 2013/09/30 18:58:00 hannken Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.141 2013/09/30 18:58:00 hannken Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>
#include <sys/tty.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/module.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char	devopn[] = "devopn";
const char	devio[] = "devio";
const char	devwait[] = "devwait";
const char	devin[] = "devin";
const char	devout[] = "devout";
const char	devioc[] = "devioc";
const char	devcls[] = "devcls";

#define	SPECHSZ	64
#if	((SPECHSZ&(SPECHSZ-1)) == 0)
#define	SPECHASH(rdev)	(((rdev>>5)+(rdev))&(SPECHSZ-1))
#else
#define	SPECHASH(rdev)	(((unsigned)((rdev>>5)+(rdev)))%SPECHSZ)
#endif

static vnode_t	*specfs_hash[SPECHSZ];

/*
 * This vnode operations vector is used for special device nodes
 * created from whole cloth by the kernel.  For the ops vector for
 * vnodes built from special devices found in a filesystem, see (e.g)
 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other
 * filesystems.
 */

int (**spec_vnodeop_p)(void *);
const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_fcntl_desc, spec_fcntl },		/* fcntl */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

static kauth_listener_t rawio_listener;

/* Returns true if vnode is /dev/mem or /dev/kmem. */
bool
iskmemvp(struct vnode *vp)
{
	return ((vp->v_type == VCHR) && iskmemdev(vp->v_rdev));
}

/*
 * Returns true if dev is /dev/mem or /dev/kmem.
 */
int
iskmemdev(dev_t dev)
{
	/* mem_no is emitted by config(8) to generated devsw.c */
	extern const int mem_no;

	/* minor 14 is /dev/io on i386 with COMPAT_10 */
	return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14));
}

static int
rawio_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	if ((action != KAUTH_DEVICE_RAWIO_SPEC) &&
	    (action != KAUTH_DEVICE_RAWIO_PASSTHRU))
		return result;

	/* Access is mandated by permissions. */
	result = KAUTH_RESULT_ALLOW;

	return result;
}

void
spec_init(void)
{

	rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE,
	    rawio_listener_cb, NULL);
}

/*
 * Initialize a vnode that represents a device.
 */
void
spec_node_init(vnode_t *vp, dev_t rdev)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t *vp2;
	vnode_t **vpp;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode == NULL);

	/*
	 * Search the hash table for this device.  If known, add a
	 * reference to the device structure.  If not known, create
	 * a new entry to represent the device.  In all cases add
	 * the vnode to the hash table.
	 */
	sn = kmem_alloc(sizeof(*sn), KM_SLEEP);
	if (sn == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	sd = kmem_alloc(sizeof(*sd), KM_SLEEP);
	if (sd == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	mutex_enter(&device_lock);
	vpp = &specfs_hash[SPECHASH(rdev)];
	for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) {
		KASSERT(vp2->v_specnode != NULL);
		if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) {
			break;
		}
	}
	if (vp2 == NULL) {
		/* No existing record, create a new one. */
		sd->sd_rdev = rdev;
		sd->sd_mountpoint = NULL;
		sd->sd_lockf = NULL;
		sd->sd_refcnt = 1;
		sd->sd_opencnt = 0;
		sd->sd_bdevvp = NULL;
		sn->sn_dev = sd;
		sd = NULL;
	} else {
		/* Use the existing record. */
		sn->sn_dev = vp2->v_specnode->sn_dev;
		sn->sn_dev->sd_refcnt++;
	}
	/* Insert vnode into the hash chain. */
	sn->sn_opencnt = 0;
	sn->sn_rdev = rdev;
	sn->sn_gone = false;
	vp->v_specnode = sn;
	vp->v_specnext = *vpp;
	*vpp = vp;
	mutex_exit(&device_lock);

	/* Free the record we allocated if unused. */
	if (sd != NULL) {
		kmem_free(sd, sizeof(*sd));
	}
}
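
/*
 * Illustrative sketch (not part of the original file): two vnodes of
 * the same type and rdev passed through spec_node_init() end up with
 * distinct per-vnode specnode_t records that share a single
 * specdev_t, whose sd_refcnt counts the aliases.  The helper below is
 * hypothetical and compiled out; it only demonstrates the intended
 * invariant.
 */
#if 0
static void
example_alias_sharing(vnode_t *vp1, vnode_t *vp2, dev_t rdev)
{
	/* Assume vp1 and vp2 have the same v_type, e.g. VBLK. */
	spec_node_init(vp1, rdev);
	spec_node_init(vp2, rdev);

	/* Each vnode has its own specnode... */
	KASSERT(vp1->v_specnode != vp2->v_specnode);
	/* ...but both alias one shared device record. */
	KASSERT(vp1->v_specnode->sn_dev == vp2->v_specnode->sn_dev);
	KASSERT(vp1->v_specnode->sn_dev->sd_refcnt >= 2);
}
#endif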
/*
 * Lookup a vnode by device number and return it referenced.
 */
int
spec_node_lookup_by_dev(enum vtype type, dev_t dev, vnode_t **vpp)
{
	int error;
	vnode_t *vp;

	mutex_enter(&device_lock);
	for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (type == vp->v_type && dev == vp->v_rdev) {
			mutex_enter(vp->v_interlock);
			/* If clean or being cleaned, then ignore it. */
			if ((vp->v_iflag & (VI_CLEAN | VI_XLOCK)) == 0)
				break;
			mutex_exit(vp->v_interlock);
		}
	}
	KASSERT(vp == NULL || mutex_owned(vp->v_interlock));
	if (vp == NULL) {
		mutex_exit(&device_lock);
		return ENOENT;
	}
	/*
	 * If it is an opened block device return the opened vnode.
	 */
	if (type == VBLK && vp->v_specnode->sn_dev->sd_bdevvp != NULL) {
		mutex_exit(vp->v_interlock);
		vp = vp->v_specnode->sn_dev->sd_bdevvp;
		mutex_enter(vp->v_interlock);
	}
	mutex_exit(&device_lock);
	error = vget(vp, 0);
	if (error != 0)
		return error;
	*vpp = vp;

	return 0;
}

/*
 * Lookup a vnode by file system mounted on and return it referenced.
 */
int
spec_node_lookup_by_mount(struct mount *mp, vnode_t **vpp)
{
	int i, error;
	vnode_t *vp, *vq;

	mutex_enter(&device_lock);
	for (i = 0, vq = NULL; i < SPECHSZ && vq == NULL; i++) {
		for (vp = specfs_hash[i]; vp; vp = vp->v_specnext) {
			if (vp->v_type != VBLK)
				continue;
			vq = vp->v_specnode->sn_dev->sd_bdevvp;
			if (vq != NULL &&
			    vq->v_specnode->sn_dev->sd_mountpoint == mp)
				break;
			vq = NULL;
		}
	}
	if (vq == NULL) {
		mutex_exit(&device_lock);
		return ENOENT;
	}
	mutex_enter(vq->v_interlock);
	mutex_exit(&device_lock);
	error = vget(vq, 0);
	if (error != 0)
		return error;
	*vpp = vq;

	return 0;
}

/*
 * Get the file system mounted on this block device.
 */
struct mount *
spec_node_getmountedfs(vnode_t *devvp)
{
	struct mount *mp;

	KASSERT(devvp->v_type == VBLK);
	mp = devvp->v_specnode->sn_dev->sd_mountpoint;

	return mp;
}

/*
 * Set the file system mounted on this block device.
 */
void
spec_node_setmountedfs(vnode_t *devvp, struct mount *mp)
{

	KASSERT(devvp->v_type == VBLK);
	KASSERT(devvp->v_specnode->sn_dev->sd_mountpoint == NULL || mp == NULL);
	devvp->v_specnode->sn_dev->sd_mountpoint = mp;
}

/*
 * A vnode representing a special device is going away.  Close
 * the device if the vnode holds it open.
 */
void
spec_node_revoke(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT((vp->v_iflag & VI_XLOCK) != 0);
	KASSERT(sn->sn_gone == false);

	mutex_enter(&device_lock);
	KASSERT(sn->sn_opencnt <= sd->sd_opencnt);
	if (sn->sn_opencnt != 0) {
		sd->sd_opencnt -= (sn->sn_opencnt - 1);
		sn->sn_opencnt = 1;
		sn->sn_gone = true;
		mutex_exit(&device_lock);

		VOP_CLOSE(vp, FNONBLOCK, NOCRED);

		mutex_enter(&device_lock);
		KASSERT(sn->sn_opencnt == 0);
	}
	mutex_exit(&device_lock);
}
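
/*
 * Illustrative sketch (not part of the original file): the typical
 * consumer pattern for spec_node_lookup_by_dev() -- obtain a
 * referenced device vnode for a dev_t, use it, then drop the
 * reference.  The helper is hypothetical and compiled out.
 */
#if 0
static int
example_use_devvp(dev_t dev)
{
	vnode_t *vp;
	int error;

	error = spec_node_lookup_by_dev(VBLK, dev, &vp);
	if (error != 0)
		return error;	/* ENOENT: no vnode for this device */

	/* vp is referenced (via vget) here, but not locked. */

	vrele(vp);
	return 0;
}
#endif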
/*
 * A vnode representing a special device is being recycled.
 * Destroy the specfs component.
 */
void
spec_node_destroy(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t **vpp, *vp2;
	int refcnt;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT(sn->sn_opencnt == 0);

	mutex_enter(&device_lock);
	/* Remove from the hash and destroy the node. */
	vpp = &specfs_hash[SPECHASH(vp->v_rdev)];
	for (vp2 = *vpp;; vp2 = vp2->v_specnext) {
		if (vp2 == NULL) {
			panic("spec_node_destroy: corrupt hash");
		}
		if (vp2 == vp) {
			KASSERT(vp == *vpp);
			*vpp = vp->v_specnext;
			break;
		}
		if (vp2->v_specnext == vp) {
			vp2->v_specnext = vp->v_specnext;
			break;
		}
	}
	sn = vp->v_specnode;
	vp->v_specnode = NULL;
	refcnt = sd->sd_refcnt--;
	KASSERT(refcnt > 0);
	mutex_exit(&device_lock);

	/* If the device is no longer in use, destroy our record. */
	if (refcnt == 1) {
		KASSERT(sd->sd_opencnt == 0);
		KASSERT(sd->sd_bdevvp == NULL);
		kmem_free(sd, sizeof(*sd));
	}
	kmem_free(sn, sizeof(*sn));
}

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct lwp *l;
	struct vnode *vp;
	dev_t dev;
	int error;
	struct partinfo pi;
	enum kauth_device_req req;
	specnode_t *sn;
	specdev_t *sd;

	u_int gen;
	const char *name;

	l = curlwp;
	vp = ap->a_vp;
	dev = vp->v_rdev;
	sn = vp->v_specnode;
	sd = sn->sn_dev;
	name = NULL;
	gen = 0;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (ap->a_mode & (FREAD | FWRITE)) {
	case FREAD | FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW;
		break;
	case FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE;
		break;
	default:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ;
		break;
	}

	switch (vp->v_type) {
	case VCHR:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * Character devices can accept opens from multiple
		 * vnodes.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		sd->sd_opencnt++;
		sn->sn_opencnt++;
		mutex_exit(&device_lock);
		if (cdev_type(dev) == D_TTY)
			vp->v_vflag |= VV_ISTTY;
		VOP_UNLOCK(vp);
		do {
			const struct cdevsw *cdev;

			gen = module_gen;
			error = cdev_open(dev, ap->a_mode, S_IFCHR, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			cdev = cdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (cdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = cdevsw_getname(major(dev))) == NULL)
				break;

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);
		} while (gen != module_gen);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		break;

	case VBLK:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * For block devices, permit only one open.  The buffer
		 * cache cannot remain self-consistent with multiple
		 * vnodes holding a block device open.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		if (sd->sd_opencnt != 0) {
			mutex_exit(&device_lock);
			return EBUSY;
		}
		sn->sn_opencnt = 1;
		sd->sd_opencnt = 1;
		sd->sd_bdevvp = vp;
		mutex_exit(&device_lock);
		do {
			const struct bdevsw *bdev;

			gen = module_gen;
			error = bdev_open(dev, ap->a_mode, S_IFBLK, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			bdev = bdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (bdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = bdevsw_getname(major(dev))) == NULL)
				break;

			VOP_UNLOCK(vp);

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		} while (gen != module_gen);

		break;

	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
	default:
		return 0;
	}

	mutex_enter(&device_lock);
	if (sn->sn_gone) {
		if (error == 0)
			error = EBADF;
	} else if (error != 0) {
		sd->sd_opencnt--;
		sn->sn_opencnt--;
		if (vp->v_type == VBLK)
			sd->sd_bdevvp = NULL;
	}
	mutex_exit(&device_lock);

	if (cdev_type(dev) != D_DISK || error != 0)
		return error;

	if (vp->v_type == VCHR)
		error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	else
		error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	if (error == 0)
		uvm_vnp_setsize(vp,
		    (voff_t)pi.disklab->d_secsize * pi.part->p_size);
	return 0;
}
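
/*
 * Illustrative sketch (not part of the original file): the retry
 * protocol used by spec_open() above.  module_gen is bumped whenever
 * the set of loaded modules changes, so sampling it before the open
 * attempt and looping while it has moved retries the open after a
 * driver autoload without spinning forever.  This hypothetical,
 * compiled-out helper mirrors the character-device case only.
 */
#if 0
static int
example_open_with_autoload(dev_t dev, int mode, struct lwp *l)
{
	const struct cdevsw *cdev;
	const char *name;
	u_int gen;
	int error;

	do {
		gen = module_gen;		/* sample before trying */
		error = cdev_open(dev, mode, S_IFCHR, l);
		if (error != ENXIO)		/* opened, or a hard error */
			break;
		/* Driver already present?  Then ENXIO is the real answer. */
		mutex_enter(&device_lock);
		cdev = cdevsw_lookup(dev);
		mutex_exit(&device_lock);
		if (cdev != NULL)
			break;
		if ((name = cdevsw_getname(major(dev))) == NULL)
			break;			/* no name to load by */
		(void) module_autoload(name, MODULE_CLASS_DRIVER);
	} while (gen != module_gen);		/* retry iff modules changed */

	return error;
}
#endif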
/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;

		/*
		 * dholland 20130616: XXX this logic should not be
		 * here. It is here because the old buffer cache
		 * demands that all accesses to the same blocks need
		 * to be the same size; but it only works for FFS and
		 * nowadays I think it'll fail silently if the size
		 * info in the disklabel is wrong. (Or missing.) The
		 * buffer cache needs to be smarter; or failing that
		 * we need a reliable way here to get the right block
		 * size; or a reliable way to guarantee that (a) the
		 * fs is not mounted when we get here and (b) any
		 * buffers generated here will get purged when the fs
		 * does get mounted.
		 */
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}

		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			error = bread(vp, bn, bsize, NOCRED, 0, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp, 0);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
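
/*
 * Worked example (added for illustration, not in the original): with
 * bsize = 8192 (an assumed FFS frag * fsize) and DEV_BSHIFT = 9, a
 * read of 1000 bytes at uio_offset = 12288 in the loop above gives:
 *
 *	bscale = 8192 >> 9 = 16 DEV_BSIZE sectors per buffer
 *	bn = (12288 >> 9) & ~(16 - 1) = 24 & ~15 = 16
 *	on = 12288 % 8192 = 4096
 *	n  = min(8192 - 4096, 1000) = 1000
 *
 * i.e. one iteration reads the 8 KiB buffer starting at sector 16
 * (covering byte offsets 8192..16383) and copies 1000 bytes out of it
 * starting at offset 4096 within the buffer.
 */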
/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED,
				    B_MODIFY, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp, 0);
			else {
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
int
spec_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		void  *a_data;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return ENXIO;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		return bdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/* ARGSUSED */
int
spec_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return POLLERR;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_poll(dev, ap->a_events, curlwp);

	default:
		return (genfs_poll(v));
	}
}

/* ARGSUSED */
int
spec_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode	*a_vp;
		struct knote	*a_kn;
	} */ *ap = v;
	dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return cdev_kqfilter(dev, ap->a_kn);
	default:
		/*
		 * Block devices don't support kqfilter, and refuse it
		 * for any other files (like those vflush()ed) too.
		 */
		return (EOPNOTSUPP);
	}
}
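
/*
 * Illustrative sketch (not part of the original file): the snapshot
 * pattern used by spec_ioctl() and spec_poll() above.  Because a
 * concurrent VOP_REVOKE() can tear the specnode off the vnode, the
 * dev_t is copied out under v_interlock and the device is addressed
 * only through that copy afterwards; the vnode fields are never
 * touched again.  Hypothetical helper, compiled out.
 */
#if 0
static dev_t
example_snapshot_rdev(struct vnode *vp)
{
	dev_t dev = NODEV;

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode != NULL)
		dev = vp->v_rdev;	/* safe: revoke is excluded here */
	mutex_exit(vp->v_interlock);

	return dev;			/* NODEV means "being revoked" */
}
#endif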
/*
 * Allow mapping of only D_DISK.  This is called only for VBLK.
 */
int
spec_mmap(void *v)
{
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_type == VBLK);
	if (bdev_type(vp->v_rdev) != D_DISK)
		return EINVAL;

	return 0;
}

/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t offlo;
		off_t offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct mount *mp;
	int error;

	if (vp->v_type == VBLK) {
		if ((mp = spec_node_getmountedfs(vp)) != NULL) {
			error = VFS_FSYNC(mp, vp, ap->a_flags);
			if (error != EOPNOTSUPP)
				return error;
		}
		return vflushbuf(vp, ap->a_flags);
	}
	return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	int error;

	KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);

	error = 0;
	bp->b_dev = vp->v_rdev;

	if (!(bp->b_flags & B_READ))
		error = fscow_run(bp, false);

	if (error) {
		bp->b_error = error;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (error);
	}

	bdev_strategy(bp);

	return (0);
}

int
spec_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_l;
	} */ *ap = v;

	VOP_UNLOCK(ap->a_vp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
	return (0);
}
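
/*
 * Illustrative sketch (not part of the original file): for a device
 * vnode the logical address space is the device itself, so spec_bmap()
 * above is the identity mapping -- block N of the vnode is block N of
 * the device -- and the reported read-ahead run is just the largest
 * contiguous span the buffer cache will accept.  Hypothetical caller,
 * compiled out.
 */
#if 0
static void
example_bmap_identity(struct vnode *devvp)
{
	struct vnode *devvp2;
	daddr_t pbn;

	if (VOP_BMAP(devvp, 100, &devvp2, &pbn, NULL) == 0) {
		KASSERT(devvp2 == devvp);	/* same device vnode */
		KASSERT(pbn == 100);		/* same block number */
	}
}
#endif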
/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct session *sess;
	dev_t dev = vp->v_rdev;
	int mode, error, flags, flags1, count;
	specnode_t *sn;
	specdev_t *sd;

	flags = vp->v_iflag;
	sn = vp->v_specnode;
	sd = sn->sn_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.  We
		 * cannot easily tell that a character device is a
		 * controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case, if the
		 * open count is 1 release the reference from the
		 * session.  Also, remove the link from the tty back to
		 * the session and pgrp.
		 *
		 * XXX V. fishy.
		 */
		mutex_enter(proc_lock);
		sess = curlwp->l_proc->p_session;
		if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) {
			mutex_spin_enter(&tty_lock);
			sess->s_ttyvp = NULL;
			if (sess->s_ttyp->t_session != NULL) {
				sess->s_ttyp->t_pgrp = NULL;
				sess->s_ttyp->t_session = NULL;
				mutex_spin_exit(&tty_lock);
				/* Releases proc_lock. */
				proc_sessrele(sess);
			} else {
				mutex_spin_exit(&tty_lock);
				if (sess->s_ttyp->t_pgrp != NULL)
					panic("spec_close: spurious pgrp ref");
				mutex_exit(proc_lock);
			}
			vrele(vp);
		} else
			mutex_exit(proc_lock);

		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		mode = S_IFCHR;
		break;

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly.  Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	mutex_enter(&device_lock);
	sn->sn_opencnt--;
	count = --sd->sd_opencnt;
	if (vp->v_type == VBLK)
		sd->sd_bdevvp = NULL;
	mutex_exit(&device_lock);

	if (count != 0)
		return 0;

	flags1 = ap->a_fflag;

	/*
	 * if VI_XLOCK is set, then we're going away soon, so make this
	 * non-blocking. Also ensures that we won't wedge in vn_lock below.
	 */
	if (flags & VI_XLOCK)
		flags1 |= FNONBLOCK;

	/*
	 * If we're able to block, release the vnode lock & reacquire. We
	 * might end up sleeping for someone else who wants our queues. They
	 * won't get them if we hold the vnode locked.  Also, if VI_XLOCK is
	 * set, don't release the lock as we won't be able to regain it.
	 */
	if (!(flags1 & FNONBLOCK))
		VOP_UNLOCK(vp);

	if (vp->v_type == VBLK)
		error = bdev_close(dev, flags1, mode, curlwp);
	else
		error = cdev_close(dev, flags1, mode, curlwp);

	if (!(flags1 & FNONBLOCK))
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	return (error);
}

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev),
	    (unsigned long long)minor(ap->a_vp->v_rdev));
	return 0;
}
/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(void *v)
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap = v;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	case _PC_SYNC_IO:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
}
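
/*
 * Illustrative userland sketch (not part of this file): the values
 * returned by spec_pathconf() surface through fpathconf(2), e.g. the
 * _PC_MAX_CANON case above is what a program sees when querying a
 * terminal device.  Compiled out; never built into the kernel.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/dev/tty", O_RDONLY);

	if (fd == -1)
		return 1;
	/* Resolved by the _PC_MAX_CANON case in spec_pathconf(). */
	printf("MAX_CANON = %ld\n", fpathconf(fd, _PC_MAX_CANON));
	close(fd);
	return 0;
}
#endif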