1 /* 2 * Copyright (c) 2009 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Alex Hornung <ahornung@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/time.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/fcntl.h> 40 #include <sys/proc.h> 41 #include <sys/priv.h> 42 #include <sys/signalvar.h> 43 #include <sys/vnode.h> 44 #include <sys/uio.h> 45 #include <sys/mount.h> 46 #include <sys/file.h> 47 #include <sys/fcntl.h> 48 #include <sys/namei.h> 49 #include <sys/dirent.h> 50 #include <sys/malloc.h> 51 #include <sys/stat.h> 52 #include <sys/reg.h> 53 #include <vm/vm_pager.h> 54 #include <vm/vm_zone.h> 55 #include <vm/vm_object.h> 56 #include <sys/filio.h> 57 #include <sys/ttycom.h> 58 #include <sys/tty.h> 59 #include <sys/diskslice.h> 60 #include <sys/sysctl.h> 61 #include <sys/devfs.h> 62 #include <sys/pioctl.h> 63 64 #include <machine/limits.h> 65 66 #include <sys/buf2.h> 67 #include <sys/sysref2.h> 68 #include <sys/mplock2.h> 69 #include <vm/vm_page2.h> 70 71 MALLOC_DECLARE(M_DEVFS); 72 #define DEVFS_BADOP (void *)devfs_badop 73 74 static int devfs_badop(struct vop_generic_args *); 75 static int devfs_access(struct vop_access_args *); 76 static int devfs_inactive(struct vop_inactive_args *); 77 static int devfs_reclaim(struct vop_reclaim_args *); 78 static int devfs_readdir(struct vop_readdir_args *); 79 static int devfs_getattr(struct vop_getattr_args *); 80 static int devfs_setattr(struct vop_setattr_args *); 81 static int devfs_readlink(struct vop_readlink_args *); 82 static int devfs_print(struct vop_print_args *); 83 84 static int devfs_nresolve(struct vop_nresolve_args *); 85 static int devfs_nlookupdotdot(struct vop_nlookupdotdot_args *); 86 static int devfs_nmkdir(struct vop_nmkdir_args *); 87 static int devfs_nsymlink(struct vop_nsymlink_args *); 88 static int devfs_nrmdir(struct vop_nrmdir_args *); 89 static int devfs_nremove(struct vop_nremove_args *); 90 91 static int devfs_spec_open(struct vop_open_args *); 92 static int devfs_spec_close(struct vop_close_args *); 93 static int devfs_spec_fsync(struct vop_fsync_args *); 94 95 static int devfs_spec_read(struct vop_read_args *); 96 static int devfs_spec_write(struct vop_write_args *); 97 static int devfs_spec_ioctl(struct vop_ioctl_args *); 98 static int devfs_spec_poll(struct vop_poll_args *); 99 static int devfs_spec_kqfilter(struct vop_kqfilter_args *); 100 static int devfs_spec_strategy(struct vop_strategy_args *); 101 static void devfs_spec_strategy_done(struct bio *); 102 static int devfs_spec_freeblks(struct vop_freeblks_args *); 103 static int devfs_spec_bmap(struct vop_bmap_args *); 104 static int devfs_spec_advlock(struct vop_advlock_args *); 105 static void devfs_spec_getpages_iodone(struct bio *); 106 static int devfs_spec_getpages(struct vop_getpages_args *); 107 108 109 static int devfs_specf_close(struct file *); 110 static int devfs_specf_read(struct file *, struct uio *, struct ucred *, int); 111 static int devfs_specf_write(struct file *, struct uio *, struct ucred *, int); 112 static int devfs_specf_stat(struct file *, struct stat *, struct ucred *); 113 static int devfs_specf_kqfilter(struct file *, struct knote *); 114 static int devfs_specf_poll(struct file *, int, struct ucred *); 115 static int devfs_specf_ioctl(struct file *, u_long, caddr_t, 116 struct ucred *, struct sysmsg *); 117 static __inline int sequential_heuristic(struct uio *, struct file *); 118 119 extern struct lock devfs_lock; 120 121 static int mpsafe_reads, mpsafe_writes, mplock_reads, mplock_writes; 122 123 /* 124 * devfs vnode operations for regular files 125 */ 126 struct vop_ops devfs_vnode_norm_vops = { 127 .vop_default = vop_defaultop, 128 .vop_access = devfs_access, 129 .vop_advlock = DEVFS_BADOP, 130 .vop_bmap = DEVFS_BADOP, 131 .vop_close = vop_stdclose, 132 .vop_getattr = devfs_getattr, 133 .vop_inactive = devfs_inactive, 134 .vop_ncreate = DEVFS_BADOP, 135 .vop_nresolve = devfs_nresolve, 136 .vop_nlookupdotdot = devfs_nlookupdotdot, 137 .vop_nlink = DEVFS_BADOP, 138 .vop_nmkdir = devfs_nmkdir, 139 .vop_nmknod = DEVFS_BADOP, 140 .vop_nremove = devfs_nremove, 141 .vop_nrename = DEVFS_BADOP, 142 .vop_nrmdir = devfs_nrmdir, 143 .vop_nsymlink = devfs_nsymlink, 144 .vop_open = vop_stdopen, 145 .vop_pathconf = vop_stdpathconf, 146 .vop_print = devfs_print, 147 .vop_read = DEVFS_BADOP, 148 .vop_readdir = devfs_readdir, 149 .vop_readlink = devfs_readlink, 150 .vop_reclaim = devfs_reclaim, 151 .vop_setattr = devfs_setattr, 152 .vop_write = DEVFS_BADOP, 153 .vop_ioctl = DEVFS_BADOP 154 }; 155 156 /* 157 * devfs vnode operations for character devices 158 */ 159 struct vop_ops devfs_vnode_dev_vops = { 160 .vop_default = vop_defaultop, 161 .vop_access = devfs_access, 162 .vop_advlock = devfs_spec_advlock, 163 .vop_bmap = devfs_spec_bmap, 164 .vop_close = devfs_spec_close, 165 .vop_freeblks = devfs_spec_freeblks, 166 .vop_fsync = devfs_spec_fsync, 167 .vop_getattr = devfs_getattr, 168 .vop_getpages = devfs_spec_getpages, 169 .vop_inactive = devfs_inactive, 170 .vop_open = devfs_spec_open, 171 .vop_pathconf = vop_stdpathconf, 172 .vop_print = devfs_print, 173 .vop_poll = devfs_spec_poll, 174 .vop_kqfilter = devfs_spec_kqfilter, 175 .vop_read = devfs_spec_read, 176 .vop_readdir = DEVFS_BADOP, 177 .vop_readlink = DEVFS_BADOP, 178 .vop_reclaim = devfs_reclaim, 179 .vop_setattr = devfs_setattr, 180 .vop_strategy = devfs_spec_strategy, 181 .vop_write = devfs_spec_write, 182 .vop_ioctl = devfs_spec_ioctl 183 }; 184 185 struct vop_ops *devfs_vnode_dev_vops_p = &devfs_vnode_dev_vops; 186 187 struct fileops devfs_dev_fileops = { 188 .fo_read = devfs_specf_read, 189 .fo_write = devfs_specf_write, 190 .fo_ioctl = devfs_specf_ioctl, 191 .fo_poll = devfs_specf_poll, 192 .fo_kqfilter = devfs_specf_kqfilter, 193 .fo_stat = devfs_specf_stat, 194 .fo_close = devfs_specf_close, 195 .fo_shutdown = nofo_shutdown 196 }; 197 198 /* 199 * These two functions are possibly temporary hacks for 200 * devices (aka the pty code) which want to control the 201 * node attributes themselves. 202 * 203 * XXX we may ultimately desire to simply remove the uid/gid/mode 204 * from the node entirely. 205 */ 206 static __inline void 207 node_sync_dev_get(struct devfs_node *node) 208 { 209 cdev_t dev; 210 211 if ((dev = node->d_dev) && (dev->si_flags & SI_OVERRIDE)) { 212 node->uid = dev->si_uid; 213 node->gid = dev->si_gid; 214 node->mode = dev->si_perms; 215 } 216 } 217 218 static __inline void 219 node_sync_dev_set(struct devfs_node *node) 220 { 221 cdev_t dev; 222 223 if ((dev = node->d_dev) && (dev->si_flags & SI_OVERRIDE)) { 224 dev->si_uid = node->uid; 225 dev->si_gid = node->gid; 226 dev->si_perms = node->mode; 227 } 228 } 229 230 /* 231 * generic entry point for unsupported operations 232 */ 233 static int 234 devfs_badop(struct vop_generic_args *ap) 235 { 236 return (EIO); 237 } 238 239 240 static int 241 devfs_access(struct vop_access_args *ap) 242 { 243 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 244 int error; 245 246 if (!devfs_node_is_accessible(node)) 247 return ENOENT; 248 node_sync_dev_get(node); 249 error = vop_helper_access(ap, node->uid, node->gid, 250 node->mode, node->flags); 251 252 return error; 253 } 254 255 256 static int 257 devfs_inactive(struct vop_inactive_args *ap) 258 { 259 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 260 261 if (node == NULL || (node->flags & DEVFS_NODE_LINKED) == 0) 262 vrecycle(ap->a_vp); 263 return 0; 264 } 265 266 267 static int 268 devfs_reclaim(struct vop_reclaim_args *ap) 269 { 270 struct devfs_node *node; 271 struct vnode *vp; 272 int locked; 273 274 /* 275 * Check if it is locked already. if not, we acquire the devfs lock 276 */ 277 if (!(lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE) { 278 lockmgr(&devfs_lock, LK_EXCLUSIVE); 279 locked = 1; 280 } else { 281 locked = 0; 282 } 283 284 /* 285 * Get rid of the devfs_node if it is no longer linked into the 286 * topology. 287 */ 288 vp = ap->a_vp; 289 if ((node = DEVFS_NODE(vp)) != NULL) { 290 node->v_node = NULL; 291 if ((node->flags & DEVFS_NODE_LINKED) == 0) 292 devfs_freep(node); 293 } 294 295 if (locked) 296 lockmgr(&devfs_lock, LK_RELEASE); 297 298 /* 299 * v_rdev needs to be properly released using v_release_rdev 300 * Make sure v_data is NULL as well. 301 */ 302 vp->v_data = NULL; 303 v_release_rdev(vp); 304 return 0; 305 } 306 307 308 static int 309 devfs_readdir(struct vop_readdir_args *ap) 310 { 311 struct devfs_node *dnode = DEVFS_NODE(ap->a_vp); 312 struct devfs_node *node; 313 int cookie_index; 314 int ncookies; 315 int error2; 316 int error; 317 int r; 318 off_t *cookies; 319 off_t saveoff; 320 321 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readdir() called!\n"); 322 323 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) 324 return (EINVAL); 325 if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0) 326 return (error); 327 328 if (!devfs_node_is_accessible(dnode)) { 329 vn_unlock(ap->a_vp); 330 return ENOENT; 331 } 332 333 lockmgr(&devfs_lock, LK_EXCLUSIVE); 334 335 saveoff = ap->a_uio->uio_offset; 336 337 if (ap->a_ncookies) { 338 ncookies = ap->a_uio->uio_resid / 16 + 1; /* Why / 16 ?? */ 339 if (ncookies > 256) 340 ncookies = 256; 341 cookies = kmalloc(256 * sizeof(off_t), M_TEMP, M_WAITOK); 342 cookie_index = 0; 343 } else { 344 ncookies = -1; 345 cookies = NULL; 346 cookie_index = 0; 347 } 348 349 nanotime(&dnode->atime); 350 351 if (saveoff == 0) { 352 r = vop_write_dirent(&error, ap->a_uio, dnode->d_dir.d_ino, 353 DT_DIR, 1, "."); 354 if (r) 355 goto done; 356 if (cookies) 357 cookies[cookie_index] = saveoff; 358 saveoff++; 359 cookie_index++; 360 if (cookie_index == ncookies) 361 goto done; 362 } 363 364 if (saveoff == 1) { 365 if (dnode->parent) { 366 r = vop_write_dirent(&error, ap->a_uio, 367 dnode->parent->d_dir.d_ino, 368 DT_DIR, 2, ".."); 369 } else { 370 r = vop_write_dirent(&error, ap->a_uio, 371 dnode->d_dir.d_ino, 372 DT_DIR, 2, ".."); 373 } 374 if (r) 375 goto done; 376 if (cookies) 377 cookies[cookie_index] = saveoff; 378 saveoff++; 379 cookie_index++; 380 if (cookie_index == ncookies) 381 goto done; 382 } 383 384 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) { 385 if ((node->flags & DEVFS_HIDDEN) || 386 (node->flags & DEVFS_INVISIBLE)) { 387 continue; 388 } 389 390 /* 391 * If the node type is a valid devfs alias, then we make sure that the 392 * target isn't hidden. If it is, we don't show the link in the 393 * directory listing. 394 */ 395 if ((node->node_type == Plink) && (node->link_target != NULL) && 396 (node->link_target->flags & DEVFS_HIDDEN)) 397 continue; 398 399 if (node->cookie < saveoff) 400 continue; 401 402 saveoff = node->cookie; 403 404 error2 = vop_write_dirent(&error, ap->a_uio, node->d_dir.d_ino, 405 node->d_dir.d_type, 406 node->d_dir.d_namlen, 407 node->d_dir.d_name); 408 409 if (error2) 410 break; 411 412 saveoff++; 413 414 if (cookies) 415 cookies[cookie_index] = node->cookie; 416 ++cookie_index; 417 if (cookie_index == ncookies) 418 break; 419 } 420 421 done: 422 lockmgr(&devfs_lock, LK_RELEASE); 423 vn_unlock(ap->a_vp); 424 425 ap->a_uio->uio_offset = saveoff; 426 if (error && cookie_index == 0) { 427 if (cookies) { 428 kfree(cookies, M_TEMP); 429 *ap->a_ncookies = 0; 430 *ap->a_cookies = NULL; 431 } 432 } else { 433 if (cookies) { 434 *ap->a_ncookies = cookie_index; 435 *ap->a_cookies = cookies; 436 } 437 } 438 return (error); 439 } 440 441 442 static int 443 devfs_nresolve(struct vop_nresolve_args *ap) 444 { 445 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 446 struct devfs_node *node, *found = NULL; 447 struct namecache *ncp; 448 struct vnode *vp = NULL; 449 int error = 0; 450 int len; 451 int depth; 452 453 ncp = ap->a_nch->ncp; 454 len = ncp->nc_nlen; 455 456 if (!devfs_node_is_accessible(dnode)) 457 return ENOENT; 458 459 lockmgr(&devfs_lock, LK_EXCLUSIVE); 460 461 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) { 462 error = ENOENT; 463 cache_setvp(ap->a_nch, NULL); 464 goto out; 465 } 466 467 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) { 468 if (len == node->d_dir.d_namlen) { 469 if (!memcmp(ncp->nc_name, node->d_dir.d_name, len)) { 470 found = node; 471 break; 472 } 473 } 474 } 475 476 if (found) { 477 depth = 0; 478 while ((found->node_type == Plink) && (found->link_target)) { 479 if (depth >= 8) { 480 devfs_debug(DEVFS_DEBUG_SHOW, "Recursive link or depth >= 8"); 481 break; 482 } 483 484 found = found->link_target; 485 ++depth; 486 } 487 488 if (!(found->flags & DEVFS_HIDDEN)) 489 devfs_allocv(/*ap->a_dvp->v_mount, */ &vp, found); 490 } 491 492 if (vp == NULL) { 493 error = ENOENT; 494 cache_setvp(ap->a_nch, NULL); 495 goto out; 496 497 } 498 KKASSERT(vp); 499 vn_unlock(vp); 500 cache_setvp(ap->a_nch, vp); 501 vrele(vp); 502 out: 503 lockmgr(&devfs_lock, LK_RELEASE); 504 505 return error; 506 } 507 508 509 static int 510 devfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 511 { 512 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 513 514 *ap->a_vpp = NULL; 515 if (!devfs_node_is_accessible(dnode)) 516 return ENOENT; 517 518 lockmgr(&devfs_lock, LK_EXCLUSIVE); 519 if (dnode->parent != NULL) { 520 devfs_allocv(ap->a_vpp, dnode->parent); 521 vn_unlock(*ap->a_vpp); 522 } 523 lockmgr(&devfs_lock, LK_RELEASE); 524 525 return ((*ap->a_vpp == NULL) ? ENOENT : 0); 526 } 527 528 529 static int 530 devfs_getattr(struct vop_getattr_args *ap) 531 { 532 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 533 struct vattr *vap = ap->a_vap; 534 struct partinfo pinfo; 535 int error = 0; 536 537 #if 0 538 if (!devfs_node_is_accessible(node)) 539 return ENOENT; 540 #endif 541 node_sync_dev_get(node); 542 543 lockmgr(&devfs_lock, LK_EXCLUSIVE); 544 545 /* start by zeroing out the attributes */ 546 VATTR_NULL(vap); 547 548 /* next do all the common fields */ 549 vap->va_type = ap->a_vp->v_type; 550 vap->va_mode = node->mode; 551 vap->va_fileid = DEVFS_NODE(ap->a_vp)->d_dir.d_ino ; 552 vap->va_flags = 0; 553 vap->va_blocksize = DEV_BSIZE; 554 vap->va_bytes = vap->va_size = 0; 555 556 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 557 558 vap->va_atime = node->atime; 559 vap->va_mtime = node->mtime; 560 vap->va_ctime = node->ctime; 561 562 vap->va_nlink = 1; /* number of references to file */ 563 564 vap->va_uid = node->uid; 565 vap->va_gid = node->gid; 566 567 vap->va_rmajor = 0; 568 vap->va_rminor = 0; 569 570 if ((node->node_type == Pdev) && node->d_dev) { 571 reference_dev(node->d_dev); 572 vap->va_rminor = node->d_dev->si_uminor; 573 release_dev(node->d_dev); 574 } 575 576 /* For a softlink the va_size is the length of the softlink */ 577 if (node->symlink_name != 0) { 578 vap->va_bytes = vap->va_size = node->symlink_namelen; 579 } 580 581 /* 582 * For a disk-type device, va_size is the size of the underlying 583 * device, so that lseek() works properly. 584 */ 585 if ((node->d_dev) && (dev_dflags(node->d_dev) & D_DISK)) { 586 bzero(&pinfo, sizeof(pinfo)); 587 error = dev_dioctl(node->d_dev, DIOCGPART, (void *)&pinfo, 588 0, proc0.p_ucred, NULL); 589 if ((error == 0) && (pinfo.media_blksize != 0)) { 590 vap->va_size = pinfo.media_size; 591 } else { 592 vap->va_size = 0; 593 error = 0; 594 } 595 } 596 597 lockmgr(&devfs_lock, LK_RELEASE); 598 599 return (error); 600 } 601 602 603 static int 604 devfs_setattr(struct vop_setattr_args *ap) 605 { 606 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 607 struct vattr *vap; 608 int error = 0; 609 610 if (!devfs_node_is_accessible(node)) 611 return ENOENT; 612 node_sync_dev_get(node); 613 614 lockmgr(&devfs_lock, LK_EXCLUSIVE); 615 616 vap = ap->a_vap; 617 618 if (vap->va_uid != (uid_t)VNOVAL) { 619 if ((ap->a_cred->cr_uid != node->uid) && 620 (!groupmember(node->gid, ap->a_cred))) { 621 error = priv_check(curthread, PRIV_VFS_CHOWN); 622 if (error) 623 goto out; 624 } 625 node->uid = vap->va_uid; 626 } 627 628 if (vap->va_gid != (uid_t)VNOVAL) { 629 if ((ap->a_cred->cr_uid != node->uid) && 630 (!groupmember(node->gid, ap->a_cred))) { 631 error = priv_check(curthread, PRIV_VFS_CHOWN); 632 if (error) 633 goto out; 634 } 635 node->gid = vap->va_gid; 636 } 637 638 if (vap->va_mode != (mode_t)VNOVAL) { 639 if (ap->a_cred->cr_uid != node->uid) { 640 error = priv_check(curthread, PRIV_VFS_ADMIN); 641 if (error) 642 goto out; 643 } 644 node->mode = vap->va_mode; 645 } 646 647 out: 648 node_sync_dev_set(node); 649 nanotime(&node->ctime); 650 lockmgr(&devfs_lock, LK_RELEASE); 651 652 return error; 653 } 654 655 656 static int 657 devfs_readlink(struct vop_readlink_args *ap) 658 { 659 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 660 int ret; 661 662 if (!devfs_node_is_accessible(node)) 663 return ENOENT; 664 665 lockmgr(&devfs_lock, LK_EXCLUSIVE); 666 ret = uiomove(node->symlink_name, node->symlink_namelen, ap->a_uio); 667 lockmgr(&devfs_lock, LK_RELEASE); 668 669 return ret; 670 } 671 672 673 static int 674 devfs_print(struct vop_print_args *ap) 675 { 676 return (0); 677 } 678 679 static int 680 devfs_nmkdir(struct vop_nmkdir_args *ap) 681 { 682 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 683 struct devfs_node *node; 684 685 if (!devfs_node_is_accessible(dnode)) 686 return ENOENT; 687 688 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) 689 goto out; 690 691 lockmgr(&devfs_lock, LK_EXCLUSIVE); 692 devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Pdir, 693 ap->a_nch->ncp->nc_name, dnode, NULL); 694 695 if (*ap->a_vpp) { 696 node = DEVFS_NODE(*ap->a_vpp); 697 node->flags |= DEVFS_USER_CREATED; 698 cache_setunresolved(ap->a_nch); 699 cache_setvp(ap->a_nch, *ap->a_vpp); 700 } 701 lockmgr(&devfs_lock, LK_RELEASE); 702 out: 703 return ((*ap->a_vpp == NULL) ? ENOTDIR : 0); 704 } 705 706 static int 707 devfs_nsymlink(struct vop_nsymlink_args *ap) 708 { 709 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 710 struct devfs_node *node; 711 size_t targetlen; 712 713 if (!devfs_node_is_accessible(dnode)) 714 return ENOENT; 715 716 ap->a_vap->va_type = VLNK; 717 718 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) 719 goto out; 720 721 lockmgr(&devfs_lock, LK_EXCLUSIVE); 722 devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Plink, 723 ap->a_nch->ncp->nc_name, dnode, NULL); 724 725 targetlen = strlen(ap->a_target); 726 if (*ap->a_vpp) { 727 node = DEVFS_NODE(*ap->a_vpp); 728 node->flags |= DEVFS_USER_CREATED; 729 node->symlink_namelen = targetlen; 730 node->symlink_name = kmalloc(targetlen + 1, M_DEVFS, M_WAITOK); 731 memcpy(node->symlink_name, ap->a_target, targetlen); 732 node->symlink_name[targetlen] = '\0'; 733 cache_setunresolved(ap->a_nch); 734 cache_setvp(ap->a_nch, *ap->a_vpp); 735 } 736 lockmgr(&devfs_lock, LK_RELEASE); 737 out: 738 return ((*ap->a_vpp == NULL) ? ENOTDIR : 0); 739 } 740 741 static int 742 devfs_nrmdir(struct vop_nrmdir_args *ap) 743 { 744 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 745 struct devfs_node *node; 746 struct namecache *ncp; 747 int error = ENOENT; 748 749 ncp = ap->a_nch->ncp; 750 751 if (!devfs_node_is_accessible(dnode)) 752 return ENOENT; 753 754 lockmgr(&devfs_lock, LK_EXCLUSIVE); 755 756 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) 757 goto out; 758 759 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) { 760 if (ncp->nc_nlen != node->d_dir.d_namlen) 761 continue; 762 if (memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen)) 763 continue; 764 765 /* 766 * only allow removal of user created dirs 767 */ 768 if ((node->flags & DEVFS_USER_CREATED) == 0) { 769 error = EPERM; 770 goto out; 771 } else if (node->node_type != Pdir) { 772 error = ENOTDIR; 773 goto out; 774 } else if (node->nchildren > 2) { 775 error = ENOTEMPTY; 776 goto out; 777 } else { 778 if (node->v_node) 779 cache_inval_vp(node->v_node, CINV_DESTROY); 780 devfs_unlinkp(node); 781 error = 0; 782 break; 783 } 784 } 785 786 cache_setunresolved(ap->a_nch); 787 cache_setvp(ap->a_nch, NULL); 788 789 out: 790 lockmgr(&devfs_lock, LK_RELEASE); 791 return error; 792 } 793 794 static int 795 devfs_nremove(struct vop_nremove_args *ap) 796 { 797 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp); 798 struct devfs_node *node; 799 struct namecache *ncp; 800 int error = ENOENT; 801 802 ncp = ap->a_nch->ncp; 803 804 if (!devfs_node_is_accessible(dnode)) 805 return ENOENT; 806 807 lockmgr(&devfs_lock, LK_EXCLUSIVE); 808 809 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) 810 goto out; 811 812 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) { 813 if (ncp->nc_nlen != node->d_dir.d_namlen) 814 continue; 815 if (memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen)) 816 continue; 817 818 /* 819 * only allow removal of user created stuff (e.g. symlinks) 820 */ 821 if ((node->flags & DEVFS_USER_CREATED) == 0) { 822 error = EPERM; 823 goto out; 824 } else if (node->node_type == Pdir) { 825 error = EISDIR; 826 goto out; 827 } else { 828 if (node->v_node) 829 cache_inval_vp(node->v_node, CINV_DESTROY); 830 devfs_unlinkp(node); 831 error = 0; 832 break; 833 } 834 } 835 836 cache_setunresolved(ap->a_nch); 837 cache_setvp(ap->a_nch, NULL); 838 839 out: 840 lockmgr(&devfs_lock, LK_RELEASE); 841 return error; 842 } 843 844 845 static int 846 devfs_spec_open(struct vop_open_args *ap) 847 { 848 struct vnode *vp = ap->a_vp; 849 struct vnode *orig_vp = NULL; 850 struct devfs_node *node = DEVFS_NODE(vp); 851 struct devfs_node *newnode; 852 cdev_t dev, ndev = NULL; 853 int error = 0; 854 855 if (node) { 856 if (node->d_dev == NULL) 857 return ENXIO; 858 if (!devfs_node_is_accessible(node)) 859 return ENOENT; 860 } 861 862 if ((dev = vp->v_rdev) == NULL) 863 return ENXIO; 864 865 if (node && ap->a_fp) { 866 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n"); 867 lockmgr(&devfs_lock, LK_EXCLUSIVE); 868 869 ndev = devfs_clone(dev, node->d_dir.d_name, node->d_dir.d_namlen, 870 ap->a_mode, ap->a_cred); 871 if (ndev != NULL) { 872 newnode = devfs_create_device_node( 873 DEVFS_MNTDATA(vp->v_mount)->root_node, 874 ndev, NULL, NULL); 875 /* XXX: possibly destroy device if this happens */ 876 877 if (newnode != NULL) { 878 dev = ndev; 879 devfs_link_dev(dev); 880 881 devfs_debug(DEVFS_DEBUG_DEBUG, 882 "parent here is: %s, node is: |%s|\n", 883 ((node->parent->node_type == Proot) ? 884 "ROOT!" : node->parent->d_dir.d_name), 885 newnode->d_dir.d_name); 886 devfs_debug(DEVFS_DEBUG_DEBUG, 887 "test: %s\n", 888 ((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(node->parent), devfs_node_head)))->d_dir.d_name); 889 890 /* 891 * orig_vp is set to the original vp if we cloned. 892 */ 893 /* node->flags |= DEVFS_CLONED; */ 894 devfs_allocv(&vp, newnode); 895 orig_vp = ap->a_vp; 896 ap->a_vp = vp; 897 } 898 } 899 lockmgr(&devfs_lock, LK_RELEASE); 900 } 901 902 devfs_debug(DEVFS_DEBUG_DEBUG, 903 "devfs_spec_open() called on %s! \n", 904 dev->si_name); 905 906 /* 907 * Make this field valid before any I/O in ->d_open 908 */ 909 if (!dev->si_iosize_max) 910 dev->si_iosize_max = DFLTPHYS; 911 912 if (dev_dflags(dev) & D_TTY) 913 vsetflags(vp, VISTTY); 914 915 vn_unlock(vp); 916 error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred); 917 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 918 919 /* 920 * Clean up any cloned vp if we error out. 921 */ 922 if (error) { 923 if (orig_vp) { 924 vput(vp); 925 ap->a_vp = orig_vp; 926 /* orig_vp = NULL; */ 927 } 928 return error; 929 } 930 931 /* 932 * This checks if the disk device is going to be opened for writing. 933 * It will be only allowed in the cases where securelevel permits it 934 * and it's not mounted R/W. 935 */ 936 if ((dev_dflags(dev) & D_DISK) && (ap->a_mode & FWRITE) && 937 (ap->a_cred != FSCRED)) { 938 939 /* Very secure mode. No open for writing allowed */ 940 if (securelevel >= 2) 941 return EPERM; 942 943 /* 944 * If it is mounted R/W, do not allow to open for writing. 945 * In the case it's mounted read-only but securelevel 946 * is >= 1, then do not allow opening for writing either. 947 */ 948 if (vfs_mountedon(vp)) { 949 if (!(dev->si_mountpoint->mnt_flag & MNT_RDONLY)) 950 return EBUSY; 951 else if (securelevel >= 1) 952 return EPERM; 953 } 954 } 955 956 if (dev_dflags(dev) & D_TTY) { 957 if (dev->si_tty) { 958 struct tty *tp; 959 tp = dev->si_tty; 960 if (!tp->t_stop) { 961 devfs_debug(DEVFS_DEBUG_DEBUG, 962 "devfs: no t_stop\n"); 963 tp->t_stop = nottystop; 964 } 965 } 966 } 967 968 969 if (vn_isdisk(vp, NULL)) { 970 if (!dev->si_bsize_phys) 971 dev->si_bsize_phys = DEV_BSIZE; 972 vinitvmio(vp, IDX_TO_OFF(INT_MAX), PAGE_SIZE, -1); 973 } 974 975 vop_stdopen(ap); 976 #if 0 977 if (node) 978 nanotime(&node->atime); 979 #endif 980 981 if (orig_vp) 982 vn_unlock(vp); 983 984 /* Ugly pty magic, to make pty devices appear once they are opened */ 985 if (node && (node->flags & DEVFS_PTY) == DEVFS_PTY) 986 node->flags &= ~DEVFS_INVISIBLE; 987 988 if (ap->a_fp) { 989 ap->a_fp->f_type = DTYPE_VNODE; 990 ap->a_fp->f_flag = ap->a_mode & FMASK; 991 ap->a_fp->f_ops = &devfs_dev_fileops; 992 ap->a_fp->f_data = vp; 993 } 994 995 return 0; 996 } 997 998 999 static int 1000 devfs_spec_close(struct vop_close_args *ap) 1001 { 1002 struct devfs_node *node = DEVFS_NODE(ap->a_vp); 1003 struct proc *p = curproc; 1004 struct vnode *vp = ap->a_vp; 1005 cdev_t dev = vp->v_rdev; 1006 int error = 0; 1007 int needrelock; 1008 1009 devfs_debug(DEVFS_DEBUG_DEBUG, 1010 "devfs_spec_close() called on %s! \n", 1011 dev->si_name); 1012 1013 /* 1014 * A couple of hacks for devices and tty devices. The 1015 * vnode ref count cannot be used to figure out the 1016 * last close, but we can use v_opencount now that 1017 * revoke works properly. 1018 * 1019 * Detect the last close on a controlling terminal and clear 1020 * the session (half-close). 1021 */ 1022 if (dev) 1023 reference_dev(dev); 1024 1025 if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) { 1026 p->p_session->s_ttyvp = NULL; 1027 vrele(vp); 1028 } 1029 1030 /* 1031 * Vnodes can be opened and closed multiple times. Do not really 1032 * close the device unless (1) it is being closed forcibly, 1033 * (2) the device wants to track closes, or (3) this is the last 1034 * vnode doing its last close on the device. 1035 * 1036 * XXX the VXLOCK (force close) case can leave vnodes referencing 1037 * a closed device. This might not occur now that our revoke is 1038 * fixed. 1039 */ 1040 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n"); 1041 if (dev && ((vp->v_flag & VRECLAIMED) || 1042 (dev_dflags(dev) & D_TRACKCLOSE) || 1043 (vp->v_opencount == 1))) { 1044 /* 1045 * Unlock around dev_dclose() 1046 */ 1047 needrelock = 0; 1048 if (vn_islocked(vp)) { 1049 needrelock = 1; 1050 vn_unlock(vp); 1051 } 1052 error = dev_dclose(dev, ap->a_fflag, S_IFCHR); 1053 1054 /* 1055 * Ugly pty magic, to make pty devices disappear again once 1056 * they are closed 1057 */ 1058 if (node && (node->flags & DEVFS_PTY) == DEVFS_PTY) 1059 node->flags |= DEVFS_INVISIBLE; 1060 1061 if (needrelock) 1062 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1063 } else { 1064 error = 0; 1065 } 1066 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n"); 1067 1068 /* 1069 * Track the actual opens and closes on the vnode. The last close 1070 * disassociates the rdev. If the rdev is already disassociated or 1071 * the opencount is already 0, the vnode might have been revoked 1072 * and no further opencount tracking occurs. 1073 */ 1074 if (dev) 1075 release_dev(dev); 1076 if (vp->v_opencount > 0) 1077 vop_stdclose(ap); 1078 return(error); 1079 1080 } 1081 1082 1083 static int 1084 devfs_specf_close(struct file *fp) 1085 { 1086 struct vnode *vp = (struct vnode *)fp->f_data; 1087 int error; 1088 1089 get_mplock(); 1090 fp->f_ops = &badfileops; 1091 error = vn_close(vp, fp->f_flag); 1092 rel_mplock(); 1093 1094 return (error); 1095 } 1096 1097 1098 /* 1099 * Device-optimized file table vnode read routine. 1100 * 1101 * This bypasses the VOP table and talks directly to the device. Most 1102 * filesystems just route to specfs and can make this optimization. 1103 * 1104 * MPALMOSTSAFE - acquires mplock 1105 */ 1106 static int 1107 devfs_specf_read(struct file *fp, struct uio *uio, 1108 struct ucred *cred, int flags) 1109 { 1110 struct devfs_node *node; 1111 struct vnode *vp; 1112 int ioflag; 1113 int error; 1114 cdev_t dev; 1115 1116 KASSERT(uio->uio_td == curthread, 1117 ("uio_td %p is not td %p", uio->uio_td, curthread)); 1118 1119 if (uio->uio_resid == 0) 1120 return 0; 1121 1122 vp = (struct vnode *)fp->f_data; 1123 if (vp == NULL || vp->v_type == VBAD) 1124 return EBADF; 1125 1126 node = DEVFS_NODE(vp); 1127 1128 if ((dev = vp->v_rdev) == NULL) 1129 return EBADF; 1130 1131 /* only acquire mplock for devices that require it */ 1132 if (!(dev_dflags(dev) & D_MPSAFE_READ)) { 1133 atomic_add_int(&mplock_reads, 1); 1134 get_mplock(); 1135 } else { 1136 atomic_add_int(&mpsafe_reads, 1); 1137 } 1138 1139 reference_dev(dev); 1140 1141 if ((flags & O_FOFFSET) == 0) 1142 uio->uio_offset = fp->f_offset; 1143 1144 ioflag = 0; 1145 if (flags & O_FBLOCKING) { 1146 /* ioflag &= ~IO_NDELAY; */ 1147 } else if (flags & O_FNONBLOCKING) { 1148 ioflag |= IO_NDELAY; 1149 } else if (fp->f_flag & FNONBLOCK) { 1150 ioflag |= IO_NDELAY; 1151 } 1152 if (flags & O_FBUFFERED) { 1153 /* ioflag &= ~IO_DIRECT; */ 1154 } else if (flags & O_FUNBUFFERED) { 1155 ioflag |= IO_DIRECT; 1156 } else if (fp->f_flag & O_DIRECT) { 1157 ioflag |= IO_DIRECT; 1158 } 1159 ioflag |= sequential_heuristic(uio, fp); 1160 1161 error = dev_dread(dev, uio, ioflag); 1162 1163 release_dev(dev); 1164 if (node) 1165 nanotime(&node->atime); 1166 if ((flags & O_FOFFSET) == 0) 1167 fp->f_offset = uio->uio_offset; 1168 fp->f_nextoff = uio->uio_offset; 1169 1170 if (!(dev_dflags(dev) & D_MPSAFE_READ)) 1171 rel_mplock(); 1172 1173 return (error); 1174 } 1175 1176 1177 static int 1178 devfs_specf_write(struct file *fp, struct uio *uio, 1179 struct ucred *cred, int flags) 1180 { 1181 struct devfs_node *node; 1182 struct vnode *vp; 1183 int ioflag; 1184 int error; 1185 cdev_t dev; 1186 1187 KASSERT(uio->uio_td == curthread, 1188 ("uio_td %p is not p %p", uio->uio_td, curthread)); 1189 1190 vp = (struct vnode *)fp->f_data; 1191 if (vp == NULL || vp->v_type == VBAD) 1192 return EBADF; 1193 1194 node = DEVFS_NODE(vp); 1195 1196 if (vp->v_type == VREG) 1197 bwillwrite(uio->uio_resid); 1198 1199 vp = (struct vnode *)fp->f_data; 1200 1201 if ((dev = vp->v_rdev) == NULL) 1202 return EBADF; 1203 1204 /* only acquire mplock for devices that require it */ 1205 if (!(dev_dflags(dev) & D_MPSAFE_WRITE)) { 1206 atomic_add_int(&mplock_writes, 1); 1207 get_mplock(); 1208 } else { 1209 atomic_add_int(&mpsafe_writes, 1); 1210 } 1211 1212 reference_dev(dev); 1213 1214 if ((flags & O_FOFFSET) == 0) 1215 uio->uio_offset = fp->f_offset; 1216 1217 ioflag = IO_UNIT; 1218 if (vp->v_type == VREG && 1219 ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) { 1220 ioflag |= IO_APPEND; 1221 } 1222 1223 if (flags & O_FBLOCKING) { 1224 /* ioflag &= ~IO_NDELAY; */ 1225 } else if (flags & O_FNONBLOCKING) { 1226 ioflag |= IO_NDELAY; 1227 } else if (fp->f_flag & FNONBLOCK) { 1228 ioflag |= IO_NDELAY; 1229 } 1230 if (flags & O_FBUFFERED) { 1231 /* ioflag &= ~IO_DIRECT; */ 1232 } else if (flags & O_FUNBUFFERED) { 1233 ioflag |= IO_DIRECT; 1234 } else if (fp->f_flag & O_DIRECT) { 1235 ioflag |= IO_DIRECT; 1236 } 1237 if (flags & O_FASYNCWRITE) { 1238 /* ioflag &= ~IO_SYNC; */ 1239 } else if (flags & O_FSYNCWRITE) { 1240 ioflag |= IO_SYNC; 1241 } else if (fp->f_flag & O_FSYNC) { 1242 ioflag |= IO_SYNC; 1243 } 1244 1245 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)) 1246 ioflag |= IO_SYNC; 1247 ioflag |= sequential_heuristic(uio, fp); 1248 1249 error = dev_dwrite(dev, uio, ioflag); 1250 1251 release_dev(dev); 1252 if (node) { 1253 nanotime(&node->atime); 1254 nanotime(&node->mtime); 1255 } 1256 1257 if ((flags & O_FOFFSET) == 0) 1258 fp->f_offset = uio->uio_offset; 1259 fp->f_nextoff = uio->uio_offset; 1260 1261 if (!(dev_dflags(dev) & D_MPSAFE_WRITE)) 1262 rel_mplock(); 1263 return (error); 1264 } 1265 1266 1267 static int 1268 devfs_specf_stat(struct file *fp, struct stat *sb, struct ucred *cred) 1269 { 1270 struct vnode *vp; 1271 struct vattr vattr; 1272 struct vattr *vap; 1273 u_short mode; 1274 cdev_t dev; 1275 int error; 1276 1277 vp = (struct vnode *)fp->f_data; 1278 if (vp == NULL || vp->v_type == VBAD) 1279 return EBADF; 1280 1281 error = vn_stat(vp, sb, cred); 1282 if (error) 1283 return (error); 1284 1285 vap = &vattr; 1286 error = VOP_GETATTR(vp, vap); 1287 if (error) 1288 return (error); 1289 1290 /* 1291 * Zero the spare stat fields 1292 */ 1293 sb->st_lspare = 0; 1294 sb->st_qspare1 = 0; 1295 sb->st_qspare2 = 0; 1296 1297 /* 1298 * Copy from vattr table ... or not in case it's a cloned device 1299 */ 1300 if (vap->va_fsid != VNOVAL) 1301 sb->st_dev = vap->va_fsid; 1302 else 1303 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; 1304 1305 sb->st_ino = vap->va_fileid; 1306 1307 mode = vap->va_mode; 1308 mode |= S_IFCHR; 1309 sb->st_mode = mode; 1310 1311 if (vap->va_nlink > (nlink_t)-1) 1312 sb->st_nlink = (nlink_t)-1; 1313 else 1314 sb->st_nlink = vap->va_nlink; 1315 1316 sb->st_uid = vap->va_uid; 1317 sb->st_gid = vap->va_gid; 1318 sb->st_rdev = dev2udev(DEVFS_NODE(vp)->d_dev); 1319 sb->st_size = vap->va_bytes; 1320 sb->st_atimespec = vap->va_atime; 1321 sb->st_mtimespec = vap->va_mtime; 1322 sb->st_ctimespec = vap->va_ctime; 1323 1324 /* 1325 * A VCHR and VBLK device may track the last access and last modified 1326 * time independantly of the filesystem. This is particularly true 1327 * because device read and write calls may bypass the filesystem. 1328 */ 1329 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1330 dev = vp->v_rdev; 1331 if (dev != NULL) { 1332 if (dev->si_lastread) { 1333 sb->st_atimespec.tv_sec = dev->si_lastread; 1334 sb->st_atimespec.tv_nsec = 0; 1335 } 1336 if (dev->si_lastwrite) { 1337 sb->st_atimespec.tv_sec = dev->si_lastwrite; 1338 sb->st_atimespec.tv_nsec = 0; 1339 } 1340 } 1341 } 1342 1343 /* 1344 * According to www.opengroup.org, the meaning of st_blksize is 1345 * "a filesystem-specific preferred I/O block size for this 1346 * object. In some filesystem types, this may vary from file 1347 * to file" 1348 * Default to PAGE_SIZE after much discussion. 1349 */ 1350 1351 sb->st_blksize = PAGE_SIZE; 1352 1353 sb->st_flags = vap->va_flags; 1354 1355 error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0); 1356 if (error) 1357 sb->st_gen = 0; 1358 else 1359 sb->st_gen = (u_int32_t)vap->va_gen; 1360 1361 sb->st_blocks = vap->va_bytes / S_BLKSIZE; 1362 1363 return (0); 1364 } 1365 1366 1367 static int 1368 devfs_specf_kqfilter(struct file *fp, struct knote *kn) 1369 { 1370 struct vnode *vp; 1371 int error; 1372 cdev_t dev; 1373 1374 get_mplock(); 1375 1376 vp = (struct vnode *)fp->f_data; 1377 if (vp == NULL || vp->v_type == VBAD) { 1378 error = EBADF; 1379 goto done; 1380 } 1381 if ((dev = vp->v_rdev) == NULL) { 1382 error = EBADF; 1383 goto done; 1384 } 1385 reference_dev(dev); 1386 1387 error = dev_dkqfilter(dev, kn); 1388 1389 release_dev(dev); 1390 1391 done: 1392 rel_mplock(); 1393 return (error); 1394 } 1395 1396 1397 static int 1398 devfs_specf_poll(struct file *fp, int events, struct ucred *cred) 1399 { 1400 struct devfs_node *node; 1401 struct vnode *vp; 1402 int error; 1403 cdev_t dev; 1404 1405 get_mplock(); 1406 1407 vp = (struct vnode *)fp->f_data; 1408 if (vp == NULL || vp->v_type == VBAD) { 1409 error = EBADF; 1410 goto done; 1411 } 1412 node = DEVFS_NODE(vp); 1413 1414 if ((dev = vp->v_rdev) == NULL) { 1415 error = EBADF; 1416 goto done; 1417 } 1418 reference_dev(dev); 1419 error = dev_dpoll(dev, events); 1420 1421 release_dev(dev); 1422 1423 #if 0 1424 if (node) 1425 nanotime(&node->atime); 1426 #endif 1427 done: 1428 rel_mplock(); 1429 return (error); 1430 } 1431 1432 1433 /* 1434 * MPALMOSTSAFE - acquires mplock 1435 */ 1436 static int 1437 devfs_specf_ioctl(struct file *fp, u_long com, caddr_t data, 1438 struct ucred *ucred, struct sysmsg *msg) 1439 { 1440 struct devfs_node *node; 1441 struct vnode *vp; 1442 struct vnode *ovp; 1443 cdev_t dev; 1444 int error; 1445 struct fiodname_args *name_args; 1446 size_t namlen; 1447 const char *name; 1448 1449 vp = ((struct vnode *)fp->f_data); 1450 1451 if ((dev = vp->v_rdev) == NULL) 1452 return EBADF; /* device was revoked */ 1453 1454 reference_dev(dev); 1455 1456 node = DEVFS_NODE(vp); 1457 1458 devfs_debug(DEVFS_DEBUG_DEBUG, 1459 "devfs_specf_ioctl() called! for dev %s\n", 1460 dev->si_name); 1461 1462 if (com == FIODTYPE) { 1463 *(int *)data = dev_dflags(dev) & D_TYPEMASK; 1464 error = 0; 1465 goto out; 1466 } else if (com == FIODNAME) { 1467 name_args = (struct fiodname_args *)data; 1468 name = dev->si_name; 1469 namlen = strlen(name) + 1; 1470 1471 devfs_debug(DEVFS_DEBUG_DEBUG, 1472 "ioctl, got: FIODNAME for %s\n", name); 1473 1474 if (namlen <= name_args->len) 1475 error = copyout(dev->si_name, name_args->name, namlen); 1476 else 1477 error = EINVAL; 1478 1479 devfs_debug(DEVFS_DEBUG_DEBUG, 1480 "ioctl stuff: error: %d\n", error); 1481 goto out; 1482 } 1483 1484 /* only acquire mplock for devices that require it */ 1485 if (!(dev_dflags(dev) & D_MPSAFE_IOCTL)) 1486 get_mplock(); 1487 1488 error = dev_dioctl(dev, com, data, fp->f_flag, ucred, msg); 1489 1490 #if 0 1491 if (node) { 1492 nanotime(&node->atime); 1493 nanotime(&node->mtime); 1494 } 1495 #endif 1496 1497 if (!(dev_dflags(dev) & D_MPSAFE_IOCTL)) 1498 rel_mplock(); 1499 1500 if (com == TIOCSCTTY) { 1501 devfs_debug(DEVFS_DEBUG_DEBUG, 1502 "devfs_specf_ioctl: got TIOCSCTTY on %s\n", 1503 dev->si_name); 1504 } 1505 if (error == 0 && com == TIOCSCTTY) { 1506 struct proc *p = curthread->td_proc; 1507 struct session *sess; 1508 1509 devfs_debug(DEVFS_DEBUG_DEBUG, 1510 "devfs_specf_ioctl: dealing with TIOCSCTTY on %s\n", 1511 dev->si_name); 1512 if (p == NULL) { 1513 error = ENOTTY; 1514 goto out; 1515 } 1516 sess = p->p_session; 1517 1518 /* 1519 * Do nothing if reassigning same control tty 1520 */ 1521 if (sess->s_ttyvp == vp) { 1522 error = 0; 1523 goto out; 1524 } 1525 1526 /* 1527 * Get rid of reference to old control tty 1528 */ 1529 ovp = sess->s_ttyvp; 1530 vref(vp); 1531 sess->s_ttyvp = vp; 1532 if (ovp) 1533 vrele(ovp); 1534 } 1535 1536 out: 1537 release_dev(dev); 1538 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() finished! \n"); 1539 return (error); 1540 } 1541 1542 1543 static int 1544 devfs_spec_fsync(struct vop_fsync_args *ap) 1545 { 1546 struct vnode *vp = ap->a_vp; 1547 int error; 1548 1549 if (!vn_isdisk(vp, NULL)) 1550 return (0); 1551 1552 /* 1553 * Flush all dirty buffers associated with a block device. 1554 */ 1555 error = vfsync(vp, ap->a_waitfor, 10000, NULL, NULL); 1556 return (error); 1557 } 1558 1559 static int 1560 devfs_spec_read(struct vop_read_args *ap) 1561 { 1562 struct devfs_node *node; 1563 struct vnode *vp; 1564 struct uio *uio; 1565 cdev_t dev; 1566 int error; 1567 1568 vp = ap->a_vp; 1569 dev = vp->v_rdev; 1570 uio = ap->a_uio; 1571 node = DEVFS_NODE(vp); 1572 1573 if (dev == NULL) /* device was revoked */ 1574 return (EBADF); 1575 if (uio->uio_resid == 0) 1576 return (0); 1577 1578 vn_unlock(vp); 1579 error = dev_dread(dev, uio, ap->a_ioflag); 1580 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1581 1582 if (node) 1583 nanotime(&node->atime); 1584 1585 return (error); 1586 } 1587 1588 /* 1589 * Vnode op for write 1590 * 1591 * spec_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag, 1592 * struct ucred *a_cred) 1593 */ 1594 static int 1595 devfs_spec_write(struct vop_write_args *ap) 1596 { 1597 struct devfs_node *node; 1598 struct vnode *vp; 1599 struct uio *uio; 1600 cdev_t dev; 1601 int error; 1602 1603 vp = ap->a_vp; 1604 dev = vp->v_rdev; 1605 uio = ap->a_uio; 1606 node = DEVFS_NODE(vp); 1607 1608 KKASSERT(uio->uio_segflg != UIO_NOCOPY); 1609 1610 if (dev == NULL) /* device was revoked */ 1611 return (EBADF); 1612 1613 vn_unlock(vp); 1614 error = dev_dwrite(dev, uio, ap->a_ioflag); 1615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1616 1617 if (node) { 1618 nanotime(&node->atime); 1619 nanotime(&node->mtime); 1620 } 1621 1622 return (error); 1623 } 1624 1625 /* 1626 * Device ioctl operation. 1627 * 1628 * spec_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data, 1629 * int a_fflag, struct ucred *a_cred, struct sysmsg *msg) 1630 */ 1631 static int 1632 devfs_spec_ioctl(struct vop_ioctl_args *ap) 1633 { 1634 struct vnode *vp = ap->a_vp; 1635 struct devfs_node *node; 1636 cdev_t dev; 1637 1638 if ((dev = vp->v_rdev) == NULL) 1639 return (EBADF); /* device was revoked */ 1640 node = DEVFS_NODE(vp); 1641 1642 #if 0 1643 if (node) { 1644 nanotime(&node->atime); 1645 nanotime(&node->mtime); 1646 } 1647 #endif 1648 1649 return (dev_dioctl(dev, ap->a_command, ap->a_data, ap->a_fflag, 1650 ap->a_cred, ap->a_sysmsg)); 1651 } 1652 1653 /* 1654 * spec_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred) 1655 */ 1656 /* ARGSUSED */ 1657 static int 1658 devfs_spec_poll(struct vop_poll_args *ap) 1659 { 1660 struct vnode *vp = ap->a_vp; 1661 struct devfs_node *node; 1662 cdev_t dev; 1663 1664 if ((dev = vp->v_rdev) == NULL) 1665 return (EBADF); /* device was revoked */ 1666 node = DEVFS_NODE(vp); 1667 1668 #if 0 1669 if (node) 1670 nanotime(&node->atime); 1671 #endif 1672 1673 return (dev_dpoll(dev, ap->a_events)); 1674 } 1675 1676 /* 1677 * spec_kqfilter(struct vnode *a_vp, struct knote *a_kn) 1678 */ 1679 /* ARGSUSED */ 1680 static int 1681 devfs_spec_kqfilter(struct vop_kqfilter_args *ap) 1682 { 1683 struct vnode *vp = ap->a_vp; 1684 struct devfs_node *node; 1685 cdev_t dev; 1686 1687 if ((dev = vp->v_rdev) == NULL) 1688 return (EBADF); /* device was revoked */ 1689 node = DEVFS_NODE(vp); 1690 1691 #if 0 1692 if (node) 1693 nanotime(&node->atime); 1694 #endif 1695 1696 return (dev_dkqfilter(dev, ap->a_kn)); 1697 } 1698 1699 /* 1700 * Convert a vnode strategy call into a device strategy call. Vnode strategy 1701 * calls are not limited to device DMA limits so we have to deal with the 1702 * case. 1703 * 1704 * spec_strategy(struct vnode *a_vp, struct bio *a_bio) 1705 */ 1706 static int 1707 devfs_spec_strategy(struct vop_strategy_args *ap) 1708 { 1709 struct bio *bio = ap->a_bio; 1710 struct buf *bp = bio->bio_buf; 1711 struct buf *nbp; 1712 struct vnode *vp; 1713 struct mount *mp; 1714 int chunksize; 1715 int maxiosize; 1716 1717 if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL) 1718 buf_start(bp); 1719 1720 /* 1721 * Collect statistics on synchronous and asynchronous read 1722 * and write counts for disks that have associated filesystems. 1723 */ 1724 vp = ap->a_vp; 1725 KKASSERT(vp->v_rdev != NULL); /* XXX */ 1726 if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) { 1727 if (bp->b_cmd == BUF_CMD_READ) { 1728 if (bp->b_flags & BIO_SYNC) 1729 mp->mnt_stat.f_syncreads++; 1730 else 1731 mp->mnt_stat.f_asyncreads++; 1732 } else { 1733 if (bp->b_flags & BIO_SYNC) 1734 mp->mnt_stat.f_syncwrites++; 1735 else 1736 mp->mnt_stat.f_asyncwrites++; 1737 } 1738 } 1739 1740 /* 1741 * Device iosize limitations only apply to read and write. Shortcut 1742 * the I/O if it fits. 1743 */ 1744 if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) { 1745 devfs_debug(DEVFS_DEBUG_DEBUG, 1746 "%s: si_iosize_max not set!\n", 1747 dev_dname(vp->v_rdev)); 1748 maxiosize = MAXPHYS; 1749 } 1750 #if SPEC_CHAIN_DEBUG & 2 1751 maxiosize = 4096; 1752 #endif 1753 if (bp->b_bcount <= maxiosize || 1754 (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) { 1755 dev_dstrategy_chain(vp->v_rdev, bio); 1756 return (0); 1757 } 1758 1759 /* 1760 * Clone the buffer and set up an I/O chain to chunk up the I/O. 1761 */ 1762 nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO); 1763 initbufbio(nbp); 1764 buf_dep_init(nbp); 1765 BUF_LOCKINIT(nbp); 1766 BUF_LOCK(nbp, LK_EXCLUSIVE); 1767 BUF_KERNPROC(nbp); 1768 nbp->b_vp = vp; 1769 nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP); 1770 nbp->b_data = bp->b_data; 1771 nbp->b_bio1.bio_done = devfs_spec_strategy_done; 1772 nbp->b_bio1.bio_offset = bio->bio_offset; 1773 nbp->b_bio1.bio_caller_info1.ptr = bio; 1774 1775 /* 1776 * Start the first transfer 1777 */ 1778 if (vn_isdisk(vp, NULL)) 1779 chunksize = vp->v_rdev->si_bsize_phys; 1780 else 1781 chunksize = DEV_BSIZE; 1782 chunksize = maxiosize / chunksize * chunksize; 1783 #if SPEC_CHAIN_DEBUG & 1 1784 devfs_debug(DEVFS_DEBUG_DEBUG, 1785 "spec_strategy chained I/O chunksize=%d\n", 1786 chunksize); 1787 #endif 1788 nbp->b_cmd = bp->b_cmd; 1789 nbp->b_bcount = chunksize; 1790 nbp->b_bufsize = chunksize; /* used to detect a short I/O */ 1791 nbp->b_bio1.bio_caller_info2.index = chunksize; 1792 1793 #if SPEC_CHAIN_DEBUG & 1 1794 devfs_debug(DEVFS_DEBUG_DEBUG, 1795 "spec_strategy: chain %p offset %d/%d bcount %d\n", 1796 bp, 0, bp->b_bcount, nbp->b_bcount); 1797 #endif 1798 1799 dev_dstrategy(vp->v_rdev, &nbp->b_bio1); 1800 1801 if (DEVFS_NODE(vp)) { 1802 nanotime(&DEVFS_NODE(vp)->atime); 1803 nanotime(&DEVFS_NODE(vp)->mtime); 1804 } 1805 1806 return (0); 1807 } 1808 1809 /* 1810 * Chunked up transfer completion routine - chain transfers until done 1811 */ 1812 static 1813 void 1814 devfs_spec_strategy_done(struct bio *nbio) 1815 { 1816 struct buf *nbp = nbio->bio_buf; 1817 struct bio *bio = nbio->bio_caller_info1.ptr; /* original bio */ 1818 struct buf *bp = bio->bio_buf; /* original bp */ 1819 int chunksize = nbio->bio_caller_info2.index; /* chunking */ 1820 int boffset = nbp->b_data - bp->b_data; 1821 1822 if (nbp->b_flags & B_ERROR) { 1823 /* 1824 * An error terminates the chain, propogate the error back 1825 * to the original bp 1826 */ 1827 bp->b_flags |= B_ERROR; 1828 bp->b_error = nbp->b_error; 1829 bp->b_resid = bp->b_bcount - boffset + 1830 (nbp->b_bcount - nbp->b_resid); 1831 #if SPEC_CHAIN_DEBUG & 1 1832 devfs_debug(DEVFS_DEBUG_DEBUG, 1833 "spec_strategy: chain %p error %d bcount %d/%d\n", 1834 bp, bp->b_error, bp->b_bcount, 1835 bp->b_bcount - bp->b_resid); 1836 #endif 1837 kfree(nbp, M_DEVBUF); 1838 biodone(bio); 1839 } else if (nbp->b_resid) { 1840 /* 1841 * A short read or write terminates the chain 1842 */ 1843 bp->b_error = nbp->b_error; 1844 bp->b_resid = bp->b_bcount - boffset + 1845 (nbp->b_bcount - nbp->b_resid); 1846 #if SPEC_CHAIN_DEBUG & 1 1847 devfs_debug(DEVFS_DEBUG_DEBUG, 1848 "spec_strategy: chain %p short read(1) " 1849 "bcount %d/%d\n", 1850 bp, bp->b_bcount - bp->b_resid, bp->b_bcount); 1851 #endif 1852 kfree(nbp, M_DEVBUF); 1853 biodone(bio); 1854 } else if (nbp->b_bcount != nbp->b_bufsize) { 1855 /* 1856 * A short read or write can also occur by truncating b_bcount 1857 */ 1858 #if SPEC_CHAIN_DEBUG & 1 1859 devfs_debug(DEVFS_DEBUG_DEBUG, 1860 "spec_strategy: chain %p short read(2) " 1861 "bcount %d/%d\n", 1862 bp, nbp->b_bcount + boffset, bp->b_bcount); 1863 #endif 1864 bp->b_error = 0; 1865 bp->b_bcount = nbp->b_bcount + boffset; 1866 bp->b_resid = nbp->b_resid; 1867 kfree(nbp, M_DEVBUF); 1868 biodone(bio); 1869 } else if (nbp->b_bcount + boffset == bp->b_bcount) { 1870 /* 1871 * No more data terminates the chain 1872 */ 1873 #if SPEC_CHAIN_DEBUG & 1 1874 devfs_debug(DEVFS_DEBUG_DEBUG, 1875 "spec_strategy: chain %p finished bcount %d\n", 1876 bp, bp->b_bcount); 1877 #endif 1878 bp->b_error = 0; 1879 bp->b_resid = 0; 1880 kfree(nbp, M_DEVBUF); 1881 biodone(bio); 1882 } else { 1883 /* 1884 * Continue the chain 1885 */ 1886 boffset += nbp->b_bcount; 1887 nbp->b_data = bp->b_data + boffset; 1888 nbp->b_bcount = bp->b_bcount - boffset; 1889 if (nbp->b_bcount > chunksize) 1890 nbp->b_bcount = chunksize; 1891 nbp->b_bio1.bio_done = devfs_spec_strategy_done; 1892 nbp->b_bio1.bio_offset = bio->bio_offset + boffset; 1893 1894 #if SPEC_CHAIN_DEBUG & 1 1895 devfs_debug(DEVFS_DEBUG_DEBUG, 1896 "spec_strategy: chain %p offset %d/%d bcount %d\n", 1897 bp, boffset, bp->b_bcount, nbp->b_bcount); 1898 #endif 1899 1900 dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1); 1901 } 1902 } 1903 1904 /* 1905 * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length) 1906 */ 1907 static int 1908 devfs_spec_freeblks(struct vop_freeblks_args *ap) 1909 { 1910 struct buf *bp; 1911 1912 /* 1913 * XXX: This assumes that strategy does the deed right away. 1914 * XXX: this may not be TRTTD. 1915 */ 1916 KKASSERT(ap->a_vp->v_rdev != NULL); 1917 if ((dev_dflags(ap->a_vp->v_rdev) & D_CANFREE) == 0) 1918 return (0); 1919 bp = geteblk(ap->a_length); 1920 bp->b_cmd = BUF_CMD_FREEBLKS; 1921 bp->b_bio1.bio_offset = ap->a_offset; 1922 bp->b_bcount = ap->a_length; 1923 dev_dstrategy(ap->a_vp->v_rdev, &bp->b_bio1); 1924 return (0); 1925 } 1926 1927 /* 1928 * Implement degenerate case where the block requested is the block 1929 * returned, and assume that the entire device is contiguous in regards 1930 * to the contiguous block range (runp and runb). 1931 * 1932 * spec_bmap(struct vnode *a_vp, off_t a_loffset, 1933 * off_t *a_doffsetp, int *a_runp, int *a_runb) 1934 */ 1935 static int 1936 devfs_spec_bmap(struct vop_bmap_args *ap) 1937 { 1938 if (ap->a_doffsetp != NULL) 1939 *ap->a_doffsetp = ap->a_loffset; 1940 if (ap->a_runp != NULL) 1941 *ap->a_runp = MAXBSIZE; 1942 if (ap->a_runb != NULL) { 1943 if (ap->a_loffset < MAXBSIZE) 1944 *ap->a_runb = (int)ap->a_loffset; 1945 else 1946 *ap->a_runb = MAXBSIZE; 1947 } 1948 return (0); 1949 } 1950 1951 1952 /* 1953 * Special device advisory byte-level locks. 1954 * 1955 * spec_advlock(struct vnode *a_vp, caddr_t a_id, int a_op, 1956 * struct flock *a_fl, int a_flags) 1957 */ 1958 /* ARGSUSED */ 1959 static int 1960 devfs_spec_advlock(struct vop_advlock_args *ap) 1961 { 1962 return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP); 1963 } 1964 1965 static void 1966 devfs_spec_getpages_iodone(struct bio *bio) 1967 { 1968 bio->bio_buf->b_cmd = BUF_CMD_DONE; 1969 wakeup(bio->bio_buf); 1970 } 1971 1972 /* 1973 * spec_getpages() - get pages associated with device vnode. 1974 * 1975 * Note that spec_read and spec_write do not use the buffer cache, so we 1976 * must fully implement getpages here. 1977 */ 1978 static int 1979 devfs_spec_getpages(struct vop_getpages_args *ap) 1980 { 1981 vm_offset_t kva; 1982 int error; 1983 int i, pcount, size; 1984 struct buf *bp; 1985 vm_page_t m; 1986 vm_ooffset_t offset; 1987 int toff, nextoff, nread; 1988 struct vnode *vp = ap->a_vp; 1989 int blksiz; 1990 int gotreqpage; 1991 1992 error = 0; 1993 pcount = round_page(ap->a_count) / PAGE_SIZE; 1994 1995 /* 1996 * Calculate the offset of the transfer and do sanity check. 1997 */ 1998 offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; 1999 2000 /* 2001 * Round up physical size for real devices. We cannot round using 2002 * v_mount's block size data because v_mount has nothing to do with 2003 * the device. i.e. it's usually '/dev'. We need the physical block 2004 * size for the device itself. 2005 * 2006 * We can't use v_rdev->si_mountpoint because it only exists when the 2007 * block device is mounted. However, we can use v_rdev. 2008 */ 2009 if (vn_isdisk(vp, NULL)) 2010 blksiz = vp->v_rdev->si_bsize_phys; 2011 else 2012 blksiz = DEV_BSIZE; 2013 2014 size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); 2015 2016 bp = getpbuf(NULL); 2017 kva = (vm_offset_t)bp->b_data; 2018 2019 /* 2020 * Map the pages to be read into the kva. 2021 */ 2022 pmap_qenter(kva, ap->a_m, pcount); 2023 2024 /* Build a minimal buffer header. */ 2025 bp->b_cmd = BUF_CMD_READ; 2026 bp->b_bcount = size; 2027 bp->b_resid = 0; 2028 bp->b_runningbufspace = size; 2029 if (size) { 2030 runningbufspace += bp->b_runningbufspace; 2031 ++runningbufcount; 2032 } 2033 2034 bp->b_bio1.bio_offset = offset; 2035 bp->b_bio1.bio_done = devfs_spec_getpages_iodone; 2036 2037 mycpu->gd_cnt.v_vnodein++; 2038 mycpu->gd_cnt.v_vnodepgsin += pcount; 2039 2040 /* Do the input. */ 2041 vn_strategy(ap->a_vp, &bp->b_bio1); 2042 2043 crit_enter(); 2044 2045 /* We definitely need to be at splbio here. */ 2046 while (bp->b_cmd != BUF_CMD_DONE) 2047 tsleep(bp, 0, "spread", 0); 2048 2049 crit_exit(); 2050 2051 if (bp->b_flags & B_ERROR) { 2052 if (bp->b_error) 2053 error = bp->b_error; 2054 else 2055 error = EIO; 2056 } 2057 2058 /* 2059 * If EOF is encountered we must zero-extend the result in order 2060 * to ensure that the page does not contain garabge. When no 2061 * error occurs, an early EOF is indicated if b_bcount got truncated. 2062 * b_resid is relative to b_bcount and should be 0, but some devices 2063 * might indicate an EOF with b_resid instead of truncating b_bcount. 2064 */ 2065 nread = bp->b_bcount - bp->b_resid; 2066 if (nread < ap->a_count) 2067 bzero((caddr_t)kva + nread, ap->a_count - nread); 2068 pmap_qremove(kva, pcount); 2069 2070 gotreqpage = 0; 2071 for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { 2072 nextoff = toff + PAGE_SIZE; 2073 m = ap->a_m[i]; 2074 2075 m->flags &= ~PG_ZERO; 2076 2077 /* 2078 * NOTE: vm_page_undirty/clear_dirty etc do not clear the 2079 * pmap modified bit. pmap modified bit should have 2080 * already been cleared. 2081 */ 2082 if (nextoff <= nread) { 2083 m->valid = VM_PAGE_BITS_ALL; 2084 vm_page_undirty(m); 2085 } else if (toff < nread) { 2086 /* 2087 * Since this is a VM request, we have to supply the 2088 * unaligned offset to allow vm_page_set_valid() 2089 * to zero sub-DEV_BSIZE'd portions of the page. 2090 */ 2091 vm_page_set_valid(m, 0, nread - toff); 2092 vm_page_clear_dirty_end_nonincl(m, 0, nread - toff); 2093 } else { 2094 m->valid = 0; 2095 vm_page_undirty(m); 2096 } 2097 2098 if (i != ap->a_reqpage) { 2099 /* 2100 * Just in case someone was asking for this page we 2101 * now tell them that it is ok to use. 2102 */ 2103 if (!error || (m->valid == VM_PAGE_BITS_ALL)) { 2104 if (m->valid) { 2105 if (m->flags & PG_WANTED) { 2106 vm_page_activate(m); 2107 } else { 2108 vm_page_deactivate(m); 2109 } 2110 vm_page_wakeup(m); 2111 } else { 2112 vm_page_free(m); 2113 } 2114 } else { 2115 vm_page_free(m); 2116 } 2117 } else if (m->valid) { 2118 gotreqpage = 1; 2119 /* 2120 * Since this is a VM request, we need to make the 2121 * entire page presentable by zeroing invalid sections. 2122 */ 2123 if (m->valid != VM_PAGE_BITS_ALL) 2124 vm_page_zero_invalid(m, FALSE); 2125 } 2126 } 2127 if (!gotreqpage) { 2128 m = ap->a_m[ap->a_reqpage]; 2129 devfs_debug(DEVFS_DEBUG_WARNING, 2130 "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n", 2131 devtoname(vp->v_rdev), error, bp, bp->b_vp); 2132 devfs_debug(DEVFS_DEBUG_WARNING, 2133 " size: %d, resid: %d, a_count: %d, valid: 0x%x\n", 2134 size, bp->b_resid, ap->a_count, m->valid); 2135 devfs_debug(DEVFS_DEBUG_WARNING, 2136 " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", 2137 nread, ap->a_reqpage, (u_long)m->pindex, pcount); 2138 /* 2139 * Free the buffer header back to the swap buffer pool. 2140 */ 2141 relpbuf(bp, NULL); 2142 return VM_PAGER_ERROR; 2143 } 2144 /* 2145 * Free the buffer header back to the swap buffer pool. 2146 */ 2147 relpbuf(bp, NULL); 2148 if (DEVFS_NODE(ap->a_vp)) 2149 nanotime(&DEVFS_NODE(ap->a_vp)->mtime); 2150 return VM_PAGER_OK; 2151 } 2152 2153 static __inline 2154 int 2155 sequential_heuristic(struct uio *uio, struct file *fp) 2156 { 2157 /* 2158 * Sequential heuristic - detect sequential operation 2159 */ 2160 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || 2161 uio->uio_offset == fp->f_nextoff) { 2162 /* 2163 * XXX we assume that the filesystem block size is 2164 * the default. Not true, but still gives us a pretty 2165 * good indicator of how sequential the read operations 2166 * are. 2167 */ 2168 int tmpseq = fp->f_seqcount; 2169 2170 tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE; 2171 if (tmpseq > IO_SEQMAX) 2172 tmpseq = IO_SEQMAX; 2173 fp->f_seqcount = tmpseq; 2174 return(fp->f_seqcount << IO_SEQSHIFT); 2175 } 2176 2177 /* 2178 * Not sequential, quick draw-down of seqcount 2179 */ 2180 if (fp->f_seqcount > 1) 2181 fp->f_seqcount = 1; 2182 else 2183 fp->f_seqcount = 0; 2184 return(0); 2185 } 2186 2187 extern SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "devfs"); 2188 2189 SYSCTL_INT(_vfs_devfs, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes, 2190 0, "mpsafe writes"); 2191 SYSCTL_INT(_vfs_devfs, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes, 2192 0, "non-mpsafe writes"); 2193 SYSCTL_INT(_vfs_devfs, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads, 2194 0, "mpsafe reads"); 2195 SYSCTL_INT(_vfs_devfs, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads, 2196 0, "non-mpsafe reads"); 2197