/*-
 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
 */

/*
 * tmpfs vnode interface.
 */

#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vfsops.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/buf2.h>
#include <vm/vm_page2.h>

#include <vfs/fifofs/fifo.h>
#include <vfs/tmpfs/tmpfs_vnops.h>
#include "tmpfs.h"

static void tmpfs_strategy_done(struct bio *bio);
static void tmpfs_move_pages(vm_object_t src, vm_object_t dst, int movflags);

/*
 * bufcache_mode:
 *	0	Normal page queue operation on flush.  Try to keep in memory.
 *	1	Try to cache on flush to swap (default).
 *	2	Always page to swap (not recommended).
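 *
 * These knobs are runtime-tunable through the read/write sysctls
 * declared below, e.g.:
 *
 *	sysctl vfs.tmpfs.bufcache_mode=2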
 */
__read_mostly static int tmpfs_cluster_rd_enable = 1;
__read_mostly static int tmpfs_cluster_wr_enable = 1;
__read_mostly static int tmpfs_bufcache_mode = 1;
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_rd_enable, CTLFLAG_RW,
	   &tmpfs_cluster_rd_enable, 0, "");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_wr_enable, CTLFLAG_RW,
	   &tmpfs_cluster_wr_enable, 0, "");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, bufcache_mode, CTLFLAG_RW,
	   &tmpfs_bufcache_mode, 0, "");

#define TMPFS_MOVF_FROMBACKING	0x0001
#define TMPFS_MOVF_DEACTIVATE	0x0002

static __inline
void
tmpfs_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nresolve(struct vop_nresolve_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = NULL;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_node *tnode;
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode;
	int error;

	dnode = VP_TO_TMPFS_DIR(dvp);

	TMPFS_NODE_LOCK_SH(dnode);
loop:
	de = tmpfs_dir_lookup(dnode, NULL, ncp);
	if (de == NULL) {
		error = ENOENT;
	} else {
		/*
		 * Allocate a vnode for the node we found.  Use
		 * tmpfs_alloc_vp()'s deadlock handling mode.
		 */
		tnode = de->td_node;
		error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode,
				       LK_EXCLUSIVE | LK_RETRY, &vp);
		if (error == EAGAIN)
			goto loop;
		if (error)
			goto out;
		KKASSERT(vp);
	}

out:
	TMPFS_NODE_UNLOCK(dnode);

	if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(dnode);
		dnode->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(dnode);
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if (vp) {
		vn_unlock(vp);
		cache_setvp(ap->a_nch, vp);
		vrele(vp);
	} else if (error == ENOENT) {
		cache_setvp(ap->a_nch, NULL);
	}
	return (error);
}

static int
tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
	struct ucred *cred = ap->a_cred;
	int error;

	*vpp = NULL;

	/* Check accessibility of requested node as a first step. */
	error = VOP_ACCESS(dvp, VEXEC, cred);
	if (error != 0)
		return error;

	if (dnode->tn_dir.tn_parent != NULL) {
		/* Allocate a new vnode on the matching entry. */
		error = tmpfs_alloc_vp(dvp->v_mount,
				       NULL, dnode->tn_dir.tn_parent,
				       LK_EXCLUSIVE | LK_RETRY, vpp);

		if (*vpp)
			vn_unlock(*vpp);
	}
	return (*vpp == NULL) ? ENOENT : 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_ncreate(struct vop_ncreate_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);
	}
	return (error);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nmknod(struct vop_nmknod_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	if (vap->va_type != VBLK && vap->va_type != VCHR &&
	    vap->va_type != VFIFO) {
		return (EINVAL);
	}

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int mode = ap->a_mode;
	struct tmpfs_node *node;
	int error;

	node = VP_TO_TMPFS_NODE(vp);

#if 0
	/* The file is still active but all its names have been removed
	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
	 * it is about to die. */
	if (node->tn_links < 1)
		return (ENOENT);
#endif

	/* If the file is marked append-only, deny write requests. */
	if ((node->tn_flags & APPEND) &&
	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
		error = EPERM;
	} else {
		if (node->tn_reg.tn_pages_in_aobj) {
			TMPFS_NODE_LOCK(node);
			if (node->tn_reg.tn_pages_in_aobj) {
				tmpfs_move_pages(node->tn_reg.tn_aobj,
						 vp->v_object,
						 TMPFS_MOVF_FROMBACKING);
				node->tn_reg.tn_pages_in_aobj = 0;
			}
			TMPFS_NODE_UNLOCK(node);
		}
		error = vop_stdopen(ap);
	}

	return (error);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	int error;

	node = VP_TO_TMPFS_NODE(vp);

	if (node->tn_links > 0) {
		/*
		 * Update node times.  No need to do it if the node has
		 * been deleted, because it will vanish after we return.
		 */
		tmpfs_update(vp);
	}

	error = vop_stdclose(ap);

	return (error);
}

/* --------------------------------------------------------------------- */

int
tmpfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	switch (vp->v_type) {
	case VDIR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		if ((ap->a_mode & VWRITE) &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		/* FALLTHROUGH */
	case VFIFO:
		break;

	default:
		error = EINVAL;
		goto out;
	}

	if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
		error = EPERM;
		goto out;
	}

	error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
				  node->tn_mode, 0);
out:
	return error;
}

/* --------------------------------------------------------------------- */

int
tmpfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update(vp);

	TMPFS_NODE_LOCK_SH(node);
	vap->va_type = vp->v_type;
	vap->va_mode = node->tn_mode;
	vap->va_nlink = node->tn_links;
	vap->va_uid = node->tn_uid;
	vap->va_gid = node->tn_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->tn_id;
	vap->va_size = node->tn_size;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_atime.tv_sec = node->tn_atime;
	vap->va_atime.tv_nsec = node->tn_atimensec;
	vap->va_mtime.tv_sec = node->tn_mtime;
	vap->va_mtime.tv_nsec = node->tn_mtimensec;
	vap->va_ctime.tv_sec = node->tn_ctime;
	vap->va_ctime.tv_nsec = node->tn_ctimensec;
	vap->va_gen = node->tn_gen;
	vap->va_flags = node->tn_flags;
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		vap->va_rmajor = umajor(node->tn_rdev);
		vap->va_rminor = uminor(node->tn_rdev);
	}
	vap->va_bytes = round_page(node->tn_size);
	vap->va_filerev = 0;
	TMPFS_NODE_UNLOCK(node);

	return 0;
}

/* --------------------------------------------------------------------- */

int
tmpfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	int error = 0;
	int kflags = 0;

	TMPFS_NODE_LOCK(node);
	if (error == 0 && (vap->va_flags != VNOVAL)) {
		error = tmpfs_chflags(vp, vap->va_flags, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_size != VNOVAL)) {
		/* restore any saved pages before proceeding */
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object,
					 TMPFS_MOVF_FROMBACKING |
					 TMPFS_MOVF_DEACTIVATE);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		if (vap->va_size > node->tn_size)
			kflags |= NOTE_WRITE | NOTE_EXTEND;
		else
			kflags |= NOTE_WRITE;
		error = tmpfs_chsize(vp, vap->va_size, cred);
	}

	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
			   vap->va_gid != (gid_t)VNOVAL)) {
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
		error = tmpfs_chmod(vp, vap->va_mode, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
			    vap->va_atime.tv_nsec != VNOVAL) ||
			   (vap->va_mtime.tv_sec != VNOVAL &&
			    vap->va_mtime.tv_nsec != VNOVAL))) {
		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
				      vap->va_vaflags, cred);
		kflags |= NOTE_ATTRIB;
	}

	/*
	 * Update the node times.  We give preference to the error codes
	 * generated by this function rather than the ones that may arise
	 * from tmpfs_update.
	 */
	tmpfs_update(vp);
	TMPFS_NODE_UNLOCK(node);
	tmpfs_knote(vp, kflags);

	return (error);
}

/* --------------------------------------------------------------------- */

/*
 * fsync is usually a NOP, but we must take action when unmounting or
 * when recycling.
 */
static int
tmpfs_fsync(struct vop_fsync_args *ap)
{
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;

	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * tmpfs vnodes typically remain dirty, avoid long syncer scans
	 * by forcing removal from the syncer list.
	 */
	vn_syncer_remove(vp, 1);

	tmpfs_update(vp);
	if (vp->v_type == VREG) {
		if (vp->v_flag & VRECLAIMED) {
			if (node->tn_links == 0)
				tmpfs_truncate(vp, 0);
			else
				vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
		}
	}

	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_read(struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	size_t resid;
	int error;
	int seqcount;

	/*
	 * Check the basics
	 */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
	resid = uio->uio_resid;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	error = vop_helper_read_shortcut(ap);
	if (error)
		return error;
	if (uio->uio_resid == 0) {
		if (resid)
			goto finished;
		return error;
	}

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object,
					 TMPFS_MOVF_FROMBACKING);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		TMPFS_NODE_UNLOCK(node);
	}

	/*
	 * Fall-through to our normal read code.
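	 *
	 * Each pass below maps uio_offset onto a TMPFS_BLKSIZE-aligned
	 * buffer (base_offset) plus an intra-buffer offset, then copies
	 * out at most one buffer's worth of data per iteration.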
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, GETBLK_KVABIO);
		if (bp == NULL) {
			if (tmpfs_cluster_rd_enable) {
				error = cluster_readx(vp, node->tn_size,
						      base_offset,
						      TMPFS_BLKSIZE,
						      B_NOTMETA | B_KVABIO,
						      uio->uio_resid,
						      seqcount * MAXBSIZE,
						      &bp);
			} else {
				error = bread_kvabio(vp, base_offset,
						     TMPFS_BLKSIZE, &bp);
			}
			if (error) {
				brelse(bp);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}

			/*
			 * tmpfs pretty much fiddles directly with the VM
			 * system, don't let it exhaust it or we won't play
			 * nice with other processes.
			 *
			 * Only do this if the VOP is coming from a normal
			 * read/write.  The VM system handles the case for
			 * UIO_NOCOPY.
			 */
			if (uio->uio_segflg != UIO_NOCOPY)
				vm_wait_nominal();
		}
		bp->b_flags |= B_CLUSTEROK;
		bkvasync(bp);

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

finished:
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return (error);
}

static int
tmpfs_write(struct vop_write_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct thread *td = uio->uio_td;
	struct tmpfs_node *node;
	boolean_t extended;
	off_t oldsize;
	int error;
	off_t base_offset;
	size_t offset;
	size_t len;
	struct rlimit limit;
	int trivial = 0;
	int kflags = 0;
	int seqcount;

	error = 0;
	if (uio->uio_resid == 0) {
		return error;
	}

	node = VP_TO_TMPFS_NODE(vp);

	if (vp->v_type != VREG)
		return (EINVAL);
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	TMPFS_NODE_LOCK(node);

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object,
				 TMPFS_MOVF_FROMBACKING);
		node->tn_reg.tn_pages_in_aobj = 0;
	}

	oldsize = node->tn_size;
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = node->tn_size;

	/*
	 * Check for illegal write offsets.
	 */
	if (uio->uio_offset + uio->uio_resid >
	    VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
		error = EFBIG;
		goto done;
	}

	/*
	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
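	 *
	 * For a normal user thread, a write extending past the process's
	 * RLIMIT_FSIZE limit raises SIGXFSZ and fails with EFBIG, per the
	 * usual filesystem convention.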
	 */
	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
		if (error)
			goto done;
		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
			ksignal(td->td_proc, SIGXFSZ);
			error = EFBIG;
			goto done;
		}
	}

	/*
	 * Extend the file's size if necessary
	 */
	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);

	while (uio->uio_resid > 0) {
		/*
		 * Don't completely blow out running buffer I/O
		 * when being hit from the pageout daemon.
		 */
		if (uio->uio_segflg == UIO_NOCOPY &&
		    (ap->a_ioflag & IO_RECURSE) == 0) {
			bwillwrite(TMPFS_BLKSIZE);
		}

		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;

		if ((uio->uio_offset + len) > node->tn_size) {
			trivial = (uio->uio_offset <= node->tn_size);
			error = tmpfs_reg_resize(vp, uio->uio_offset + len,
						 trivial);
			if (error)
				break;
		}

		/*
		 * Read to fill in any gaps.  Theoretically we could
		 * optimize this if the write covers the entire buffer
		 * and is not a UIO_NOCOPY write, however this can lead
		 * to a security violation exposing random kernel memory
		 * (whatever junk was in the backing VM pages before).
		 *
		 * So just use bread() to do the right thing.
		 */
		error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
		if (error) {
			/* check the bread error before touching b_data */
			brelse(bp);
			break;
		}
		bkvasync(bp);
		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		if (error) {
			kprintf("tmpfs_write uiomove error %d\n", error);
			brelse(bp);
			break;
		}

		if (uio->uio_offset > node->tn_size) {
			node->tn_size = uio->uio_offset;
			kflags |= NOTE_EXTEND;
		}
		kflags |= NOTE_WRITE;

		/*
		 * Always try to flush the page in the UIO_NOCOPY case.  This
		 * can come from the pageout daemon or during vnode eviction.
		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
		 *
		 * For the normal case we buwrite(), dirtying the underlying
		 * VM pages instead of dirtying the buffer and releasing the
		 * buffer as a clean buffer.  This allows tmpfs to use
		 * essentially all available memory to cache file data.
		 * If we used bdwrite() the buffer cache would wind up
		 * flushing the data to swap too quickly.
		 *
		 * But because tmpfs can seriously load the VM system we
		 * fall-back to using bdwrite() when free memory starts
		 * to get low.  This shifts the load away from the VM system
		 * and makes tmpfs act more like a normal filesystem with
		 * regards to disk activity.
		 *
		 * tmpfs pretty much fiddles directly with the VM
		 * system, don't let it exhaust it or we won't play
		 * nice with other processes.  Only do this if the
		 * VOP is coming from a normal read/write.  The VM system
		 * handles the case for UIO_NOCOPY.
		 */
		bp->b_flags |= B_CLUSTEROK;
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Flush from the pageout daemon, deal with
			 * potentially very heavy tmpfs write activity
			 * causing long stalls in the pageout daemon
			 * before pages get to free/cache.
			 *
			 * (a) Under severe pressure setting B_DIRECT will
			 *     cause a buffer release to try to free the
			 *     underlying pages.
			 *
			 * (b) Under modest memory pressure the B_RELBUF
			 *     alone is sufficient to get the pages moved
			 *     to the cache.
			 *     We could also force this by
			 *     setting B_NOTMETA but that might have other
			 *     unintended side-effects (e.g. setting
			 *     PG_NOTMETA on the VM page).
			 *
			 * Hopefully this will unblock the VM system more
			 * quickly under extreme tmpfs write load.
			 */
			if (vm_page_count_min(vm_page_free_hysteresis))
				bp->b_flags |= B_DIRECT;
			bp->b_flags |= B_AGE | B_RELBUF;
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
			cluster_awrite(bp);
		} else if (vm_pages_needed || vm_paging_needed(0) ||
			   tmpfs_bufcache_mode >= 2) {
			/*
			 * If the pageout daemon is running we cycle the
			 * write through the buffer cache normally to
			 * pipeline the flush, thus avoiding adding any
			 * more memory pressure to the pageout daemon.
			 */
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
			if (tmpfs_cluster_wr_enable) {
				cluster_write(bp, node->tn_size,
					      TMPFS_BLKSIZE, seqcount);
			} else {
				bdwrite(bp);
			}
		} else {
			/*
			 * Otherwise run the buffer directly through to the
			 * backing VM store, leaving the buffer clean so
			 * buffer limits do not force early flushes to swap.
			 */
			buwrite(bp);
			/*vm_wait_nominal();*/
		}

		if (bp->b_error) {
			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
			break;
		}
	}

	if (error) {
		if (extended) {
			(void)tmpfs_reg_resize(vp, oldsize, trivial);
			kflags &= ~NOTE_EXTEND;
		}
		goto done;
	}

	/*
	 * Currently we don't set the mtime on files modified via mmap()
	 * because we can't tell the difference between those modifications
	 * and an attempt by the pageout daemon to flush tmpfs pages to
	 * swap.
	 *
	 * This is because in order to defer flushes as long as possible
	 * buwrite() works by marking the underlying VM pages dirty in
	 * order to be able to dispose of the buffer cache buffer without
	 * flushing it.
	 */
	if (uio->uio_segflg == UIO_NOCOPY) {
		if (vp->v_flag & VLASTWRITETS) {
			node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
			node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
		}
	} else {
		node->tn_status |= TMPFS_NODE_MODIFIED;
		vclrflags(vp, VLASTWRITETS);
	}

	if (extended)
		node->tn_status |= TMPFS_NODE_CHANGED;

	if (node->tn_mode & (S_ISUID | S_ISGID)) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
			node->tn_mode &= ~(S_ISUID | S_ISGID);
	}
done:
	TMPFS_NODE_UNLOCK(node);
	if (kflags)
		tmpfs_knote(vp, kflags);

	return(error);
}

static int
tmpfs_advlock(struct vop_advlock_args *ap)
{
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));

	return (error);
}

/*
 * The strategy function is typically only called when memory pressure
 * forces the system to attempt to pageout pages.  It can also be called
 * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
 * operations run through the buffer cache and only reach this function
 * when their buffers are finally flushed to the swap backing store.
 *
 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
 * our cpu.  swap_pager_strategy() is all we really use, and it directly
 * supports this.
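 *
 * (KVABIO means anyone touching bp->b_data must first call bkvasync(bp)
 * to synchronize the buffer's KVA to the current cpu, which is what the
 * read and write paths above do.)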
 */
static int
tmpfs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_page_t m;
	int i;

	if (vp->v_type != VREG) {
		bp->b_resid = bp->b_bcount;
		bp->b_flags |= B_ERROR | B_INVAL;
		bp->b_error = EINVAL;
		biodone(bio);
		return(0);
	}

	node = VP_TO_TMPFS_NODE(vp);

	uobj = node->tn_reg.tn_aobj;

	/*
	 * Don't bother flushing to swap if there is no swap, just
	 * ensure that the pages are marked as needing a commit (still).
	 */
	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
		bp->b_resid = 0;
		bp->b_error = 0;
		biodone(bio);
	} else {
		/*
		 * Tell the buffer cache to try to recycle the pages
		 * to PQ_CACHE on release.
		 */
		if (tmpfs_bufcache_mode >= 2 ||
		    (tmpfs_bufcache_mode == 1 && vm_paging_needed(0))) {
			bp->b_flags |= B_TTC;
		}
		nbio = push_bio(bio);
		nbio->bio_done = tmpfs_strategy_done;
		nbio->bio_offset = bio->bio_offset;
		swap_pager_strategy(uobj, nbio);
	}
	return 0;
}

/*
 * If we were unable to commit the pages to swap make sure they are marked
 * as needing a commit (again).  If we were, clear the flag to allow the
 * pages to be freed.
 *
 * Do not error-out the buffer.  In particular, vinvalbuf() needs to
 * always work.
 */
static void
tmpfs_strategy_done(struct bio *bio)
{
	struct buf *bp;
	vm_page_t m;
	int i;

	bp = bio->bio_buf;

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
		bp->b_resid = 0;
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
	} else {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_clear_commit(m);
		}
	}
	bio = pop_bio(bio);
	biodone(bio);
}

static int
tmpfs_bmap(struct vop_bmap_args *ap)
{
	if (ap->a_doffsetp != NULL)
		*ap->a_doffsetp = ap->a_loffset;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nremove(struct vop_nremove_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
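	 *
	 * (cache_vget() returns the vnode referenced and locked; we
	 * immediately unlock it and hold only the reference until the
	 * final vrele() below.)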
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(vp->v_mount == dvp->v_mount);
	KKASSERT(error == 0);
	vn_unlock(vp);

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out2;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	if (de == NULL) {
		error = ENOENT;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	if (node->tn_links > 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	cache_unlink(ap->a_nch);
	tmpfs_knote(vp, NOTE_DELETE);
	error = 0;

out:
	if (error == 0)
		tmpfs_knote(dvp, NOTE_WRITE);
out2:
	vrele(vp);

	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nlink(struct vop_nlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;
	struct tmpfs_node *dnode;
	int error;

	KKASSERT(dvp != vp); /* XXX When can this be false? */

	node = VP_TO_TMPFS_NODE(vp);
	dnode = VP_TO_TMPFS_NODE(dvp);
	TMPFS_NODE_LOCK(dnode);

	/* XXX: Why aren't the following two tests done by the caller? */

	/* Hard links of directories are forbidden. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	/* Cannot create cross-device links. */
	if (dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	KKASSERT(node->tn_links <= LINK_MAX);
	if (node->tn_links >= LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
				   ncp->nc_name, ncp->nc_nlen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	tmpfs_dir_attach(dnode, de);

	/* vp link count has changed, so update node times.
	 */

	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(node);
	tmpfs_update(vp);

	tmpfs_knote(vp, NOTE_LINK);
	cache_setunresolved(ap->a_nch);
	cache_setvp(ap->a_nch, vp);
	error = 0;

out:
	TMPFS_NODE_UNLOCK(dnode);
	if (error == 0)
		tmpfs_knote(dvp, NOTE_WRITE);
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrename(struct vop_nrename_args *ap)
{
	struct vnode *fdvp = ap->a_fdvp;
	struct namecache *fncp = ap->a_fnch->ncp;
	struct vnode *fvp = fncp->nc_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct namecache *tncp = ap->a_tnch->ncp;
	struct vnode *tvp;
	struct tmpfs_dirent *de, *tde;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	char *newname;
	char *oldname;
	int error;

	KKASSERT(fdvp->v_mount == fvp->v_mount);

	/*
	 * Because tvp can get overwritten we have to vget it instead of
	 * just vref or use it, otherwise its VINACTIVE flag may not get
	 * cleared and the node won't get destroyed.
	 */
	error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
	if (error == 0) {
		tnode = VP_TO_TMPFS_NODE(tvp);
		vn_unlock(tvp);
	} else {
		tnode = NULL;
	}

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	TMPFS_NODE_LOCK(fdnode);
	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */

	/* Avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		error = ENOENT;
		goto out_locked;
	}
	KKASSERT(de->td_node == fnode);

	/*
	 * If replacing an entry in the target directory and that entry
	 * is a directory, it must be empty.
	 *
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one (it does?).
	 */
	if (tvp != NULL) {
		KKASSERT(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			KKASSERT(fnode->tn_type != VDIR &&
				 tnode->tn_type != VDIR);
		}
	}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
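	 *
	 * (The allocation below uses M_NULLOK, so a failure manifests as
	 * a NULL return, which we map to ENOSPC instead of blocking.)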
	 */
	if (fncp->nc_nlen != tncp->nc_nlen ||
	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
				  M_WAITOK | M_NULLOK);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_locked;
		}
		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
		newname[tncp->nc_nlen] = '\0';
	} else {
		newname = NULL;
	}

	/*
	 * Unlink entry from source directory.  Note that the kernel has
	 * already checked for illegal recursion cases (renaming a directory
	 * into a subdirectory of itself).
	 */
	if (fdnode != tdnode) {
		tmpfs_dir_detach(fdnode, de);
	} else {
		/* XXX depend on namecache lock */
		TMPFS_NODE_LOCK(fdnode);
		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
		RB_REMOVE(tmpfs_dirtree_cookie,
			  &fdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(fdnode);
	}

	/*
	 * Handle any name change.  Swap with newname, we will
	 * deallocate it at the end.
	 */
	if (newname != NULL) {
#if 0
		TMPFS_NODE_LOCK(fnode);
		fnode->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(fnode);
#endif
		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = (uint16_t)tncp->nc_nlen;
		newname = oldname;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		TMPFS_NODE_LOCK(tdnode);
		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
		tmpfs_dir_detach(tdnode, tde);
		TMPFS_NODE_UNLOCK(tdnode);
		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
		/*cache_inval_vp(tvp, CINV_DESTROY);*/
	}

	/*
	 * Link entry to target directory.  If the entry
	 * represents a directory move the parent linkage
	 * as well.
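	 *
	 * (A rename within the same directory only requires the RB-tree
	 * re-insertion below; the parent linkage does not change, so
	 * tmpfs_dir_attach() is not needed in that case.)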
	 */
	if (fdnode != tdnode) {
		if (de->td_node->tn_type == VDIR) {
			TMPFS_VALIDATE_DIR(fnode);
		}
		tmpfs_dir_attach(tdnode, de);
	} else {
		TMPFS_NODE_LOCK(tdnode);
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
		RB_INSERT(tmpfs_dirtree_cookie,
			  &tdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(tdnode);
	}

	/*
	 * Finish up
	 */
	if (newname) {
		kfree(newname, tmp->tm_name_zone);
		newname = NULL;
	}
	cache_rename(ap->a_fnch, ap->a_tnch);
	tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
	tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
	if (fnode->tn_vnode)
		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
	error = 0;

out_locked:
	;
out:
	if (tvp)
		vrele(tvp);
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nmkdir(struct vop_nmkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	KKASSERT(vap->va_type == VDIR);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrmdir(struct vop_nrmdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	int error;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);
	vn_unlock(vp);

	/*
	 * Prevalidate so we don't hit an assertion later
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/*
	 * Directories with more than two entries ('.' and '..') cannot
	 * be removed.
	 */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	if ((dnode->tn_flags & APPEND) ||
	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/*
	 * This invariant holds only if we are not trying to
	 * remove "..".  We checked for that above so this is safe now.
	 */
	KKASSERT(node->tn_dir.tn_parent == dnode);

	/*
	 * Get the directory entry associated with node (vp).  This
	 * was filled by tmpfs_lookup while looking up the entry.
	 */
	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));

	/* Check flags to see if we are allowed to remove the directory. */
	if ((dnode->tn_flags & APPEND) ||
	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
		error = EPERM;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(dnode);
	TMPFS_ASSERT_ELOCKED(dnode);
	TMPFS_NODE_LOCK(node);
	TMPFS_ASSERT_ELOCKED(node);

	/*
	 * Must set parent linkage to NULL (tested by ncreate to disallow
	 * the creation of new files/dirs in a deleted directory)
	 */
	node->tn_status |= TMPFS_NODE_CHANGED;

	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
			    TMPFS_NODE_MODIFIED;

	TMPFS_NODE_UNLOCK(node);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */

	TMPFS_NODE_LOCK(dnode);
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(dnode);
	tmpfs_update(dvp);

	cache_unlink(ap->a_nch);
	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	error = 0;

out:
	vrele(vp);

	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nsymlink(struct vop_nsymlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	char *target = ap->a_target;
	int error;

	vap->va_type = VLNK;
	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
	if (error == 0) {
		tmpfs_knote(*vpp, NOTE_WRITE);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes.
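	 *
	 * Directory offsets are cookies: TMPFS_DIRCOOKIE_DOT and
	 * TMPFS_DIRCOOKIE_DOTDOT denote the two synthetic entries, real
	 * entries are addressed by their RB-tree cookies, and
	 * TMPFS_DIRCOOKIE_EOF marks end-of-directory.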
	 */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		/* may lock parent, cannot hold node lock */
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	TMPFS_NODE_LOCK_SH(node);
	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	KKASSERT(error >= -1);

	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					de = RB_MIN(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree);
				} else if (de != NULL) {
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				} else {
					de = tmpfs_dir_lookupbycookie(node,
								      off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}
			(*cookies)[i] = off;
		}
		KKASSERT(uio->uio_offset == off);
	}
	TMPFS_NODE_UNLOCK(node);

	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int error;
	struct tmpfs_node *node;

	KKASSERT(uio->uio_offset == 0);
	KKASSERT(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);
	TMPFS_NODE_LOCK_SH(node);
	error = uiomove(node->tn_link,
			MIN(node->tn_size, uio->uio_resid), uio);
	TMPFS_NODE_UNLOCK(node);
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Degenerate case
	 */
	if (node == NULL) {
		vrecycle(vp);
		lwkt_reltoken(&mp->mnt_token);
		return(0);
	}

	/*
	 * Get rid of unreferenced deleted vnodes sooner rather than
	 * later so the data memory can be recovered immediately.
1686 * 1687 * We must truncate the vnode to prevent the normal reclamation 1688 * path from flushing the data for the removed file to disk. 1689 */ 1690 TMPFS_NODE_LOCK(node); 1691 if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 && 1692 node->tn_links == 0) 1693 { 1694 node->tn_vpstate = TMPFS_VNODE_DOOMED; 1695 TMPFS_NODE_UNLOCK(node); 1696 if (node->tn_type == VREG) 1697 tmpfs_truncate(vp, 0); 1698 vrecycle(vp); 1699 } else { 1700 /* 1701 * We must retain any VM pages belonging to the vnode's 1702 * object as the vnode will destroy the object during a 1703 * later reclaim. We call vinvalbuf(V_SAVE) to clean 1704 * out the buffer cache. 1705 * 1706 * On DragonFlyBSD, vnodes are not immediately deactivated 1707 * on the 1->0 refs, so this is a relatively optimal 1708 * operation. We have to do this in tmpfs_inactive() 1709 * because the pages will have already been thrown away 1710 * at the time tmpfs_reclaim() is called. 1711 */ 1712 if (node->tn_type == VREG && 1713 node->tn_reg.tn_pages_in_aobj == 0) { 1714 vinvalbuf(vp, V_SAVE, 0, 0); 1715 KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree)); 1716 KKASSERT(RB_EMPTY(&vp->v_rbclean_tree)); 1717 tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj, 1718 TMPFS_MOVF_DEACTIVATE); 1719 node->tn_reg.tn_pages_in_aobj = 1; 1720 } 1721 1722 TMPFS_NODE_UNLOCK(node); 1723 } 1724 lwkt_reltoken(&mp->mnt_token); 1725 1726 return 0; 1727 } 1728 1729 /* --------------------------------------------------------------------- */ 1730 1731 int 1732 tmpfs_reclaim(struct vop_reclaim_args *ap) 1733 { 1734 struct vnode *vp = ap->a_vp; 1735 struct tmpfs_mount *tmp; 1736 struct tmpfs_node *node; 1737 struct mount *mp; 1738 1739 mp = vp->v_mount; 1740 lwkt_gettoken(&mp->mnt_token); 1741 1742 node = VP_TO_TMPFS_NODE(vp); 1743 tmp = VFS_TO_TMPFS(vp->v_mount); 1744 KKASSERT(mp == tmp->tm_mount); 1745 1746 TMPFS_NODE_LOCK(node); 1747 KKASSERT(node->tn_vnode == vp); 1748 node->tn_vnode = NULL; 1749 vp->v_data = NULL; 1750 1751 /* 1752 * If the node referenced by this vnode was deleted by the 1753 * user, we must free its associated data structures now that 1754 * the vnode is being reclaimed. 1755 * 1756 * Directories have an extra link ref. 
1757 */ 1758 if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 && 1759 node->tn_links == 0) { 1760 node->tn_vpstate = TMPFS_VNODE_DOOMED; 1761 tmpfs_free_node(tmp, node); 1762 /* eats the lock */ 1763 } else { 1764 TMPFS_NODE_UNLOCK(node); 1765 } 1766 lwkt_reltoken(&mp->mnt_token); 1767 1768 KKASSERT(vp->v_data == NULL); 1769 return 0; 1770 } 1771 1772 /* --------------------------------------------------------------------- */ 1773 1774 static int 1775 tmpfs_mountctl(struct vop_mountctl_args *ap) 1776 { 1777 struct tmpfs_mount *tmp; 1778 struct mount *mp; 1779 int rc; 1780 1781 mp = ap->a_head.a_ops->head.vv_mount; 1782 lwkt_gettoken(&mp->mnt_token); 1783 1784 switch (ap->a_op) { 1785 case (MOUNTCTL_SET_EXPORT): 1786 tmp = (struct tmpfs_mount *) mp->mnt_data; 1787 1788 if (ap->a_ctllen != sizeof(struct export_args)) 1789 rc = (EINVAL); 1790 else 1791 rc = vfs_export(mp, &tmp->tm_export, 1792 (const struct export_args *) ap->a_ctl); 1793 break; 1794 default: 1795 rc = vop_stdmountctl(ap); 1796 break; 1797 } 1798 1799 lwkt_reltoken(&mp->mnt_token); 1800 return (rc); 1801 } 1802 1803 /* --------------------------------------------------------------------- */ 1804 1805 static int 1806 tmpfs_print(struct vop_print_args *ap) 1807 { 1808 struct vnode *vp = ap->a_vp; 1809 1810 struct tmpfs_node *node; 1811 1812 node = VP_TO_TMPFS_NODE(vp); 1813 1814 kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n", 1815 node, node->tn_flags, node->tn_links); 1816 kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n", 1817 node->tn_mode, node->tn_uid, node->tn_gid, 1818 (uintmax_t)node->tn_size, node->tn_status); 1819 1820 if (vp->v_type == VFIFO) 1821 fifo_printinfo(vp); 1822 1823 kprintf("\n"); 1824 1825 return 0; 1826 } 1827 1828 /* --------------------------------------------------------------------- */ 1829 1830 static int 1831 tmpfs_pathconf(struct vop_pathconf_args *ap) 1832 { 1833 struct vnode *vp = ap->a_vp; 1834 int name = ap->a_name; 1835 register_t *retval = ap->a_retval; 1836 struct tmpfs_mount *tmp; 1837 int error; 1838 1839 error = 0; 1840 1841 switch (name) { 1842 case _PC_CHOWN_RESTRICTED: 1843 *retval = 1; 1844 break; 1845 1846 case _PC_FILESIZEBITS: 1847 tmp = VFS_TO_TMPFS(vp->v_mount); 1848 *retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1); 1849 break; 1850 1851 case _PC_LINK_MAX: 1852 *retval = LINK_MAX; 1853 break; 1854 1855 case _PC_NAME_MAX: 1856 *retval = NAME_MAX; 1857 break; 1858 1859 case _PC_NO_TRUNC: 1860 *retval = 1; 1861 break; 1862 1863 case _PC_PATH_MAX: 1864 *retval = PATH_MAX; 1865 break; 1866 1867 case _PC_PIPE_BUF: 1868 *retval = PIPE_BUF; 1869 break; 1870 1871 case _PC_SYNC_IO: 1872 *retval = 1; 1873 break; 1874 1875 case _PC_2_SYMLINKS: 1876 *retval = 1; 1877 break; 1878 1879 default: 1880 error = EINVAL; 1881 } 1882 1883 return error; 1884 } 1885 1886 /************************************************************************ 1887 * KQFILTER OPS * 1888 ************************************************************************/ 1889 1890 static void filt_tmpfsdetach(struct knote *kn); 1891 static int filt_tmpfsread(struct knote *kn, long hint); 1892 static int filt_tmpfswrite(struct knote *kn, long hint); 1893 static int filt_tmpfsvnode(struct knote *kn, long hint); 1894 1895 static struct filterops tmpfsread_filtops = 1896 { FILTEROP_ISFD | FILTEROP_MPSAFE, 1897 NULL, filt_tmpfsdetach, filt_tmpfsread }; 1898 static struct filterops tmpfswrite_filtops = 1899 { FILTEROP_ISFD | FILTEROP_MPSAFE, 1900 NULL, filt_tmpfsdetach, filt_tmpfswrite }; 
static struct filterops tmpfsvnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };

static int
tmpfs_kqfilter(struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &tmpfsread_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &tmpfswrite_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &tmpfsvnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

static void
filt_tmpfsdetach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

static int
filt_tmpfsread(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}

	/*
	 * Interlock against MP races when performing this function.
	 */
	TMPFS_NODE_LOCK_SH(node);
	off = node->tn_size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI) {
		TMPFS_NODE_UNLOCK(node);
		return(1);
	}
	if (kn->kn_data == 0) {
		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	}
	TMPFS_NODE_UNLOCK(node);
	return (kn->kn_data != 0);
}

static int
filt_tmpfswrite(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}

static int
filt_tmpfsvnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}

/*
 * Helper to move VM pages between objects
 *
 * NOTE: The vm_page_rename() dirties the page, so we can clear the
 *	 PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
 *	 the pageout daemon will be able to page them out.
 */
static int
tmpfs_move_pages_callback(vm_page_t p, void *data)
{
	struct rb_vm_page_scan_info *info = data;
	vm_pindex_t pindex;

	pindex = p->pindex;
	if (vm_page_busy_try(p, TRUE)) {
		vm_page_sleep_busy(p, TRUE, "tpgmov");
		info->error = -1;
		return -1;
	}
	if (p->object != info->object || p->pindex != pindex) {
		vm_page_wakeup(p);
		info->error = -1;
		return -1;
	}

	if ((info->pagerflags & TMPFS_MOVF_FROMBACKING) &&
	    (p->flags & PG_SWAPPED) &&
	    (p->flags & PG_NEED_COMMIT) == 0 &&
	    p->dirty == 0) {
		/*
		 * If the page in the backing aobj was paged out to swap
		 * it will be clean and it is better to free it rather
		 * than re-dirty it.  We will assume that the page was
		 * paged out to swap for a reason!
		 *
		 * This helps avoid unnecessary swap thrashing on the page.
		 */
		vm_page_free(p);
	} else if ((info->pagerflags & TMPFS_MOVF_FROMBACKING) == 0 &&
		   (p->flags & PG_NEED_COMMIT) == 0 &&
		   p->dirty == 0) {
		/*
		 * If the page associated with the vnode was cleaned via
		 * a tmpfs_strategy() call, it exists as a swap block in
		 * aobj and it is again better to free it rather than
		 * re-dirty it.  We will assume that the page was
		 * paged out to swap for a reason!
		 *
		 * This helps avoid unnecessary swap thrashing on the page.
		 */
		vm_page_free(p);
	} else {
		/*
		 * Rename the page, which will also ensure that it is flagged
		 * as dirty and check whether a swap block association exists
		 * in the target object or not, setting appropriate flags if
		 * it does.
		 */
		vm_page_rename(p, info->dest_object, pindex);
		vm_page_clear_commit(p);
		if (info->pagerflags & TMPFS_MOVF_DEACTIVATE)
			vm_page_deactivate(p);
		vm_page_wakeup(p);
		/* page automatically made dirty */
	}

	return 0;
}

static void
tmpfs_move_pages(vm_object_t src, vm_object_t dst, int movflags)
{
	struct rb_vm_page_scan_info info;

	vm_object_hold(src);
	vm_object_hold(dst);
	info.object = src;
	info.dest_object = dst;
	info.pagerflags = movflags;
	do {
		if (src->paging_in_progress)
			vm_object_pip_wait(src, "objtfs");
		info.error = 1;
		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
					tmpfs_move_pages_callback, &info);
	} while (info.error < 0 || !RB_EMPTY(&src->rb_memq) ||
		 src->paging_in_progress);
	vm_object_drop(dst);
	vm_object_drop(src);
}

/* --------------------------------------------------------------------- */

/*
 * vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_ops tmpfs_vnode_vops = {
	.vop_default =		vop_defaultop,
	.vop_getpages =		vop_stdgetpages,
	.vop_putpages =		vop_stdputpages,
	.vop_ncreate =		tmpfs_ncreate,
	.vop_nresolve =		tmpfs_nresolve,
	.vop_nlookupdotdot =	tmpfs_nlookupdotdot,
	.vop_nmknod =		tmpfs_nmknod,
	.vop_open =		tmpfs_open,
	.vop_close =		tmpfs_close,
	.vop_access =		tmpfs_access,
	.vop_getattr =		tmpfs_getattr,
	.vop_setattr =		tmpfs_setattr,
	.vop_read =		tmpfs_read,
	.vop_write =		tmpfs_write,
	.vop_fsync =		tmpfs_fsync,
	.vop_mountctl =		tmpfs_mountctl,
	.vop_nremove =		tmpfs_nremove,
	.vop_nlink =		tmpfs_nlink,
	.vop_nrename =		tmpfs_nrename,
	.vop_nmkdir =		tmpfs_nmkdir,
	.vop_nrmdir =		tmpfs_nrmdir,
	.vop_nsymlink =		tmpfs_nsymlink,
	.vop_readdir =		tmpfs_readdir,
	.vop_readlink =		tmpfs_readlink,
	.vop_inactive =		tmpfs_inactive,
	.vop_reclaim =		tmpfs_reclaim,
	.vop_print =		tmpfs_print,
	.vop_pathconf =		tmpfs_pathconf,
	.vop_bmap =		tmpfs_bmap,
	.vop_strategy =		tmpfs_strategy,
	.vop_advlock =		tmpfs_advlock,
	.vop_kqfilter =		tmpfs_kqfilter
};