/*	$NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $	*/

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/kernel.h>
#include <sys/param.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <vfs/tmpfs/tmpfs.h>
#include <vfs/tmpfs/tmpfs_vnops.h>

static ino_t tmpfs_fetch_ino(struct tmpfs_mount *);

static int tmpfs_dirtree_compare(struct tmpfs_dirent *a,
        struct tmpfs_dirent *b);
RB_GENERATE(tmpfs_dirtree, tmpfs_dirent, rb_node, tmpfs_dirtree_compare);

static int tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a,
        struct tmpfs_dirent *b);
RB_GENERATE(tmpfs_dirtree_cookie, tmpfs_dirent,
        rb_cookienode, tmpfs_dirtree_compare_cookie);


/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 *
 * If the node type is set to 'VDIR', the node is created without a parent
 * link; the caller is expected to attach it to its parent directory
 * afterwards via tmpfs_dir_attach().
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the 'rmajor' and
 * 'rminor' parameters specify the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter 'target' specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are allocated from the node pool (an objcache) as
 * long as there is enough space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
                 uid_t uid, gid_t gid, mode_t mode,
                 char *target, int rmajor, int rminor,
                 struct tmpfs_node **node)
{
        struct tmpfs_node *nnode;
        struct timespec ts;
        udev_t rdev;

        KKASSERT(IFF(type == VLNK, target != NULL));
        KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL));

        if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
                return (ENOSPC);

        nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK);
        if (nnode == NULL)
                return (ENOSPC);

        /* Generic initialization. */
        nnode->tn_type = type;
        vfs_timestamp(&ts);
        nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime
                = ts.tv_sec;
        nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec
                = ts.tv_nsec;
        nnode->tn_uid = uid;
        nnode->tn_gid = gid;
        nnode->tn_mode = mode;
        nnode->tn_id = tmpfs_fetch_ino(tmp);
        nnode->tn_advlock.init_done = 0;
        KKASSERT(nnode->tn_links == 0);

        /* Type-specific initialization. */
        switch (nnode->tn_type) {
        case VBLK:
        case VCHR:
                rdev = makeudev(rmajor, rminor);
                if (rdev == NOUDEV) {
                        objcache_put(tmp->tm_node_pool, nnode);
                        return (EINVAL);
                }
                nnode->tn_rdev = rdev;
                break;

        case VDIR:
                RB_INIT(&nnode->tn_dir.tn_dirtree);
                RB_INIT(&nnode->tn_dir.tn_cookietree);
                nnode->tn_size = 0;
                break;

        case VFIFO:
                /* FALLTHROUGH */
        case VSOCK:
                break;

        case VLNK:
                nnode->tn_size = strlen(target);
                nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone,
                                         M_WAITOK | M_NULLOK);
                if (nnode->tn_link == NULL) {
                        objcache_put(tmp->tm_node_pool, nnode);
                        return (ENOSPC);
                }
                bcopy(target, nnode->tn_link, nnode->tn_size);
                nnode->tn_link[nnode->tn_size] = '\0';
                break;

        case VREG:
                nnode->tn_reg.tn_aobj =
                    swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0);
                nnode->tn_reg.tn_aobj_pages = 0;
                nnode->tn_size = 0;
                break;

        default:
                panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type);
        }

        TMPFS_NODE_LOCK(nnode);
        TMPFS_LOCK(tmp);
        LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
        tmp->tm_nodes_inuse++;
        TMPFS_UNLOCK(tmp);
        TMPFS_NODE_UNLOCK(nnode);

        *node = nnode;
        return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the node pointed to by 'node' from the file system 'tmp'.
 * If the node does not belong to the given mount point, the results are
 * unpredictable.
 *
 * If the node references a directory, it must have no entries; removing
 * them here would require a recursive algorithm, something forbidden in
 * kernel space.  Furthermore, there is no need to provide such
 * functionality (recursive removal) because the only primitives offered
 * to the user are the removal of empty directories and the deletion of
 * individual files.
 *
 * Note that the node's memory is not released outright; it is
 * re-initialized and returned to the objcache node pool for reuse.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
        vm_pindex_t pages = 0;

#ifdef INVARIANTS
        TMPFS_ASSERT_ELOCKED(node);
        KKASSERT(node->tn_vnode == NULL);
        KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
#endif

        TMPFS_LOCK(tmp);
        LIST_REMOVE(node, tn_entries);
        tmp->tm_nodes_inuse--;
        TMPFS_UNLOCK(tmp);
        TMPFS_NODE_UNLOCK(node);

        switch (node->tn_type) {
        case VNON:
                /* Do not do anything.  VNON is provided to let the
                 * allocation routine clean itself easily by avoiding
                 * duplicating code in it. */
                /* FALLTHROUGH */
        case VBLK:
                /* FALLTHROUGH */
        case VCHR:
                break;

        case VDIR:
                /*
                 * The parent link can be NULL if this is the root
                 * node or if it is a directory node that was rmdir'd.
                 *
                 * XXX what if node is a directory which still contains
                 * directory entries (e.g. due to a forced umount) ?
                 */
                node->tn_size = 0;
                KKASSERT(node->tn_dir.tn_parent == NULL);

                /*
                 * If the root node is being destroyed don't leave a
                 * dangling pointer in tmpfs_mount.
                 */
                if (node == tmp->tm_root)
                        tmp->tm_root = NULL;
                break;

        case VFIFO:
                /* FALLTHROUGH */
        case VSOCK:
                break;

        case VLNK:
                kfree(node->tn_link, tmp->tm_name_zone);
                node->tn_link = NULL;
                node->tn_size = 0;
                break;

        case VREG:
                if (node->tn_reg.tn_aobj != NULL)
                        vm_object_deallocate(node->tn_reg.tn_aobj);
                node->tn_reg.tn_aobj = NULL;
                pages = node->tn_reg.tn_aobj_pages;
                break;

        default:
                panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type);
        }

        /*
         * Clean up fields for the next allocation.  The objcache only ctors
         * new allocations.
         */
        tmpfs_node_ctor(node, NULL, 0);
        objcache_put(tmp->tm_node_pool, node);
        /* node is now invalid */

        TMPFS_LOCK(tmp);
        tmp->tm_pages_used -= pages;
        TMPFS_UNLOCK(tmp);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node 'node' with the name 'name'.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
                   const char *name, uint16_t len, struct tmpfs_dirent **de)
{
        struct tmpfs_dirent *nde;

        nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK);
        nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK);
        if (nde->td_name == NULL) {
                objcache_put(tmp->tm_dirent_pool, nde);
                *de = NULL;
                return (ENOSPC);
        }
        nde->td_namelen = len;
        bcopy(name, nde->td_name, len);
        nde->td_name[len] = '\0';

        nde->td_node = node;

        TMPFS_NODE_LOCK(node);
        ++node->tn_links;
        TMPFS_NODE_UNLOCK(node);

        *de = nde;

        return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
        struct tmpfs_node *node;

        node = de->td_node;

        TMPFS_NODE_LOCK(node);
        TMPFS_ASSERT_ELOCKED(node);
        KKASSERT(node->tn_links > 0);
        node->tn_links--;
        TMPFS_NODE_UNLOCK(node);

        kfree(de->td_name, tmp->tm_name_zone);
        de->td_namelen = 0;
        de->td_name = NULL;
        de->td_node = NULL;
        objcache_put(tmp->tm_dirent_pool, de);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new vnode for the node 'node' or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
               struct vnode **vpp)
{
        int error = 0;
        struct vnode *vp;

loop:
        /*
         * Interlocked extraction from node.  This can race many things.
         * We have to get a soft reference on the vnode while we hold
         * the node locked, then acquire it properly and check for races.
         */
        TMPFS_NODE_LOCK(node);
        if ((vp = node->tn_vnode) != NULL) {
                KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
                vhold(vp);
                TMPFS_NODE_UNLOCK(node);

                if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
                        vdrop(vp);
                        goto loop;
                }
                if (node->tn_vnode != vp) {
                        vput(vp);
                        vdrop(vp);
                        goto loop;
                }
                vdrop(vp);
                goto out;
        }
        /* vp is NULL */

        /*
         * This should never happen.
         */
        if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
                TMPFS_NODE_UNLOCK(node);
                error = ENOENT;
                goto out;
        }

        /*
         * Interlock against other calls to tmpfs_alloc_vp() trying to
         * allocate and assign a vp to node.
         */
        if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
                node->tn_vpstate |= TMPFS_VNODE_WANT;
                error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
                               "tmpfs_alloc_vp", 0);
                TMPFS_NODE_UNLOCK(node);
                if (error)
                        return error;
                goto loop;
        }
        node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
        TMPFS_NODE_UNLOCK(node);

        /*
         * Allocate a new vnode (may block).  The ALLOCATING flag should
         * prevent a race against someone else assigning node->tn_vnode.
         */
        error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
        if (error != 0)
                goto unlock;

        KKASSERT(node->tn_vnode == NULL);
        KKASSERT(vp != NULL);
        vp->v_data = node;
        vp->v_type = node->tn_type;

        /* Type-specific initialization. */
        switch (node->tn_type) {
        case VBLK:
                /* FALLTHROUGH */
        case VCHR:
                /* FALLTHROUGH */
        case VSOCK:
                break;
        case VREG:
                vinitvmio(vp, node->tn_size, TMPFS_BLKMASK, -1);
                break;
        case VLNK:
                break;
        case VFIFO:
                vp->v_ops = &mp->mnt_vn_fifo_ops;
                break;
        case VDIR:
                break;

        default:
                panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
        }


unlock:
        TMPFS_NODE_LOCK(node);

        KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
        node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
        node->tn_vnode = vp;

        if (node->tn_vpstate & TMPFS_VNODE_WANT) {
                node->tn_vpstate &= ~TMPFS_VNODE_WANT;
                TMPFS_NODE_UNLOCK(node);
                wakeup(&node->tn_vpstate);
        } else {
                TMPFS_NODE_UNLOCK(node);
        }

out:
        *vpp = vp;

        KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));
#ifdef INVARIANTS
        TMPFS_NODE_LOCK(node);
        KKASSERT(*vpp == node->tn_vnode);
        TMPFS_NODE_UNLOCK(node);
#endif

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
        struct tmpfs_node *node;

        node = VP_TO_TMPFS_NODE(vp);

        TMPFS_NODE_LOCK(node);
        KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0);
        node->tn_vnode = NULL;
        vp->v_data = NULL;
        TMPFS_NODE_UNLOCK(node);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'vap->va_type' and adds it to the parent
 * directory 'dvp'; this addition is done using the component name given
 * in 'ncp'.  The ownership of the new file is automatically assigned based
 * on the credentials of the caller (through 'cred'), the group is set
 * based on the parent directory, and the mode is determined from the 'vap'
 * argument.  If successful, *vpp holds a vnode to the newly created file
 * and zero is returned.  Otherwise *vpp is NULL and the function returns
 * an appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
                 struct namecache *ncp, struct ucred *cred, char *target)
{
        int error;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *dnode;
        struct tmpfs_node *node;

        tmp = VFS_TO_TMPFS(dvp->v_mount);
        dnode = VP_TO_TMPFS_DIR(dvp);
        *vpp = NULL;

        /*
         * If the directory was removed but a process was CD'd into it,
         * we do not allow any more file/dir creation within it.  Otherwise
         * we will lose track of it.
         */
        KKASSERT(dnode->tn_type == VDIR);
        if (dnode != tmp->tm_root && dnode->tn_dir.tn_parent == NULL)
                return ENOENT;

        /*
         * Make sure the link count does not overflow.
         */
        if (vap->va_type == VDIR && dnode->tn_links >= LINK_MAX)
                return EMLINK;

        /* Allocate a node that represents the new file. */
        error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid,
                                 dnode->tn_gid, vap->va_mode, target,
                                 vap->va_rmajor, vap->va_rminor, &node);
        if (error != 0)
                return error;
        TMPFS_NODE_LOCK(node);

        /* Allocate a directory entry that points to the new file. */
        error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de);
        if (error != 0) {
                tmpfs_free_node(tmp, node);
                /* eats node lock */
                return error;
        }

        /* Allocate a vnode for the new file. */
        error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
        if (error != 0) {
                tmpfs_free_dirent(tmp, de);
                tmpfs_free_node(tmp, node);
                /* eats node lock */
                return error;
        }

        /*
         * Now that all required items are allocated, we can proceed to
         * insert the new node into the directory, an operation that
         * cannot fail.
         */
        tmpfs_dir_attach(dnode, de);
        TMPFS_NODE_UNLOCK(node);

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Attaches the directory entry de to the directory represented by dnode.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 */
void
tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de)
{
        struct tmpfs_node *node = de->td_node;

        TMPFS_NODE_LOCK(dnode);
        if (node && node->tn_type == VDIR) {
                TMPFS_NODE_LOCK(node);
                ++node->tn_links;
                node->tn_status |= TMPFS_NODE_CHANGED;
                node->tn_dir.tn_parent = dnode;
                ++dnode->tn_links;
                TMPFS_NODE_UNLOCK(node);
        }
        RB_INSERT(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de);
        RB_INSERT(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de);
        dnode->tn_size += sizeof(struct tmpfs_dirent);
        dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
                            TMPFS_NODE_MODIFIED;
        TMPFS_NODE_UNLOCK(dnode);
}

/* --------------------------------------------------------------------- */

/*
 * Detaches the directory entry de from the directory represented by dnode.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_free_dirent.
 */
void
tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de)
{
        struct tmpfs_node *node = de->td_node;

        TMPFS_NODE_LOCK(dnode);
        RB_REMOVE(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de);
        RB_REMOVE(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de);
        dnode->tn_size -= sizeof(struct tmpfs_dirent);
        dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
                            TMPFS_NODE_MODIFIED;
        TMPFS_NODE_UNLOCK(dnode);

        /*
         * Clean out the tn_parent pointer immediately when removing a
         * directory.
         *
         * Removal of the parent linkage also cleans out the extra tn_links
         * count we had on both node and dnode.
         *
         * node can be NULL (typically during a forced umount), in which
         * case the mount code is dealing with the linkages from a linked
         * list scan.
         */
        if (node && node->tn_type == VDIR && node->tn_dir.tn_parent) {
                TMPFS_NODE_LOCK(dnode);
                TMPFS_NODE_LOCK(node);
                KKASSERT(node->tn_dir.tn_parent == dnode);
                dnode->tn_links--;
                node->tn_links--;
                node->tn_dir.tn_parent = NULL;
                TMPFS_NODE_UNLOCK(node);
                TMPFS_NODE_UNLOCK(dnode);
        }
}

/* --------------------------------------------------------------------- */

/*
 * Looks for a directory entry in the directory represented by node.
 * 'ncp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 *
 * Caller must hold the node locked (shared ok)
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
                 struct namecache *ncp)
{
        struct tmpfs_dirent *de;
        int len = ncp->nc_nlen;
        struct tmpfs_dirent wanted;

        wanted.td_namelen = len;
        wanted.td_name = ncp->nc_name;

        TMPFS_VALIDATE_DIR(node);

        de = RB_FIND(tmpfs_dirtree, &node->tn_dir.tn_dirtree, &wanted);

        KKASSERT(f == NULL || f == de->td_node);

        return de;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
        int error;
        struct dirent dent;
        int dirsize;

        TMPFS_VALIDATE_DIR(node);
        KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

        dent.d_ino = node->tn_id;
        dent.d_type = DT_DIR;
        dent.d_namlen = 1;
        dent.d_name[0] = '.';
        dent.d_name[1] = '\0';
        dirsize = _DIRENT_DIRSIZ(&dent);

        if (dirsize > uio->uio_resid)
                error = -1;
        else {
                error = uiomove((caddr_t)&dent, dirsize, uio);
                if (error == 0)
                        uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
        }
        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
                        struct uio *uio)
{
        int error;
        struct dirent dent;
        int dirsize;

        TMPFS_VALIDATE_DIR(node);
        KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

        if (node->tn_dir.tn_parent) {
                TMPFS_NODE_LOCK(node);
                if (node->tn_dir.tn_parent)
                        dent.d_ino = node->tn_dir.tn_parent->tn_id;
                else
                        dent.d_ino = tmp->tm_root->tn_id;
                TMPFS_NODE_UNLOCK(node);
        } else {
                dent.d_ino = tmp->tm_root->tn_id;
        }

        dent.d_type = DT_DIR;
        dent.d_namlen = 2;
        dent.d_name[0] = '.';
        dent.d_name[1] = '.';
        dent.d_name[2] = '\0';
        dirsize = _DIRENT_DIRSIZ(&dent);

        if (dirsize > uio->uio_resid)
                error = -1;
        else {
                error = uiomove((caddr_t)&dent, dirsize, uio);
                if (error == 0) {
                        struct tmpfs_dirent *de;

                        de = RB_MIN(tmpfs_dirtree_cookie,
                                    &node->tn_dir.tn_cookietree);
                        if (de == NULL)
                                uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
                        else
                                uio->uio_offset = tmpfs_dircookie(de);
                }
        }
        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Lookup a directory entry by its associated cookie.
 *
 * Must be called with the directory node locked (shared ok)
 */
struct lubycookie_info {
        off_t cookie;
        struct tmpfs_dirent *de;
};

static int
lubycookie_cmp(struct tmpfs_dirent *de, void *arg)
{
        struct lubycookie_info *info = arg;
        off_t cookie = tmpfs_dircookie(de);

        if (cookie < info->cookie)
                return (-1);
        if (cookie > info->cookie)
                return (1);
        return (0);
}

static int
lubycookie_callback(struct tmpfs_dirent *de, void *arg)
{
        struct lubycookie_info *info = arg;

        if (tmpfs_dircookie(de) == info->cookie) {
                info->de = de;
                return (-1);
        }
        return (0);
}

struct tmpfs_dirent *
tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
{
        struct lubycookie_info info;

        info.cookie = cookie;
        info.de = NULL;
        RB_SCAN(tmpfs_dirtree_cookie, &node->tn_dir.tn_cookietree,
                lubycookie_cmp, lubycookie_callback, &info);
        return (info.de);
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Returns as many directory entries
 * as fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry or an appropriate
 * error code if another error happens.
 *
 * Caller must hold the node locked (shared ok)
 */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
{
        int error;
        off_t startcookie;
        struct tmpfs_dirent *de;

        TMPFS_VALIDATE_DIR(node);

        /*
         * Locate the first directory entry we have to return.  The cookie
         * in uio_offset identifies the entry directly in the cookie tree,
         * so no linear scan is needed.
         */
        startcookie = uio->uio_offset;
        KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
        KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);

        if (startcookie == TMPFS_DIRCOOKIE_EOF)
                return 0;

        de = tmpfs_dir_lookupbycookie(node, startcookie);
        if (de == NULL)
                return EINVAL;

        /*
         * Read as many entries as possible; i.e., until we reach the end of
         * the directory or we exhaust uio space.
         */
        do {
                struct dirent d;
                int reclen;

                /* Create a dirent structure representing the current
                 * tmpfs_node and fill it. */
                d.d_ino = de->td_node->tn_id;
                switch (de->td_node->tn_type) {
                case VBLK:
                        d.d_type = DT_BLK;
                        break;

                case VCHR:
                        d.d_type = DT_CHR;
                        break;

                case VDIR:
                        d.d_type = DT_DIR;
                        break;

                case VFIFO:
                        d.d_type = DT_FIFO;
                        break;

                case VLNK:
                        d.d_type = DT_LNK;
                        break;

                case VREG:
                        d.d_type = DT_REG;
                        break;

                case VSOCK:
                        d.d_type = DT_SOCK;
                        break;

                default:
                        panic("tmpfs_dir_getdents: type %p %d",
                              de->td_node, (int)de->td_node->tn_type);
                }
                d.d_namlen = de->td_namelen;
                KKASSERT(de->td_namelen < sizeof(d.d_name));
                bcopy(de->td_name, d.d_name, d.d_namlen);
                d.d_name[d.d_namlen] = '\0';
                reclen = _DIRENT_RECLEN(d.d_namlen);

                /* Stop reading if the directory entry we are processing is
                 * bigger than the amount of data that can be returned. */
                if (reclen > uio->uio_resid) {
                        error = -1;
                        break;
                }

                /* Copy the new dirent structure into the output buffer and
                 * advance pointers. */
                error = uiomove((caddr_t)&d, reclen, uio);

                (*cntp)++;
                de = RB_NEXT(tmpfs_dirtree_cookie,
                             node->tn_dir.tn_cookietree, de);
        } while (error == 0 && uio->uio_resid > 0 && de != NULL);

        /* Update the offset and cache. */
        if (de == NULL) {
                uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
        } else {
                uio->uio_offset = tmpfs_dircookie(de);
        }

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Resizes the aobj associated with the regular file pointed to by vp to
 * the size newsize.  'vp' must point to a vnode that represents a regular
 * file.  'newsize' must be positive.
 *
 * Pass 'trivial' as 1 when the buffer content will be overwritten anyway;
 * pass 0 to have any newly exposed space zero-filled.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial)
{
        int error;
        vm_pindex_t newpages, oldpages;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *node;
        off_t oldsize;

#ifdef INVARIANTS
        KKASSERT(vp->v_type == VREG);
        KKASSERT(newsize >= 0);
#endif

        node = VP_TO_TMPFS_NODE(vp);
        tmp = VFS_TO_TMPFS(vp->v_mount);

        /*
         * Convert the old and new sizes to the number of pages needed to
         * store them.  It may happen that we do not need to do anything
         * because the last allocated page can accommodate the change on
         * its own.
         */
        TMPFS_NODE_LOCK(node);
        oldsize = node->tn_size;
        oldpages = round_page64(oldsize) / PAGE_SIZE;
        KKASSERT(oldpages == node->tn_reg.tn_aobj_pages);
        newpages = round_page64(newsize) / PAGE_SIZE;

        if (newpages > oldpages &&
            tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) {
                TMPFS_NODE_UNLOCK(node);
                error = ENOSPC;
                goto out;
        }
        node->tn_reg.tn_aobj_pages = newpages;
        node->tn_size = newsize;
        TMPFS_NODE_UNLOCK(node);

        TMPFS_LOCK(tmp);
        tmp->tm_pages_used += (newpages - oldpages);
        TMPFS_UNLOCK(tmp);

        /*
         * When adjusting the vnode filesize and its VM object we must
         * also adjust our backing VM object (aobj).  The blocksize
         * used must match the block size we use for the buffer cache.
         *
         * The backing VM object contains no VM pages, only swap
         * assignments.
         */
        if (newsize < oldsize) {
                vm_pindex_t osize;
                vm_pindex_t nsize;
                vm_object_t aobj;

                error = nvtruncbuf(vp, newsize, TMPFS_BLKSIZE, -1, 0);
                aobj = node->tn_reg.tn_aobj;
                if (aobj) {
                        osize = aobj->size;
                        nsize = vp->v_object->size;
                        if (nsize < osize) {
                                aobj->size = nsize;
                                swap_pager_freespace(aobj, nsize,
                                                     osize - nsize);
                        }
                }
        } else {
                vm_object_t aobj;

                error = nvextendbuf(vp, oldsize, newsize,
                                    TMPFS_BLKSIZE, TMPFS_BLKSIZE,
                                    -1, -1, trivial);
                aobj = node->tn_reg.tn_aobj;
                if (aobj)
                        aobj->size = vp->v_object->size;
        }

out:
        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred)
{
        int error;
        struct tmpfs_node *node;
        int flags;

        KKASSERT(vn_islocked(vp));

        node = VP_TO_TMPFS_NODE(vp);
        flags = node->tn_flags;

        /* Disallow this operation if the file system is mounted read-only. */
        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                return EROFS;
        error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred);

        /* Actually change the flags on the node itself */
        if (error == 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_flags = flags;
                node->tn_status |= TMPFS_NODE_CHANGED;
                TMPFS_NODE_UNLOCK(node);
        }

        KKASSERT(vn_islocked(vp));

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred)
{
        struct tmpfs_node *node;
        mode_t cur_mode;
        int error;

        KKASSERT(vn_islocked(vp));

        node = VP_TO_TMPFS_NODE(vp);

        /* Disallow this operation if the file system is mounted read-only. */
        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                return EROFS;

        /* Immutable or append-only files cannot be modified, either. */
        if (node->tn_flags & (IMMUTABLE | APPEND))
                return EPERM;

        cur_mode = node->tn_mode;
        error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid,
                                 &cur_mode);

        if (error == 0 &&
            (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) {
                TMPFS_NODE_LOCK(node);
                node->tn_mode &= ~ALLPERMS;
                node->tn_mode |= cur_mode & ALLPERMS;

                node->tn_status |= TMPFS_NODE_CHANGED;
                TMPFS_NODE_UNLOCK(node);
        }

        KKASSERT(vn_islocked(vp));

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different from VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred)
{
        mode_t cur_mode;
        uid_t cur_uid;
        gid_t cur_gid;
        struct tmpfs_node *node;
        int error;

        KKASSERT(vn_islocked(vp));
        node = VP_TO_TMPFS_NODE(vp);

        /* Disallow this operation if the file system is mounted read-only. */
        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                return EROFS;

        /* Immutable or append-only files cannot be modified, either. */
        if (node->tn_flags & (IMMUTABLE | APPEND))
                return EPERM;

        cur_uid = node->tn_uid;
        cur_gid = node->tn_gid;
        cur_mode = node->tn_mode;
        error = vop_helper_chown(vp, uid, gid, cred,
                                 &cur_uid, &cur_gid, &cur_mode);

        if (error == 0) {
                TMPFS_NODE_LOCK(node);
                if (cur_uid != node->tn_uid ||
                    cur_gid != node->tn_gid ||
                    cur_mode != node->tn_mode) {
                        node->tn_uid = cur_uid;
                        node->tn_gid = cur_gid;
                        node->tn_mode = cur_mode;
                        node->tn_status |= TMPFS_NODE_CHANGED;
                }
                TMPFS_NODE_UNLOCK(node);
        }

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred)
{
        int error;
        struct tmpfs_node *node;

        KKASSERT(vn_islocked(vp));

        node = VP_TO_TMPFS_NODE(vp);

        /* Decide whether this is a valid operation based on the file type. */
        error = 0;
        switch (vp->v_type) {
        case VDIR:
                return EISDIR;

        case VREG:
                if (vp->v_mount->mnt_flag & MNT_RDONLY)
                        return EROFS;
                break;

        case VBLK:
                /* FALLTHROUGH */
        case VCHR:
                /* FALLTHROUGH */
        case VFIFO:
                /* Allow modifications of special files even if the file
                 * system is mounted read-only (we are not modifying the
                 * files themselves, but the objects they represent). */
                return 0;

        default:
                /* Anything else is unsupported. */
                return EOPNOTSUPP;
        }

        /* Immutable or append-only files cannot be modified, either. */
        if (node->tn_flags & (IMMUTABLE | APPEND))
                return EPERM;

        error = tmpfs_truncate(vp, size);
        /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
         * for us, as well as update tn_status; no need to do that here. */

        KKASSERT(vn_islocked(vp));

        return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
              int vaflags, struct ucred *cred)
{
        struct tmpfs_node *node;

        KKASSERT(vn_islocked(vp));

        node = VP_TO_TMPFS_NODE(vp);

        /* Disallow this operation if the file system is mounted read-only. */
        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                return EROFS;

        /* Immutable or append-only files cannot be modified, either. */
        if (node->tn_flags & (IMMUTABLE | APPEND))
                return EPERM;

        TMPFS_NODE_LOCK(node);
        if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
                node->tn_status |= TMPFS_NODE_ACCESSED;

        if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
                node->tn_status |= TMPFS_NODE_MODIFIED;

        TMPFS_NODE_UNLOCK(node);

        tmpfs_itimes(vp, atime, mtime);

        KKASSERT(vn_islocked(vp));

        return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Sync pending timestamp changes to the node.  'acc' and 'mod', when
 * non-NULL, override the current time for the access and modification
 * timestamps respectively.
 */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
             const struct timespec *mod)
{
        struct tmpfs_node *node;
        struct timespec now;

        node = VP_TO_TMPFS_NODE(vp);

        if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
            TMPFS_NODE_CHANGED)) == 0)
                return;

        vfs_timestamp(&now);

        TMPFS_NODE_LOCK(node);
        if (node->tn_status & TMPFS_NODE_ACCESSED) {
                if (acc == NULL)
                        acc = &now;
                node->tn_atime = acc->tv_sec;
                node->tn_atimensec = acc->tv_nsec;
        }
        if (node->tn_status & TMPFS_NODE_MODIFIED) {
                if (mod == NULL)
                        mod = &now;
                node->tn_mtime = mod->tv_sec;
                node->tn_mtimensec = mod->tv_nsec;
        }
        if (node->tn_status & TMPFS_NODE_CHANGED) {
                node->tn_ctime = now.tv_sec;
                node->tn_ctimensec = now.tv_nsec;
        }
        node->tn_status &=
            ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
        TMPFS_NODE_UNLOCK(node);
}

/* --------------------------------------------------------------------- */

/* Flush any pending timestamp updates using the current time. */
void
tmpfs_update(struct vnode *vp)
{
        tmpfs_itimes(vp, NULL, NULL);
}

/* --------------------------------------------------------------------- */

/* Truncate (or extend) the file backing vp to 'length' bytes. */
int
tmpfs_truncate(struct vnode *vp, off_t length)
{
        int error;
        struct tmpfs_node *node;

        node = VP_TO_TMPFS_NODE(vp);

        if (length < 0) {
                error = EINVAL;
                goto out;
        }

        if (node->tn_size == length) {
                error = 0;
                goto out;
        }

        if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
                return (EFBIG);

        error = tmpfs_reg_resize(vp, length, 1);

        if (error == 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
                TMPFS_NODE_UNLOCK(node);
        }

out:
        tmpfs_update(vp);

        return error;
}

/* --------------------------------------------------------------------- */

/* Hand out the next inode number from the mount's monotonic counter. */
static ino_t
tmpfs_fetch_ino(struct tmpfs_mount *tmp)
{
        ino_t ret;

        TMPFS_LOCK(tmp);
        ret = tmp->tm_ino++;
        TMPFS_UNLOCK(tmp);

        return (ret);
}

/* Name-tree comparator: order entries by name length, then by name. */
static int
tmpfs_dirtree_compare(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
        if (a->td_namelen > b->td_namelen)
                return 1;
        else if (a->td_namelen < b->td_namelen)
                return -1;
        else
                return strncmp(a->td_name, b->td_name, a->td_namelen);
}

/* Cookie-tree comparator: order entries by their address, from which the
 * readdir cookie is derived. */
static int
tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
        if (a < b)
                return (-1);
        if (a > b)
                return (1);
        return 0;
}