1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 37 #include <sys/kernel.h> 38 #include <sys/param.h> 39 #include <sys/namei.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/spinlock2.h> 43 #include <sys/stat.h> 44 #include <sys/systm.h> 45 #include <sys/vnode.h> 46 #include <sys/vmmeter.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_object.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_pager.h> 52 #include <vm/vm_extern.h> 53 54 #include <vfs/tmpfs/tmpfs.h> 55 #include <vfs/tmpfs/tmpfs_fifoops.h> 56 #include <vfs/tmpfs/tmpfs_vnops.h> 57 58 static ino_t t_ino = 2; 59 static struct spinlock ino_lock; 60 static ino_t tmpfs_fetch_ino(void); 61 62 /* --------------------------------------------------------------------- */ 63 64 /* 65 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 66 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 67 * using the credentials of the process 'p'. 68 * 69 * If the node type is set to 'VDIR', then the parent parameter must point 70 * to the parent directory of the node being created. It may only be NULL 71 * while allocating the root node. 72 * 73 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 74 * specifies the device the node represents. 75 * 76 * If the node type is set to 'VLNK', then the parameter target specifies 77 * the file name of the target file for the symbolic link that is being 78 * created. 79 * 80 * Note that new nodes are retrieved from the available list if it has 81 * items or, if it is empty, from the node pool as long as there is enough 82 * space to create them. 83 * 84 * Returns zero on success or an appropriate error code on failure. 85 */ 86 int 87 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 88 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 89 char *target, int rmajor, int rminor, struct tmpfs_node **node) 90 { 91 struct tmpfs_node *nnode; 92 struct timespec ts; 93 udev_t rdev; 94 95 /* If the root directory of the 'tmp' file system is not yet 96 * allocated, this must be the request to do it. */ 97 KKASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 98 99 KKASSERT(IFF(type == VLNK, target != NULL)); 100 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 101 102 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 103 return (ENOSPC); 104 105 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 106 if (nnode == NULL) 107 return (ENOSPC); 108 109 /* Generic initialization. */ 110 nnode->tn_type = type; 111 vfs_timestamp(&ts); 112 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 113 = ts.tv_sec; 114 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 115 = ts.tv_nsec; 116 nnode->tn_uid = uid; 117 nnode->tn_gid = gid; 118 nnode->tn_mode = mode; 119 nnode->tn_id = tmpfs_fetch_ino(); 120 nnode->tn_advlock.init_done = 0; 121 122 /* Type-specific initialization. */ 123 switch (nnode->tn_type) { 124 case VBLK: 125 case VCHR: 126 rdev = makeudev(rmajor, rminor); 127 if (rdev == NOUDEV) { 128 objcache_put(tmp->tm_node_pool, nnode); 129 return(EINVAL); 130 } 131 nnode->tn_rdev = rdev; 132 break; 133 134 case VDIR: 135 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 136 KKASSERT(parent != nnode); 137 KKASSERT(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 138 nnode->tn_dir.tn_parent = parent; 139 nnode->tn_dir.tn_readdir_lastn = 0; 140 nnode->tn_dir.tn_readdir_lastp = NULL; 141 nnode->tn_links++; 142 nnode->tn_size = 0; 143 if (parent) { 144 TMPFS_NODE_LOCK(parent); 145 parent->tn_links++; 146 TMPFS_NODE_UNLOCK(parent); 147 } 148 break; 149 150 case VFIFO: 151 /* FALLTHROUGH */ 152 case VSOCK: 153 break; 154 155 case VLNK: 156 nnode->tn_size = strlen(target); 157 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 158 M_WAITOK | M_NULLOK); 159 if (nnode->tn_link == NULL) { 160 objcache_put(tmp->tm_node_pool, nnode); 161 return (ENOSPC); 162 } 163 bcopy(target, nnode->tn_link, nnode->tn_size); 164 nnode->tn_link[nnode->tn_size] = '\0'; 165 break; 166 167 case VREG: 168 nnode->tn_reg.tn_aobj = 169 swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0); 170 nnode->tn_reg.tn_aobj_pages = 0; 171 nnode->tn_size = 0; 172 break; 173 174 default: 175 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 176 } 177 178 TMPFS_NODE_LOCK(nnode); 179 TMPFS_LOCK(tmp); 180 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 181 tmp->tm_nodes_inuse++; 182 TMPFS_UNLOCK(tmp); 183 TMPFS_NODE_UNLOCK(nnode); 184 185 *node = nnode; 186 return 0; 187 } 188 189 /* --------------------------------------------------------------------- */ 190 191 /* 192 * Destroys the node pointed to by node from the file system 'tmp'. 193 * If the node does not belong to the given mount point, the results are 194 * unpredicted. 195 * 196 * If the node references a directory; no entries are allowed because 197 * their removal could need a recursive algorithm, something forbidden in 198 * kernel space. Furthermore, there is not need to provide such 199 * functionality (recursive removal) because the only primitives offered 200 * to the user are the removal of empty directories and the deletion of 201 * individual files. 202 * 203 * Note that nodes are not really deleted; in fact, when a node has been 204 * allocated, it cannot be deleted during the whole life of the file 205 * system. Instead, they are moved to the available list and remain there 206 * until reused. 207 */ 208 void 209 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 210 { 211 vm_pindex_t pages = 0; 212 213 #ifdef INVARIANTS 214 TMPFS_ASSERT_ELOCKED(node); 215 KKASSERT(node->tn_vnode == NULL); 216 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 217 #endif 218 219 TMPFS_LOCK(tmp); 220 LIST_REMOVE(node, tn_entries); 221 tmp->tm_nodes_inuse--; 222 TMPFS_UNLOCK(tmp); 223 TMPFS_NODE_UNLOCK(node); 224 225 switch (node->tn_type) { 226 case VNON: 227 /* Do not do anything. VNON is provided to let the 228 * allocation routine clean itself easily by avoiding 229 * duplicating code in it. */ 230 /* FALLTHROUGH */ 231 case VBLK: 232 /* FALLTHROUGH */ 233 case VCHR: 234 /* FALLTHROUGH */ 235 break; 236 case VDIR: 237 /* 238 * The parent link can be NULL if this is the root 239 * node. 240 */ 241 node->tn_links--; 242 node->tn_size = 0; 243 KKASSERT(node->tn_dir.tn_parent || node == tmp->tm_root); 244 if (node->tn_dir.tn_parent) { 245 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 246 node->tn_dir.tn_parent->tn_links--; 247 248 /* 249 * If the parent directory has no more links and 250 * no vnode ref nothing is going to come along 251 * and clean it up unless we do it here. 252 */ 253 if (node->tn_dir.tn_parent->tn_links == 0 && 254 node->tn_dir.tn_parent->tn_vnode == NULL) { 255 tmpfs_free_node(tmp, node->tn_dir.tn_parent); 256 /* eats parent lock */ 257 } else { 258 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 259 } 260 node->tn_dir.tn_parent = NULL; 261 } 262 263 /* 264 * If the root node is being destroyed don't leave a 265 * dangling pointer in tmpfs_mount. 266 */ 267 if (node == tmp->tm_root) 268 tmp->tm_root = NULL; 269 break; 270 case VFIFO: 271 /* FALLTHROUGH */ 272 case VSOCK: 273 break; 274 275 case VLNK: 276 kfree(node->tn_link, tmp->tm_name_zone); 277 node->tn_link = NULL; 278 node->tn_size = 0; 279 break; 280 281 case VREG: 282 if (node->tn_reg.tn_aobj != NULL) 283 vm_object_deallocate(node->tn_reg.tn_aobj); 284 node->tn_reg.tn_aobj = NULL; 285 pages = node->tn_reg.tn_aobj_pages; 286 break; 287 288 default: 289 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 290 } 291 292 /* 293 * Clean up fields for the next allocation. The objcache only ctors 294 * new allocations. 295 */ 296 tmpfs_node_ctor(node, NULL, 0); 297 objcache_put(tmp->tm_node_pool, node); 298 /* node is now invalid */ 299 300 TMPFS_LOCK(tmp); 301 tmp->tm_pages_used -= pages; 302 TMPFS_UNLOCK(tmp); 303 } 304 305 /* --------------------------------------------------------------------- */ 306 307 /* 308 * Allocates a new directory entry for the node node with a name of name. 309 * The new directory entry is returned in *de. 310 * 311 * The link count of node is increased by one to reflect the new object 312 * referencing it. 313 * 314 * Returns zero on success or an appropriate error code on failure. 315 */ 316 int 317 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 318 const char *name, uint16_t len, struct tmpfs_dirent **de) 319 { 320 struct tmpfs_dirent *nde; 321 322 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 323 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 324 if (nde->td_name == NULL) { 325 objcache_put(tmp->tm_dirent_pool, nde); 326 *de = NULL; 327 return (ENOSPC); 328 } 329 nde->td_namelen = len; 330 bcopy(name, nde->td_name, len); 331 nde->td_name[len] = '\0'; 332 333 nde->td_node = node; 334 335 TMPFS_NODE_LOCK(node); 336 node->tn_links++; 337 TMPFS_NODE_UNLOCK(node); 338 339 *de = nde; 340 341 return 0; 342 } 343 344 /* --------------------------------------------------------------------- */ 345 346 /* 347 * Frees a directory entry. It is the caller's responsibility to destroy 348 * the node referenced by it if needed. 349 * 350 * The link count of node is decreased by one to reflect the removal of an 351 * object that referenced it. This only happens if 'node_exists' is true; 352 * otherwise the function will not access the node referred to by the 353 * directory entry, as it may already have been released from the outside. 354 */ 355 void 356 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 357 { 358 struct tmpfs_node *node; 359 360 node = de->td_node; 361 362 TMPFS_NODE_LOCK(node); 363 TMPFS_ASSERT_ELOCKED(node); 364 KKASSERT(node->tn_links > 0); 365 node->tn_links--; 366 TMPFS_NODE_UNLOCK(node); 367 368 kfree(de->td_name, tmp->tm_name_zone); 369 de->td_namelen = 0; 370 de->td_name = NULL; 371 de->td_node = NULL; 372 objcache_put(tmp->tm_dirent_pool, de); 373 } 374 375 /* --------------------------------------------------------------------- */ 376 377 /* 378 * Allocates a new vnode for the node node or returns a new reference to 379 * an existing one if the node had already a vnode referencing it. The 380 * resulting locked vnode is returned in *vpp. 381 * 382 * Returns zero on success or an appropriate error code on failure. 383 */ 384 int 385 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 386 struct vnode **vpp) 387 { 388 int error = 0; 389 struct vnode *vp; 390 391 loop: 392 /* 393 * Interlocked extraction from node. This can race many things. 394 * We have to get a soft reference on the vnode while we hold 395 * the node locked, then acquire it properly and check for races. 396 */ 397 TMPFS_NODE_LOCK(node); 398 if ((vp = node->tn_vnode) != NULL) { 399 KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 400 vhold_interlocked(vp); 401 TMPFS_NODE_UNLOCK(node); 402 403 if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) { 404 vdrop(vp); 405 goto loop; 406 } 407 if (node->tn_vnode != vp) { 408 vput(vp); 409 vdrop(vp); 410 goto loop; 411 } 412 vdrop(vp); 413 goto out; 414 } 415 /* vp is NULL */ 416 417 /* 418 * This should never happen. 419 */ 420 if (node->tn_vpstate & TMPFS_VNODE_DOOMED) { 421 TMPFS_NODE_UNLOCK(node); 422 error = ENOENT; 423 goto out; 424 } 425 426 /* 427 * Interlock against other calls to tmpfs_alloc_vp() trying to 428 * allocate and assign a vp to node. 429 */ 430 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 431 node->tn_vpstate |= TMPFS_VNODE_WANT; 432 error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH, 433 "tmpfs_alloc_vp", 0); 434 TMPFS_NODE_UNLOCK(node); 435 if (error) 436 return error; 437 goto loop; 438 } 439 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 440 TMPFS_NODE_UNLOCK(node); 441 442 /* 443 * Allocate a new vnode (may block). The ALLOCATING flag should 444 * prevent a race against someone else assigning node->tn_vnode. 445 */ 446 error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE); 447 if (error != 0) 448 goto unlock; 449 450 KKASSERT(node->tn_vnode == NULL); 451 KKASSERT(vp != NULL); 452 vp->v_data = node; 453 vp->v_type = node->tn_type; 454 455 /* Type-specific initialization. */ 456 switch (node->tn_type) { 457 case VBLK: 458 /* FALLTHROUGH */ 459 case VCHR: 460 /* FALLTHROUGH */ 461 case VSOCK: 462 break; 463 case VREG: 464 vinitvmio(vp, node->tn_size, BMASK, -1); 465 break; 466 case VLNK: 467 break; 468 case VFIFO: 469 vp->v_ops = &mp->mnt_vn_fifo_ops; 470 break; 471 case VDIR: 472 break; 473 474 default: 475 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 476 } 477 478 insmntque(vp, mp); 479 480 unlock: 481 TMPFS_NODE_LOCK(node); 482 483 KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 484 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 485 node->tn_vnode = vp; 486 487 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 488 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 489 TMPFS_NODE_UNLOCK(node); 490 wakeup(&node->tn_vpstate); 491 } else { 492 TMPFS_NODE_UNLOCK(node); 493 } 494 495 out: 496 *vpp = vp; 497 498 KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp))); 499 #ifdef INVARIANTS 500 TMPFS_NODE_LOCK(node); 501 KKASSERT(*vpp == node->tn_vnode); 502 TMPFS_NODE_UNLOCK(node); 503 #endif 504 505 return error; 506 } 507 508 /* --------------------------------------------------------------------- */ 509 510 /* 511 * Destroys the association between the vnode vp and the node it 512 * references. 513 */ 514 void 515 tmpfs_free_vp(struct vnode *vp) 516 { 517 struct tmpfs_node *node; 518 519 node = VP_TO_TMPFS_NODE(vp); 520 521 TMPFS_NODE_LOCK(node); 522 KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0); 523 node->tn_vnode = NULL; 524 TMPFS_NODE_UNLOCK(node); 525 vp->v_data = NULL; 526 } 527 528 /* --------------------------------------------------------------------- */ 529 530 /* 531 * Allocates a new file of type 'type' and adds it to the parent directory 532 * 'dvp'; this addition is done using the component name given in 'cnp'. 533 * The ownership of the new file is automatically assigned based on the 534 * credentials of the caller (through 'cnp'), the group is set based on 535 * the parent directory and the mode is determined from the 'vap' argument. 536 * If successful, *vpp holds a vnode to the newly created file and zero 537 * is returned. Otherwise *vpp is NULL and the function returns an 538 * appropriate error code. 539 */ 540 int 541 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 542 struct namecache *ncp, struct ucred *cred, char *target) 543 { 544 int error; 545 struct tmpfs_dirent *de; 546 struct tmpfs_mount *tmp; 547 struct tmpfs_node *dnode; 548 struct tmpfs_node *node; 549 struct tmpfs_node *parent; 550 551 tmp = VFS_TO_TMPFS(dvp->v_mount); 552 dnode = VP_TO_TMPFS_DIR(dvp); 553 *vpp = NULL; 554 555 /* If the entry we are creating is a directory, we cannot overflow 556 * the number of links of its parent, because it will get a new 557 * link. */ 558 if (vap->va_type == VDIR) { 559 /* Ensure that we do not overflow the maximum number of links 560 * imposed by the system. */ 561 KKASSERT(dnode->tn_links <= LINK_MAX); 562 if (dnode->tn_links == LINK_MAX) { 563 return EMLINK; 564 } 565 566 parent = dnode; 567 KKASSERT(parent != NULL); 568 } else 569 parent = NULL; 570 571 /* Allocate a node that represents the new file. */ 572 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 573 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rmajor, vap->va_rminor, &node); 574 if (error != 0) 575 return error; 576 TMPFS_NODE_LOCK(node); 577 578 /* Allocate a directory entry that points to the new file. */ 579 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 580 if (error != 0) { 581 tmpfs_free_node(tmp, node); 582 /* eats node lock */ 583 return error; 584 } 585 586 /* Allocate a vnode for the new file. */ 587 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 588 if (error != 0) { 589 tmpfs_free_dirent(tmp, de); 590 tmpfs_free_node(tmp, node); 591 /* eats node lock */ 592 return error; 593 } 594 595 /* Now that all required items are allocated, we can proceed to 596 * insert the new node into the directory, an operation that 597 * cannot fail. */ 598 tmpfs_dir_attach(dnode, de); 599 TMPFS_NODE_UNLOCK(node); 600 601 return error; 602 } 603 604 /* --------------------------------------------------------------------- */ 605 606 /* 607 * Attaches the directory entry de to the directory represented by vp. 608 * Note that this does not change the link count of the node pointed by 609 * the directory entry, as this is done by tmpfs_alloc_dirent. 610 */ 611 void 612 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 613 { 614 TMPFS_NODE_LOCK(dnode); 615 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 616 617 TMPFS_ASSERT_ELOCKED(dnode); 618 dnode->tn_size += sizeof(struct tmpfs_dirent); 619 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 620 TMPFS_NODE_MODIFIED; 621 TMPFS_NODE_UNLOCK(dnode); 622 } 623 624 /* --------------------------------------------------------------------- */ 625 626 /* 627 * Detaches the directory entry de from the directory represented by vp. 628 * Note that this does not change the link count of the node pointed by 629 * the directory entry, as this is done by tmpfs_free_dirent. 630 */ 631 void 632 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 633 { 634 TMPFS_NODE_LOCK(dnode); 635 if (dnode->tn_dir.tn_readdir_lastp == de) { 636 dnode->tn_dir.tn_readdir_lastn = 0; 637 dnode->tn_dir.tn_readdir_lastp = NULL; 638 } 639 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 640 641 TMPFS_ASSERT_ELOCKED(dnode); 642 dnode->tn_size -= sizeof(struct tmpfs_dirent); 643 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 644 TMPFS_NODE_MODIFIED; 645 TMPFS_NODE_UNLOCK(dnode); 646 } 647 648 /* --------------------------------------------------------------------- */ 649 650 /* 651 * Looks for a directory entry in the directory represented by node. 652 * 'ncp' describes the name of the entry to look for. Note that the . 653 * and .. components are not allowed as they do not physically exist 654 * within directories. 655 * 656 * Returns a pointer to the entry when found, otherwise NULL. 657 */ 658 struct tmpfs_dirent * 659 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 660 struct namecache *ncp) 661 { 662 struct tmpfs_dirent *de; 663 int len = ncp->nc_nlen; 664 665 TMPFS_VALIDATE_DIR(node); 666 667 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 668 if (f != NULL && de->td_node != f) 669 continue; 670 if (len == de->td_namelen) { 671 if (!memcmp(ncp->nc_name, de->td_name, len)) 672 break; 673 } 674 } 675 676 TMPFS_NODE_LOCK(node); 677 node->tn_status |= TMPFS_NODE_ACCESSED; 678 TMPFS_NODE_UNLOCK(node); 679 680 return de; 681 } 682 683 /* --------------------------------------------------------------------- */ 684 685 /* 686 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 687 * directory and returns it in the uio space. The function returns 0 688 * on success, -1 if there was not enough space in the uio structure to 689 * hold the directory entry or an appropriate error code if another 690 * error happens. 691 */ 692 int 693 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 694 { 695 int error; 696 struct dirent dent; 697 int dirsize; 698 699 TMPFS_VALIDATE_DIR(node); 700 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 701 702 dent.d_ino = node->tn_id; 703 dent.d_type = DT_DIR; 704 dent.d_namlen = 1; 705 dent.d_name[0] = '.'; 706 dent.d_name[1] = '\0'; 707 dirsize = _DIRENT_DIRSIZ(&dent); 708 709 if (dirsize > uio->uio_resid) 710 error = -1; 711 else { 712 error = uiomove((caddr_t)&dent, dirsize, uio); 713 if (error == 0) 714 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 715 } 716 717 TMPFS_NODE_LOCK(node); 718 node->tn_status |= TMPFS_NODE_ACCESSED; 719 TMPFS_NODE_UNLOCK(node); 720 721 return error; 722 } 723 724 /* --------------------------------------------------------------------- */ 725 726 /* 727 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 728 * directory and returns it in the uio space. The function returns 0 729 * on success, -1 if there was not enough space in the uio structure to 730 * hold the directory entry or an appropriate error code if another 731 * error happens. 732 */ 733 int 734 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 735 struct uio *uio) 736 { 737 int error; 738 struct dirent dent; 739 int dirsize; 740 741 TMPFS_VALIDATE_DIR(node); 742 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 743 744 if (node->tn_dir.tn_parent) { 745 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 746 dent.d_ino = node->tn_dir.tn_parent->tn_id; 747 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 748 } else { 749 dent.d_ino = tmp->tm_root->tn_id; 750 } 751 752 dent.d_type = DT_DIR; 753 dent.d_namlen = 2; 754 dent.d_name[0] = '.'; 755 dent.d_name[1] = '.'; 756 dent.d_name[2] = '\0'; 757 dirsize = _DIRENT_DIRSIZ(&dent); 758 759 if (dirsize > uio->uio_resid) 760 error = -1; 761 else { 762 error = uiomove((caddr_t)&dent, dirsize, uio); 763 if (error == 0) { 764 struct tmpfs_dirent *de; 765 766 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 767 if (de == NULL) 768 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 769 else 770 uio->uio_offset = tmpfs_dircookie(de); 771 } 772 } 773 774 TMPFS_NODE_LOCK(node); 775 node->tn_status |= TMPFS_NODE_ACCESSED; 776 TMPFS_NODE_UNLOCK(node); 777 778 return error; 779 } 780 781 /* --------------------------------------------------------------------- */ 782 783 /* 784 * Lookup a directory entry by its associated cookie. 785 */ 786 struct tmpfs_dirent * 787 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 788 { 789 struct tmpfs_dirent *de; 790 791 if (cookie == node->tn_dir.tn_readdir_lastn && 792 node->tn_dir.tn_readdir_lastp != NULL) { 793 return node->tn_dir.tn_readdir_lastp; 794 } 795 796 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 797 if (tmpfs_dircookie(de) == cookie) { 798 break; 799 } 800 } 801 802 return de; 803 } 804 805 /* --------------------------------------------------------------------- */ 806 807 /* 808 * Helper function for tmpfs_readdir. Returns as much directory entries 809 * as can fit in the uio space. The read starts at uio->uio_offset. 810 * The function returns 0 on success, -1 if there was not enough space 811 * in the uio structure to hold the directory entry or an appropriate 812 * error code if another error happens. 813 */ 814 int 815 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 816 { 817 int error; 818 off_t startcookie; 819 struct tmpfs_dirent *de; 820 821 TMPFS_VALIDATE_DIR(node); 822 823 /* Locate the first directory entry we have to return. We have cached 824 * the last readdir in the node, so use those values if appropriate. 825 * Otherwise do a linear scan to find the requested entry. */ 826 startcookie = uio->uio_offset; 827 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 828 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 829 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 830 return 0; 831 } else { 832 de = tmpfs_dir_lookupbycookie(node, startcookie); 833 } 834 if (de == NULL) { 835 return EINVAL; 836 } 837 838 /* Read as much entries as possible; i.e., until we reach the end of 839 * the directory or we exhaust uio space. */ 840 do { 841 struct dirent d; 842 int reclen; 843 844 /* Create a dirent structure representing the current 845 * tmpfs_node and fill it. */ 846 d.d_ino = de->td_node->tn_id; 847 switch (de->td_node->tn_type) { 848 case VBLK: 849 d.d_type = DT_BLK; 850 break; 851 852 case VCHR: 853 d.d_type = DT_CHR; 854 break; 855 856 case VDIR: 857 d.d_type = DT_DIR; 858 break; 859 860 case VFIFO: 861 d.d_type = DT_FIFO; 862 break; 863 864 case VLNK: 865 d.d_type = DT_LNK; 866 break; 867 868 case VREG: 869 d.d_type = DT_REG; 870 break; 871 872 case VSOCK: 873 d.d_type = DT_SOCK; 874 break; 875 876 default: 877 panic("tmpfs_dir_getdents: type %p %d", 878 de->td_node, (int)de->td_node->tn_type); 879 } 880 d.d_namlen = de->td_namelen; 881 KKASSERT(de->td_namelen < sizeof(d.d_name)); 882 bcopy(de->td_name, d.d_name, d.d_namlen); 883 d.d_name[d.d_namlen] = '\0'; 884 reclen = _DIRENT_RECLEN(d.d_namlen); 885 886 /* Stop reading if the directory entry we are treating is 887 * bigger than the amount of data that can be returned. */ 888 if (reclen > uio->uio_resid) { 889 error = -1; 890 break; 891 } 892 893 /* Copy the new dirent structure into the output buffer and 894 * advance pointers. */ 895 error = uiomove((caddr_t)&d, reclen, uio); 896 897 (*cntp)++; 898 de = TAILQ_NEXT(de, td_entries); 899 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 900 901 /* Update the offset and cache. */ 902 if (de == NULL) { 903 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 904 node->tn_dir.tn_readdir_lastn = 0; 905 node->tn_dir.tn_readdir_lastp = NULL; 906 } else { 907 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 908 node->tn_dir.tn_readdir_lastp = de; 909 } 910 node->tn_status |= TMPFS_NODE_ACCESSED; 911 912 return error; 913 } 914 915 /* --------------------------------------------------------------------- */ 916 917 /* 918 * Resizes the aobj associated to the regular file pointed to by vp to 919 * the size newsize. 'vp' must point to a vnode that represents a regular 920 * file. 'newsize' must be positive. 921 * 922 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 923 * to be zero filled. 924 * 925 * Returns zero on success or an appropriate error code on failure. 926 */ 927 int 928 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 929 { 930 int error; 931 vm_pindex_t newpages, oldpages; 932 struct tmpfs_mount *tmp; 933 struct tmpfs_node *node; 934 off_t oldsize; 935 936 #ifdef INVARIANTS 937 KKASSERT(vp->v_type == VREG); 938 KKASSERT(newsize >= 0); 939 #endif 940 941 node = VP_TO_TMPFS_NODE(vp); 942 tmp = VFS_TO_TMPFS(vp->v_mount); 943 944 /* Convert the old and new sizes to the number of pages needed to 945 * store them. It may happen that we do not need to do anything 946 * because the last allocated page can accommodate the change on 947 * its own. */ 948 oldsize = node->tn_size; 949 oldpages = round_page64(oldsize) / PAGE_SIZE; 950 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 951 newpages = round_page64(newsize) / PAGE_SIZE; 952 953 if (newpages > oldpages && 954 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 955 error = ENOSPC; 956 goto out; 957 } 958 959 TMPFS_LOCK(tmp); 960 tmp->tm_pages_used += (newpages - oldpages); 961 TMPFS_UNLOCK(tmp); 962 963 TMPFS_NODE_LOCK(node); 964 node->tn_reg.tn_aobj_pages = newpages; 965 node->tn_size = newsize; 966 TMPFS_NODE_UNLOCK(node); 967 968 /* 969 * When adjusting the vnode filesize and its VM object we must 970 * also adjust our backing VM object (aobj). The blocksize 971 * used must match the block sized we use for the buffer cache. 972 * 973 * The backing VM object contains no VM pages, only swap 974 * assignments. 975 */ 976 if (newsize < oldsize) { 977 vm_pindex_t osize; 978 vm_pindex_t nsize; 979 vm_object_t aobj; 980 981 error = nvtruncbuf(vp, newsize, BSIZE, -1); 982 aobj = node->tn_reg.tn_aobj; 983 if (aobj) { 984 osize = aobj->size; 985 nsize = vp->v_object->size; 986 if (nsize < osize) { 987 aobj->size = osize; 988 swap_pager_freespace(aobj, nsize, 989 osize - nsize); 990 } 991 } 992 } else { 993 vm_object_t aobj; 994 995 error = nvextendbuf(vp, oldsize, newsize, BSIZE, BSIZE, 996 -1, -1, trivial); 997 aobj = node->tn_reg.tn_aobj; 998 if (aobj) 999 aobj->size = vp->v_object->size; 1000 } 1001 1002 out: 1003 return error; 1004 } 1005 1006 /* --------------------------------------------------------------------- */ 1007 1008 /* 1009 * Change flags of the given vnode. 1010 * Caller should execute tmpfs_update on vp after a successful execution. 1011 * The vnode must be locked on entry and remain locked on exit. 1012 */ 1013 int 1014 tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) 1015 { 1016 int error; 1017 struct tmpfs_node *node; 1018 int flags; 1019 1020 KKASSERT(vn_islocked(vp)); 1021 1022 node = VP_TO_TMPFS_NODE(vp); 1023 flags = node->tn_flags; 1024 1025 /* Disallow this operation if the file system is mounted read-only. */ 1026 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1027 return EROFS; 1028 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1029 1030 /* 1031 * Unprivileged processes are not permitted to unset system 1032 * flags, or modify flags if any system flags are set. 1033 * 1034 * Silently enforce SF_NOCACHE on the root tmpfs vnode so 1035 * tmpfs data is not double-cached by swapcache. 1036 */ 1037 if (error == 0) { 1038 TMPFS_NODE_LOCK(node); 1039 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1040 if (vp->v_flag & VROOT) 1041 flags |= SF_NOCACHE; 1042 node->tn_flags = flags; 1043 } else { 1044 if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | 1045 SF_APPEND) || 1046 (flags & UF_SETTABLE) != flags) { 1047 error = EPERM; 1048 } else { 1049 node->tn_flags &= SF_SETTABLE; 1050 node->tn_flags |= (flags & UF_SETTABLE); 1051 } 1052 } 1053 node->tn_status |= TMPFS_NODE_CHANGED; 1054 TMPFS_NODE_UNLOCK(node); 1055 } 1056 1057 KKASSERT(vn_islocked(vp)); 1058 1059 return error; 1060 } 1061 1062 /* --------------------------------------------------------------------- */ 1063 1064 /* 1065 * Change access mode on the given vnode. 1066 * Caller should execute tmpfs_update on vp after a successful execution. 1067 * The vnode must be locked on entry and remain locked on exit. 1068 */ 1069 int 1070 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1071 { 1072 struct tmpfs_node *node; 1073 mode_t cur_mode; 1074 int error; 1075 1076 KKASSERT(vn_islocked(vp)); 1077 1078 node = VP_TO_TMPFS_NODE(vp); 1079 1080 /* Disallow this operation if the file system is mounted read-only. */ 1081 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1082 return EROFS; 1083 1084 /* Immutable or append-only files cannot be modified, either. */ 1085 if (node->tn_flags & (IMMUTABLE | APPEND)) 1086 return EPERM; 1087 1088 cur_mode = node->tn_mode; 1089 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1090 &cur_mode); 1091 1092 if (error == 0 && 1093 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1094 TMPFS_NODE_LOCK(node); 1095 node->tn_mode &= ~ALLPERMS; 1096 node->tn_mode |= cur_mode & ALLPERMS; 1097 1098 node->tn_status |= TMPFS_NODE_CHANGED; 1099 TMPFS_NODE_UNLOCK(node); 1100 } 1101 1102 KKASSERT(vn_islocked(vp)); 1103 1104 return 0; 1105 } 1106 1107 /* --------------------------------------------------------------------- */ 1108 1109 /* 1110 * Change ownership of the given vnode. At least one of uid or gid must 1111 * be different than VNOVAL. If one is set to that value, the attribute 1112 * is unchanged. 1113 * Caller should execute tmpfs_update on vp after a successful execution. 1114 * The vnode must be locked on entry and remain locked on exit. 1115 */ 1116 int 1117 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1118 { 1119 mode_t cur_mode; 1120 uid_t cur_uid; 1121 gid_t cur_gid; 1122 struct tmpfs_node *node; 1123 int error; 1124 1125 KKASSERT(vn_islocked(vp)); 1126 node = VP_TO_TMPFS_NODE(vp); 1127 1128 /* Disallow this operation if the file system is mounted read-only. */ 1129 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1130 return EROFS; 1131 1132 /* Immutable or append-only files cannot be modified, either. */ 1133 if (node->tn_flags & (IMMUTABLE | APPEND)) 1134 return EPERM; 1135 1136 cur_uid = node->tn_uid; 1137 cur_gid = node->tn_gid; 1138 cur_mode = node->tn_mode; 1139 error = vop_helper_chown(vp, uid, gid, cred, 1140 &cur_uid, &cur_gid, &cur_mode); 1141 1142 if (error == 0) { 1143 TMPFS_NODE_LOCK(node); 1144 if (cur_uid != node->tn_uid || 1145 cur_gid != node->tn_gid || 1146 cur_mode != node->tn_mode) { 1147 node->tn_uid = cur_uid; 1148 node->tn_gid = cur_gid; 1149 node->tn_mode = cur_mode; 1150 node->tn_status |= TMPFS_NODE_CHANGED; 1151 } 1152 TMPFS_NODE_UNLOCK(node); 1153 } 1154 1155 return error; 1156 } 1157 1158 /* --------------------------------------------------------------------- */ 1159 1160 /* 1161 * Change size of the given vnode. 1162 * Caller should execute tmpfs_update on vp after a successful execution. 1163 * The vnode must be locked on entry and remain locked on exit. 1164 */ 1165 int 1166 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1167 { 1168 int error; 1169 struct tmpfs_node *node; 1170 1171 KKASSERT(vn_islocked(vp)); 1172 1173 node = VP_TO_TMPFS_NODE(vp); 1174 1175 /* Decide whether this is a valid operation based on the file type. */ 1176 error = 0; 1177 switch (vp->v_type) { 1178 case VDIR: 1179 return EISDIR; 1180 1181 case VREG: 1182 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1183 return EROFS; 1184 break; 1185 1186 case VBLK: 1187 /* FALLTHROUGH */ 1188 case VCHR: 1189 /* FALLTHROUGH */ 1190 case VFIFO: 1191 /* Allow modifications of special files even if in the file 1192 * system is mounted read-only (we are not modifying the 1193 * files themselves, but the objects they represent). */ 1194 return 0; 1195 1196 default: 1197 /* Anything else is unsupported. */ 1198 return EOPNOTSUPP; 1199 } 1200 1201 /* Immutable or append-only files cannot be modified, either. */ 1202 if (node->tn_flags & (IMMUTABLE | APPEND)) 1203 return EPERM; 1204 1205 error = tmpfs_truncate(vp, size); 1206 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1207 * for us, as will update tn_status; no need to do that here. */ 1208 1209 KKASSERT(vn_islocked(vp)); 1210 1211 return error; 1212 } 1213 1214 /* --------------------------------------------------------------------- */ 1215 1216 /* 1217 * Change access and modification times of the given vnode. 1218 * Caller should execute tmpfs_update on vp after a successful execution. 1219 * The vnode must be locked on entry and remain locked on exit. 1220 */ 1221 int 1222 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1223 int vaflags, struct ucred *cred) 1224 { 1225 struct tmpfs_node *node; 1226 1227 KKASSERT(vn_islocked(vp)); 1228 1229 node = VP_TO_TMPFS_NODE(vp); 1230 1231 /* Disallow this operation if the file system is mounted read-only. */ 1232 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1233 return EROFS; 1234 1235 /* Immutable or append-only files cannot be modified, either. */ 1236 if (node->tn_flags & (IMMUTABLE | APPEND)) 1237 return EPERM; 1238 1239 TMPFS_NODE_LOCK(node); 1240 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1241 node->tn_status |= TMPFS_NODE_ACCESSED; 1242 1243 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1244 node->tn_status |= TMPFS_NODE_MODIFIED; 1245 1246 TMPFS_NODE_UNLOCK(node); 1247 1248 tmpfs_itimes(vp, atime, mtime); 1249 1250 KKASSERT(vn_islocked(vp)); 1251 1252 return 0; 1253 } 1254 1255 /* --------------------------------------------------------------------- */ 1256 /* Sync timestamps */ 1257 void 1258 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1259 const struct timespec *mod) 1260 { 1261 struct tmpfs_node *node; 1262 struct timespec now; 1263 1264 node = VP_TO_TMPFS_NODE(vp); 1265 1266 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1267 TMPFS_NODE_CHANGED)) == 0) 1268 return; 1269 1270 vfs_timestamp(&now); 1271 1272 TMPFS_NODE_LOCK(node); 1273 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1274 if (acc == NULL) 1275 acc = &now; 1276 node->tn_atime = acc->tv_sec; 1277 node->tn_atimensec = acc->tv_nsec; 1278 } 1279 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1280 if (mod == NULL) 1281 mod = &now; 1282 node->tn_mtime = mod->tv_sec; 1283 node->tn_mtimensec = mod->tv_nsec; 1284 } 1285 if (node->tn_status & TMPFS_NODE_CHANGED) { 1286 node->tn_ctime = now.tv_sec; 1287 node->tn_ctimensec = now.tv_nsec; 1288 } 1289 node->tn_status &= 1290 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1291 TMPFS_NODE_UNLOCK(node); 1292 } 1293 1294 /* --------------------------------------------------------------------- */ 1295 1296 void 1297 tmpfs_update(struct vnode *vp) 1298 { 1299 1300 tmpfs_itimes(vp, NULL, NULL); 1301 } 1302 1303 /* --------------------------------------------------------------------- */ 1304 1305 int 1306 tmpfs_truncate(struct vnode *vp, off_t length) 1307 { 1308 int error; 1309 struct tmpfs_node *node; 1310 1311 node = VP_TO_TMPFS_NODE(vp); 1312 1313 if (length < 0) { 1314 error = EINVAL; 1315 goto out; 1316 } 1317 1318 if (node->tn_size == length) { 1319 error = 0; 1320 goto out; 1321 } 1322 1323 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1324 return (EFBIG); 1325 1326 1327 error = tmpfs_reg_resize(vp, length, 1); 1328 1329 if (error == 0) { 1330 TMPFS_NODE_LOCK(node); 1331 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1332 TMPFS_NODE_UNLOCK(node); 1333 } 1334 1335 out: 1336 tmpfs_update(vp); 1337 1338 return error; 1339 } 1340 1341 /* --------------------------------------------------------------------- */ 1342 1343 static ino_t 1344 tmpfs_fetch_ino(void) 1345 { 1346 ino_t ret; 1347 1348 spin_lock(&ino_lock); 1349 ret = t_ino++; 1350 spin_unlock(&ino_lock); 1351 1352 return ret; 1353 } 1354