1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 
35 */ 36 #include <sys/cdefs.h> 37 38 #include <sys/kernel.h> 39 #include <sys/param.h> 40 #include <sys/namei.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/spinlock2.h> 44 #include <sys/stat.h> 45 #include <sys/systm.h> 46 #include <sys/vnode.h> 47 #include <sys/vmmeter.h> 48 49 #include <sys/mplock2.h> 50 51 #include <vm/vm.h> 52 #include <vm/vm_object.h> 53 #include <vm/vm_page.h> 54 #include <vm/vm_pager.h> 55 #include <vm/vm_extern.h> 56 57 #include <vfs/tmpfs/tmpfs.h> 58 #include <vfs/tmpfs/tmpfs_fifoops.h> 59 #include <vfs/tmpfs/tmpfs_vnops.h> 60 61 static ino_t t_ino = 2; 62 static struct spinlock ino_lock; 63 static ino_t tmpfs_fetch_ino(void); 64 65 /* --------------------------------------------------------------------- */ 66 67 /* 68 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 69 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 70 * using the credentials of the process 'p'. 71 * 72 * If the node type is set to 'VDIR', then the parent parameter must point 73 * to the parent directory of the node being created. It may only be NULL 74 * while allocating the root node. 75 * 76 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 77 * specifies the device the node represents. 78 * 79 * If the node type is set to 'VLNK', then the parameter target specifies 80 * the file name of the target file for the symbolic link that is being 81 * created. 82 * 83 * Note that new nodes are retrieved from the available list if it has 84 * items or, if it is empty, from the node pool as long as there is enough 85 * space to create them. 86 * 87 * Returns zero on success or an appropriate error code on failure. 
88 */ 89 int 90 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 91 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 92 char *target, int rmajor, int rminor, struct tmpfs_node **node) 93 { 94 struct tmpfs_node *nnode; 95 struct timespec ts; 96 udev_t rdev; 97 98 /* If the root directory of the 'tmp' file system is not yet 99 * allocated, this must be the request to do it. */ 100 KKASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 101 102 KKASSERT(IFF(type == VLNK, target != NULL)); 103 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 104 105 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 106 return (ENOSPC); 107 108 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 109 if (nnode == NULL) 110 return (ENOSPC); 111 112 /* Generic initialization. */ 113 nnode->tn_type = type; 114 vfs_timestamp(&ts); 115 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 116 = ts.tv_sec; 117 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 118 = ts.tv_nsec; 119 nnode->tn_uid = uid; 120 nnode->tn_gid = gid; 121 nnode->tn_mode = mode; 122 nnode->tn_id = tmpfs_fetch_ino(); 123 nnode->tn_advlock.init_done = 0; 124 125 /* Type-specific initialization. 
*/ 126 switch (nnode->tn_type) { 127 case VBLK: 128 case VCHR: 129 rdev = makeudev(rmajor, rminor); 130 if (rdev == NOUDEV) { 131 objcache_put(tmp->tm_node_pool, nnode); 132 return(EINVAL); 133 } 134 nnode->tn_rdev = rdev; 135 break; 136 137 case VDIR: 138 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 139 KKASSERT(parent != nnode); 140 KKASSERT(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 141 nnode->tn_dir.tn_parent = parent; 142 nnode->tn_dir.tn_readdir_lastn = 0; 143 nnode->tn_dir.tn_readdir_lastp = NULL; 144 nnode->tn_links++; 145 nnode->tn_size = 0; 146 if (parent) { 147 TMPFS_NODE_LOCK(parent); 148 parent->tn_links++; 149 TMPFS_NODE_UNLOCK(parent); 150 } 151 break; 152 153 case VFIFO: 154 /* FALLTHROUGH */ 155 case VSOCK: 156 break; 157 158 case VLNK: 159 nnode->tn_size = strlen(target); 160 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 161 M_WAITOK | M_NULLOK); 162 if (nnode->tn_link == NULL) { 163 objcache_put(tmp->tm_node_pool, nnode); 164 return (ENOSPC); 165 } 166 bcopy(target, nnode->tn_link, nnode->tn_size); 167 nnode->tn_link[nnode->tn_size] = '\0'; 168 break; 169 170 case VREG: 171 nnode->tn_reg.tn_aobj = 172 swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0); 173 nnode->tn_reg.tn_aobj_pages = 0; 174 nnode->tn_size = 0; 175 break; 176 177 default: 178 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 179 } 180 181 TMPFS_NODE_LOCK(nnode); 182 TMPFS_LOCK(tmp); 183 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 184 tmp->tm_nodes_inuse++; 185 TMPFS_UNLOCK(tmp); 186 TMPFS_NODE_UNLOCK(nnode); 187 188 *node = nnode; 189 return 0; 190 } 191 192 /* --------------------------------------------------------------------- */ 193 194 /* 195 * Destroys the node pointed to by node from the file system 'tmp'. 196 * If the node does not belong to the given mount point, the results are 197 * unpredicted. 
198 * 199 * If the node references a directory; no entries are allowed because 200 * their removal could need a recursive algorithm, something forbidden in 201 * kernel space. Furthermore, there is not need to provide such 202 * functionality (recursive removal) because the only primitives offered 203 * to the user are the removal of empty directories and the deletion of 204 * individual files. 205 * 206 * Note that nodes are not really deleted; in fact, when a node has been 207 * allocated, it cannot be deleted during the whole life of the file 208 * system. Instead, they are moved to the available list and remain there 209 * until reused. 210 */ 211 void 212 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 213 { 214 vm_pindex_t pages = 0; 215 216 #ifdef INVARIANTS 217 TMPFS_ASSERT_ELOCKED(node); 218 KKASSERT(node->tn_vnode == NULL); 219 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 220 #endif 221 222 TMPFS_LOCK(tmp); 223 LIST_REMOVE(node, tn_entries); 224 tmp->tm_nodes_inuse--; 225 TMPFS_UNLOCK(tmp); 226 TMPFS_NODE_UNLOCK(node); 227 228 switch (node->tn_type) { 229 case VNON: 230 /* Do not do anything. VNON is provided to let the 231 * allocation routine clean itself easily by avoiding 232 * duplicating code in it. */ 233 /* FALLTHROUGH */ 234 case VBLK: 235 /* FALLTHROUGH */ 236 case VCHR: 237 /* FALLTHROUGH */ 238 break; 239 case VDIR: 240 /* 241 * The parent link can be NULL if this is the root 242 * node. 243 */ 244 node->tn_links--; 245 node->tn_size = 0; 246 KKASSERT(node->tn_dir.tn_parent || node == tmp->tm_root); 247 if (node->tn_dir.tn_parent) { 248 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 249 node->tn_dir.tn_parent->tn_links--; 250 251 /* 252 * If the parent directory has no more links and 253 * no vnode ref nothing is going to come along 254 * and clean it up unless we do it here. 
255 */ 256 if (node->tn_dir.tn_parent->tn_links == 0 && 257 node->tn_dir.tn_parent->tn_vnode == NULL) { 258 tmpfs_free_node(tmp, node->tn_dir.tn_parent); 259 /* eats parent lock */ 260 } else { 261 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 262 } 263 node->tn_dir.tn_parent = NULL; 264 } 265 266 /* 267 * If the root node is being destroyed don't leave a 268 * dangling pointer in tmpfs_mount. 269 */ 270 if (node == tmp->tm_root) 271 tmp->tm_root = NULL; 272 break; 273 case VFIFO: 274 /* FALLTHROUGH */ 275 case VSOCK: 276 break; 277 278 case VLNK: 279 kfree(node->tn_link, tmp->tm_name_zone); 280 node->tn_link = NULL; 281 node->tn_size = 0; 282 break; 283 284 case VREG: 285 if (node->tn_reg.tn_aobj != NULL) 286 vm_object_deallocate(node->tn_reg.tn_aobj); 287 node->tn_reg.tn_aobj = NULL; 288 pages = node->tn_reg.tn_aobj_pages; 289 break; 290 291 default: 292 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 293 } 294 295 /* 296 * Clean up fields for the next allocation. The objcache only ctors 297 * new allocations. 298 */ 299 tmpfs_node_ctor(node, NULL, 0); 300 objcache_put(tmp->tm_node_pool, node); 301 /* node is now invalid */ 302 303 TMPFS_LOCK(tmp); 304 tmp->tm_pages_used -= pages; 305 TMPFS_UNLOCK(tmp); 306 } 307 308 /* --------------------------------------------------------------------- */ 309 310 /* 311 * Allocates a new directory entry for the node node with a name of name. 312 * The new directory entry is returned in *de. 313 * 314 * The link count of node is increased by one to reflect the new object 315 * referencing it. 316 * 317 * Returns zero on success or an appropriate error code on failure. 
318 */ 319 int 320 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 321 const char *name, uint16_t len, struct tmpfs_dirent **de) 322 { 323 struct tmpfs_dirent *nde; 324 325 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 326 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 327 if (nde->td_name == NULL) { 328 objcache_put(tmp->tm_dirent_pool, nde); 329 *de = NULL; 330 return (ENOSPC); 331 } 332 nde->td_namelen = len; 333 bcopy(name, nde->td_name, len); 334 nde->td_name[len] = '\0'; 335 336 nde->td_node = node; 337 338 TMPFS_NODE_LOCK(node); 339 node->tn_links++; 340 TMPFS_NODE_UNLOCK(node); 341 342 *de = nde; 343 344 return 0; 345 } 346 347 /* --------------------------------------------------------------------- */ 348 349 /* 350 * Frees a directory entry. It is the caller's responsibility to destroy 351 * the node referenced by it if needed. 352 * 353 * The link count of node is decreased by one to reflect the removal of an 354 * object that referenced it. This only happens if 'node_exists' is true; 355 * otherwise the function will not access the node referred to by the 356 * directory entry, as it may already have been released from the outside. 357 */ 358 void 359 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 360 { 361 struct tmpfs_node *node; 362 363 node = de->td_node; 364 365 TMPFS_NODE_LOCK(node); 366 TMPFS_ASSERT_ELOCKED(node); 367 KKASSERT(node->tn_links > 0); 368 node->tn_links--; 369 TMPFS_NODE_UNLOCK(node); 370 371 kfree(de->td_name, tmp->tm_name_zone); 372 de->td_namelen = 0; 373 de->td_name = NULL; 374 de->td_node = NULL; 375 objcache_put(tmp->tm_dirent_pool, de); 376 } 377 378 /* --------------------------------------------------------------------- */ 379 380 /* 381 * Allocates a new vnode for the node node or returns a new reference to 382 * an existing one if the node had already a vnode referencing it. The 383 * resulting locked vnode is returned in *vpp. 
*
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		vhold_interlocked(vp);
		TMPFS_NODE_UNLOCK(node);

		/* Could not acquire the vnode: drop the hold and retry. */
		if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
			vdrop(vp);
			goto loop;
		}
		/*
		 * The node's vnode changed while we were acquiring it:
		 * release what we got and retry.
		 */
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		/* Success: the hold is no longer needed. */
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.
	 *
	 * NOTE(review): if the sleep returns an error this path returns
	 * without storing anything to *vpp and without passing through
	 * the assertions at 'out'.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		/* Regular files get VM I/O set up using the node's size. */
		vinitvmio(vp, node->tn_size, BMASK, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		/* FIFOs use the mount's fifo vnode operations vector. */
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

	insmntque(vp, mp);

unlock:
	/*
	 * Reached both on success and when getnewvnode() failed.  Clear
	 * the ALLOCATING interlock, record vp in the node, and wake up
	 * anyone who flagged WANT while sleeping above.
	 */
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;

	/* Success must yield a locked vnode; failure must yield none. */
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));
#ifdef INVARIANTS
	TMPFS_NODE_LOCK(node);
	KKASSERT(*vpp == node->tn_vnode);
	TMPFS_NODE_UNLOCK(node);
#endif

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references: the node's tn_vnode is cleared under the node lock and
 * vp->v_data is then cleared.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_LOCK(node);
	KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0);
	node->tn_vnode = NULL;
	TMPFS_NODE_UNLOCK(node);
	vp->v_data = NULL;
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
536 * The ownership of the new file is automatically assigned based on the 537 * credentials of the caller (through 'cnp'), the group is set based on 538 * the parent directory and the mode is determined from the 'vap' argument. 539 * If successful, *vpp holds a vnode to the newly created file and zero 540 * is returned. Otherwise *vpp is NULL and the function returns an 541 * appropriate error code. 542 */ 543 int 544 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 545 struct namecache *ncp, struct ucred *cred, char *target) 546 { 547 int error; 548 struct tmpfs_dirent *de; 549 struct tmpfs_mount *tmp; 550 struct tmpfs_node *dnode; 551 struct tmpfs_node *node; 552 struct tmpfs_node *parent; 553 554 tmp = VFS_TO_TMPFS(dvp->v_mount); 555 dnode = VP_TO_TMPFS_DIR(dvp); 556 *vpp = NULL; 557 558 /* If the entry we are creating is a directory, we cannot overflow 559 * the number of links of its parent, because it will get a new 560 * link. */ 561 if (vap->va_type == VDIR) { 562 /* Ensure that we do not overflow the maximum number of links 563 * imposed by the system. */ 564 KKASSERT(dnode->tn_links <= LINK_MAX); 565 if (dnode->tn_links == LINK_MAX) { 566 return EMLINK; 567 } 568 569 parent = dnode; 570 KKASSERT(parent != NULL); 571 } else 572 parent = NULL; 573 574 /* Allocate a node that represents the new file. */ 575 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 576 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rmajor, vap->va_rminor, &node); 577 if (error != 0) 578 return error; 579 TMPFS_NODE_LOCK(node); 580 581 /* Allocate a directory entry that points to the new file. */ 582 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 583 if (error != 0) { 584 tmpfs_free_node(tmp, node); 585 /* eats node lock */ 586 return error; 587 } 588 589 /* Allocate a vnode for the new file. 
*/ 590 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 591 if (error != 0) { 592 tmpfs_free_dirent(tmp, de); 593 tmpfs_free_node(tmp, node); 594 /* eats node lock */ 595 return error; 596 } 597 598 /* Now that all required items are allocated, we can proceed to 599 * insert the new node into the directory, an operation that 600 * cannot fail. */ 601 tmpfs_dir_attach(dnode, de); 602 TMPFS_NODE_UNLOCK(node); 603 604 return error; 605 } 606 607 /* --------------------------------------------------------------------- */ 608 609 /* 610 * Attaches the directory entry de to the directory represented by vp. 611 * Note that this does not change the link count of the node pointed by 612 * the directory entry, as this is done by tmpfs_alloc_dirent. 613 */ 614 void 615 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 616 { 617 TMPFS_NODE_LOCK(dnode); 618 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 619 620 TMPFS_ASSERT_ELOCKED(dnode); 621 dnode->tn_size += sizeof(struct tmpfs_dirent); 622 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 623 TMPFS_NODE_MODIFIED; 624 TMPFS_NODE_UNLOCK(dnode); 625 } 626 627 /* --------------------------------------------------------------------- */ 628 629 /* 630 * Detaches the directory entry de from the directory represented by vp. 631 * Note that this does not change the link count of the node pointed by 632 * the directory entry, as this is done by tmpfs_free_dirent. 
633 */ 634 void 635 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 636 { 637 TMPFS_NODE_LOCK(dnode); 638 if (dnode->tn_dir.tn_readdir_lastp == de) { 639 dnode->tn_dir.tn_readdir_lastn = 0; 640 dnode->tn_dir.tn_readdir_lastp = NULL; 641 } 642 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 643 644 TMPFS_ASSERT_ELOCKED(dnode); 645 dnode->tn_size -= sizeof(struct tmpfs_dirent); 646 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 647 TMPFS_NODE_MODIFIED; 648 TMPFS_NODE_UNLOCK(dnode); 649 } 650 651 /* --------------------------------------------------------------------- */ 652 653 /* 654 * Looks for a directory entry in the directory represented by node. 655 * 'ncp' describes the name of the entry to look for. Note that the . 656 * and .. components are not allowed as they do not physically exist 657 * within directories. 658 * 659 * Returns a pointer to the entry when found, otherwise NULL. 660 */ 661 struct tmpfs_dirent * 662 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 663 struct namecache *ncp) 664 { 665 struct tmpfs_dirent *de; 666 int len = ncp->nc_nlen; 667 668 TMPFS_VALIDATE_DIR(node); 669 670 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 671 if (f != NULL && de->td_node != f) 672 continue; 673 if (len == de->td_namelen) { 674 if (!memcmp(ncp->nc_name, de->td_name, len)) 675 break; 676 } 677 } 678 679 TMPFS_NODE_LOCK(node); 680 node->tn_status |= TMPFS_NODE_ACCESSED; 681 TMPFS_NODE_UNLOCK(node); 682 683 return de; 684 } 685 686 /* --------------------------------------------------------------------- */ 687 688 /* 689 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 690 * directory and returns it in the uio space. The function returns 0 691 * on success, -1 if there was not enough space in the uio structure to 692 * hold the directory entry or an appropriate error code if another 693 * error happens. 
694 */ 695 int 696 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 697 { 698 int error; 699 struct dirent dent; 700 int dirsize; 701 702 TMPFS_VALIDATE_DIR(node); 703 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 704 705 dent.d_ino = node->tn_id; 706 dent.d_type = DT_DIR; 707 dent.d_namlen = 1; 708 dent.d_name[0] = '.'; 709 dent.d_name[1] = '\0'; 710 dirsize = _DIRENT_DIRSIZ(&dent); 711 712 if (dirsize > uio->uio_resid) 713 error = -1; 714 else { 715 error = uiomove((caddr_t)&dent, dirsize, uio); 716 if (error == 0) 717 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 718 } 719 720 TMPFS_NODE_LOCK(node); 721 node->tn_status |= TMPFS_NODE_ACCESSED; 722 TMPFS_NODE_UNLOCK(node); 723 724 return error; 725 } 726 727 /* --------------------------------------------------------------------- */ 728 729 /* 730 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 731 * directory and returns it in the uio space. The function returns 0 732 * on success, -1 if there was not enough space in the uio structure to 733 * hold the directory entry or an appropriate error code if another 734 * error happens. 
735 */ 736 int 737 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 738 struct uio *uio) 739 { 740 int error; 741 struct dirent dent; 742 int dirsize; 743 744 TMPFS_VALIDATE_DIR(node); 745 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 746 747 if (node->tn_dir.tn_parent) { 748 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 749 dent.d_ino = node->tn_dir.tn_parent->tn_id; 750 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 751 } else { 752 dent.d_ino = tmp->tm_root->tn_id; 753 } 754 755 dent.d_type = DT_DIR; 756 dent.d_namlen = 2; 757 dent.d_name[0] = '.'; 758 dent.d_name[1] = '.'; 759 dent.d_name[2] = '\0'; 760 dirsize = _DIRENT_DIRSIZ(&dent); 761 762 if (dirsize > uio->uio_resid) 763 error = -1; 764 else { 765 error = uiomove((caddr_t)&dent, dirsize, uio); 766 if (error == 0) { 767 struct tmpfs_dirent *de; 768 769 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 770 if (de == NULL) 771 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 772 else 773 uio->uio_offset = tmpfs_dircookie(de); 774 } 775 } 776 777 TMPFS_NODE_LOCK(node); 778 node->tn_status |= TMPFS_NODE_ACCESSED; 779 TMPFS_NODE_UNLOCK(node); 780 781 return error; 782 } 783 784 /* --------------------------------------------------------------------- */ 785 786 /* 787 * Lookup a directory entry by its associated cookie. 788 */ 789 struct tmpfs_dirent * 790 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 791 { 792 struct tmpfs_dirent *de; 793 794 if (cookie == node->tn_dir.tn_readdir_lastn && 795 node->tn_dir.tn_readdir_lastp != NULL) { 796 return node->tn_dir.tn_readdir_lastp; 797 } 798 799 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 800 if (tmpfs_dircookie(de) == cookie) { 801 break; 802 } 803 } 804 805 return de; 806 } 807 808 /* --------------------------------------------------------------------- */ 809 810 /* 811 * Helper function for tmpfs_readdir. Returns as much directory entries 812 * as can fit in the uio space. The read starts at uio->uio_offset. 
813 * The function returns 0 on success, -1 if there was not enough space 814 * in the uio structure to hold the directory entry or an appropriate 815 * error code if another error happens. 816 */ 817 int 818 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 819 { 820 int error; 821 off_t startcookie; 822 struct tmpfs_dirent *de; 823 824 TMPFS_VALIDATE_DIR(node); 825 826 /* Locate the first directory entry we have to return. We have cached 827 * the last readdir in the node, so use those values if appropriate. 828 * Otherwise do a linear scan to find the requested entry. */ 829 startcookie = uio->uio_offset; 830 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 831 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 832 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 833 return 0; 834 } else { 835 de = tmpfs_dir_lookupbycookie(node, startcookie); 836 } 837 if (de == NULL) { 838 return EINVAL; 839 } 840 841 /* Read as much entries as possible; i.e., until we reach the end of 842 * the directory or we exhaust uio space. */ 843 do { 844 struct dirent d; 845 int reclen; 846 847 /* Create a dirent structure representing the current 848 * tmpfs_node and fill it. 
*/ 849 d.d_ino = de->td_node->tn_id; 850 switch (de->td_node->tn_type) { 851 case VBLK: 852 d.d_type = DT_BLK; 853 break; 854 855 case VCHR: 856 d.d_type = DT_CHR; 857 break; 858 859 case VDIR: 860 d.d_type = DT_DIR; 861 break; 862 863 case VFIFO: 864 d.d_type = DT_FIFO; 865 break; 866 867 case VLNK: 868 d.d_type = DT_LNK; 869 break; 870 871 case VREG: 872 d.d_type = DT_REG; 873 break; 874 875 case VSOCK: 876 d.d_type = DT_SOCK; 877 break; 878 879 default: 880 panic("tmpfs_dir_getdents: type %p %d", 881 de->td_node, (int)de->td_node->tn_type); 882 } 883 d.d_namlen = de->td_namelen; 884 KKASSERT(de->td_namelen < sizeof(d.d_name)); 885 bcopy(de->td_name, d.d_name, d.d_namlen); 886 d.d_name[d.d_namlen] = '\0'; 887 reclen = _DIRENT_RECLEN(d.d_namlen); 888 889 /* Stop reading if the directory entry we are treating is 890 * bigger than the amount of data that can be returned. */ 891 if (reclen > uio->uio_resid) { 892 error = -1; 893 break; 894 } 895 896 /* Copy the new dirent structure into the output buffer and 897 * advance pointers. */ 898 error = uiomove((caddr_t)&d, reclen, uio); 899 900 (*cntp)++; 901 de = TAILQ_NEXT(de, td_entries); 902 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 903 904 /* Update the offset and cache. */ 905 if (de == NULL) { 906 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 907 node->tn_dir.tn_readdir_lastn = 0; 908 node->tn_dir.tn_readdir_lastp = NULL; 909 } else { 910 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 911 node->tn_dir.tn_readdir_lastp = de; 912 } 913 node->tn_status |= TMPFS_NODE_ACCESSED; 914 915 return error; 916 } 917 918 /* --------------------------------------------------------------------- */ 919 920 /* 921 * Resizes the aobj associated to the regular file pointed to by vp to 922 * the size newsize. 'vp' must point to a vnode that represents a regular 923 * file. 'newsize' must be positive. 
924 * 925 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 926 * to be zero filled. 927 * 928 * Returns zero on success or an appropriate error code on failure. 929 */ 930 int 931 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 932 { 933 int error; 934 vm_pindex_t newpages, oldpages; 935 struct tmpfs_mount *tmp; 936 struct tmpfs_node *node; 937 off_t oldsize; 938 939 #ifdef INVARIANTS 940 KKASSERT(vp->v_type == VREG); 941 KKASSERT(newsize >= 0); 942 #endif 943 944 node = VP_TO_TMPFS_NODE(vp); 945 tmp = VFS_TO_TMPFS(vp->v_mount); 946 947 /* Convert the old and new sizes to the number of pages needed to 948 * store them. It may happen that we do not need to do anything 949 * because the last allocated page can accommodate the change on 950 * its own. */ 951 oldsize = node->tn_size; 952 oldpages = round_page64(oldsize) / PAGE_SIZE; 953 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 954 newpages = round_page64(newsize) / PAGE_SIZE; 955 956 if (newpages > oldpages && 957 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 958 error = ENOSPC; 959 goto out; 960 } 961 962 TMPFS_LOCK(tmp); 963 tmp->tm_pages_used += (newpages - oldpages); 964 TMPFS_UNLOCK(tmp); 965 966 TMPFS_NODE_LOCK(node); 967 node->tn_reg.tn_aobj_pages = newpages; 968 node->tn_size = newsize; 969 TMPFS_NODE_UNLOCK(node); 970 971 /* 972 * When adjusting the vnode filesize and its VM object we must 973 * also adjust our backing VM object (aobj). The blocksize 974 * used must match the block sized we use for the buffer cache. 975 * 976 * The backing VM object contains no VM pages, only swap 977 * assignments. 
978 */ 979 if (newsize < oldsize) { 980 vm_pindex_t osize; 981 vm_pindex_t nsize; 982 vm_object_t aobj; 983 984 error = nvtruncbuf(vp, newsize, BSIZE, -1); 985 aobj = node->tn_reg.tn_aobj; 986 if (aobj) { 987 osize = aobj->size; 988 nsize = vp->v_object->size; 989 if (nsize < osize) { 990 aobj->size = osize; 991 swap_pager_freespace(aobj, nsize, 992 osize - nsize); 993 } 994 } 995 } else { 996 vm_object_t aobj; 997 998 error = nvextendbuf(vp, oldsize, newsize, BSIZE, BSIZE, 999 -1, -1, trivial); 1000 aobj = node->tn_reg.tn_aobj; 1001 if (aobj) 1002 aobj->size = vp->v_object->size; 1003 } 1004 1005 out: 1006 return error; 1007 } 1008 1009 /* --------------------------------------------------------------------- */ 1010 1011 /* 1012 * Change flags of the given vnode. 1013 * Caller should execute tmpfs_update on vp after a successful execution. 1014 * The vnode must be locked on entry and remain locked on exit. 1015 */ 1016 int 1017 tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) 1018 { 1019 int error; 1020 struct tmpfs_node *node; 1021 int flags; 1022 1023 KKASSERT(vn_islocked(vp)); 1024 1025 node = VP_TO_TMPFS_NODE(vp); 1026 flags = node->tn_flags; 1027 1028 /* Disallow this operation if the file system is mounted read-only. */ 1029 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1030 return EROFS; 1031 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1032 1033 /* 1034 * Unprivileged processes are not permitted to unset system 1035 * flags, or modify flags if any system flags are set. 1036 * 1037 * Silently enforce SF_NOCACHE on the root tmpfs vnode so 1038 * tmpfs data is not double-cached by swapcache. 
1039 */ 1040 if (error == 0) { 1041 TMPFS_NODE_LOCK(node); 1042 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1043 if (vp->v_flag & VROOT) 1044 flags |= SF_NOCACHE; 1045 node->tn_flags = flags; 1046 } else { 1047 if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | 1048 SF_APPEND) || 1049 (flags & UF_SETTABLE) != flags) { 1050 error = EPERM; 1051 } else { 1052 node->tn_flags &= SF_SETTABLE; 1053 node->tn_flags |= (flags & UF_SETTABLE); 1054 } 1055 } 1056 node->tn_status |= TMPFS_NODE_CHANGED; 1057 TMPFS_NODE_UNLOCK(node); 1058 } 1059 1060 KKASSERT(vn_islocked(vp)); 1061 1062 return error; 1063 } 1064 1065 /* --------------------------------------------------------------------- */ 1066 1067 /* 1068 * Change access mode on the given vnode. 1069 * Caller should execute tmpfs_update on vp after a successful execution. 1070 * The vnode must be locked on entry and remain locked on exit. 1071 */ 1072 int 1073 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1074 { 1075 struct tmpfs_node *node; 1076 mode_t cur_mode; 1077 int error; 1078 1079 KKASSERT(vn_islocked(vp)); 1080 1081 node = VP_TO_TMPFS_NODE(vp); 1082 1083 /* Disallow this operation if the file system is mounted read-only. */ 1084 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1085 return EROFS; 1086 1087 /* Immutable or append-only files cannot be modified, either. 
*/ 1088 if (node->tn_flags & (IMMUTABLE | APPEND)) 1089 return EPERM; 1090 1091 cur_mode = node->tn_mode; 1092 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1093 &cur_mode); 1094 1095 if (error == 0 && 1096 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1097 TMPFS_NODE_LOCK(node); 1098 node->tn_mode &= ~ALLPERMS; 1099 node->tn_mode |= cur_mode & ALLPERMS; 1100 1101 node->tn_status |= TMPFS_NODE_CHANGED; 1102 TMPFS_NODE_UNLOCK(node); 1103 } 1104 1105 KKASSERT(vn_islocked(vp)); 1106 1107 return 0; 1108 } 1109 1110 /* --------------------------------------------------------------------- */ 1111 1112 /* 1113 * Change ownership of the given vnode. At least one of uid or gid must 1114 * be different than VNOVAL. If one is set to that value, the attribute 1115 * is unchanged. 1116 * Caller should execute tmpfs_update on vp after a successful execution. 1117 * The vnode must be locked on entry and remain locked on exit. 1118 */ 1119 int 1120 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1121 { 1122 mode_t cur_mode; 1123 uid_t cur_uid; 1124 gid_t cur_gid; 1125 struct tmpfs_node *node; 1126 int error; 1127 1128 KKASSERT(vn_islocked(vp)); 1129 node = VP_TO_TMPFS_NODE(vp); 1130 1131 /* Disallow this operation if the file system is mounted read-only. */ 1132 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1133 return EROFS; 1134 1135 /* Immutable or append-only files cannot be modified, either. 
*/ 1136 if (node->tn_flags & (IMMUTABLE | APPEND)) 1137 return EPERM; 1138 1139 cur_uid = node->tn_uid; 1140 cur_gid = node->tn_gid; 1141 cur_mode = node->tn_mode; 1142 error = vop_helper_chown(vp, uid, gid, cred, 1143 &cur_uid, &cur_gid, &cur_mode); 1144 1145 if (error == 0) { 1146 TMPFS_NODE_LOCK(node); 1147 if (cur_uid != node->tn_uid || 1148 cur_gid != node->tn_gid || 1149 cur_mode != node->tn_mode) { 1150 node->tn_uid = uid; 1151 node->tn_gid = gid; 1152 node->tn_mode = cur_mode; 1153 node->tn_status |= TMPFS_NODE_CHANGED; 1154 } 1155 TMPFS_NODE_UNLOCK(node); 1156 } 1157 1158 return error; 1159 } 1160 1161 /* --------------------------------------------------------------------- */ 1162 1163 /* 1164 * Change size of the given vnode. 1165 * Caller should execute tmpfs_update on vp after a successful execution. 1166 * The vnode must be locked on entry and remain locked on exit. 1167 */ 1168 int 1169 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1170 { 1171 int error; 1172 struct tmpfs_node *node; 1173 1174 KKASSERT(vn_islocked(vp)); 1175 1176 node = VP_TO_TMPFS_NODE(vp); 1177 1178 /* Decide whether this is a valid operation based on the file type. */ 1179 error = 0; 1180 switch (vp->v_type) { 1181 case VDIR: 1182 return EISDIR; 1183 1184 case VREG: 1185 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1186 return EROFS; 1187 break; 1188 1189 case VBLK: 1190 /* FALLTHROUGH */ 1191 case VCHR: 1192 /* FALLTHROUGH */ 1193 case VFIFO: 1194 /* Allow modifications of special files even if in the file 1195 * system is mounted read-only (we are not modifying the 1196 * files themselves, but the objects they represent). */ 1197 return 0; 1198 1199 default: 1200 /* Anything else is unsupported. */ 1201 return EOPNOTSUPP; 1202 } 1203 1204 /* Immutable or append-only files cannot be modified, either. 
*/ 1205 if (node->tn_flags & (IMMUTABLE | APPEND)) 1206 return EPERM; 1207 1208 error = tmpfs_truncate(vp, size); 1209 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1210 * for us, as will update tn_status; no need to do that here. */ 1211 1212 KKASSERT(vn_islocked(vp)); 1213 1214 return error; 1215 } 1216 1217 /* --------------------------------------------------------------------- */ 1218 1219 /* 1220 * Change access and modification times of the given vnode. 1221 * Caller should execute tmpfs_update on vp after a successful execution. 1222 * The vnode must be locked on entry and remain locked on exit. 1223 */ 1224 int 1225 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1226 int vaflags, struct ucred *cred) 1227 { 1228 struct tmpfs_node *node; 1229 1230 KKASSERT(vn_islocked(vp)); 1231 1232 node = VP_TO_TMPFS_NODE(vp); 1233 1234 /* Disallow this operation if the file system is mounted read-only. */ 1235 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1236 return EROFS; 1237 1238 /* Immutable or append-only files cannot be modified, either. 
*/ 1239 if (node->tn_flags & (IMMUTABLE | APPEND)) 1240 return EPERM; 1241 1242 TMPFS_NODE_LOCK(node); 1243 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1244 node->tn_status |= TMPFS_NODE_ACCESSED; 1245 1246 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1247 node->tn_status |= TMPFS_NODE_MODIFIED; 1248 1249 TMPFS_NODE_UNLOCK(node); 1250 1251 tmpfs_itimes(vp, atime, mtime); 1252 1253 KKASSERT(vn_islocked(vp)); 1254 1255 return 0; 1256 } 1257 1258 /* --------------------------------------------------------------------- */ 1259 /* Sync timestamps */ 1260 void 1261 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1262 const struct timespec *mod) 1263 { 1264 struct tmpfs_node *node; 1265 struct timespec now; 1266 1267 node = VP_TO_TMPFS_NODE(vp); 1268 1269 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1270 TMPFS_NODE_CHANGED)) == 0) 1271 return; 1272 1273 vfs_timestamp(&now); 1274 1275 TMPFS_NODE_LOCK(node); 1276 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1277 if (acc == NULL) 1278 acc = &now; 1279 node->tn_atime = acc->tv_sec; 1280 node->tn_atimensec = acc->tv_nsec; 1281 } 1282 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1283 if (mod == NULL) 1284 mod = &now; 1285 node->tn_mtime = mod->tv_sec; 1286 node->tn_mtimensec = mod->tv_nsec; 1287 } 1288 if (node->tn_status & TMPFS_NODE_CHANGED) { 1289 node->tn_ctime = now.tv_sec; 1290 node->tn_ctimensec = now.tv_nsec; 1291 } 1292 node->tn_status &= 1293 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1294 TMPFS_NODE_UNLOCK(node); 1295 } 1296 1297 /* --------------------------------------------------------------------- */ 1298 1299 void 1300 tmpfs_update(struct vnode *vp) 1301 { 1302 1303 tmpfs_itimes(vp, NULL, NULL); 1304 } 1305 1306 /* --------------------------------------------------------------------- */ 1307 1308 int 1309 tmpfs_truncate(struct vnode *vp, off_t length) 1310 { 1311 int error; 1312 struct tmpfs_node *node; 1313 1314 node = 
VP_TO_TMPFS_NODE(vp); 1315 1316 if (length < 0) { 1317 error = EINVAL; 1318 goto out; 1319 } 1320 1321 if (node->tn_size == length) { 1322 error = 0; 1323 goto out; 1324 } 1325 1326 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1327 return (EFBIG); 1328 1329 1330 error = tmpfs_reg_resize(vp, length, 1); 1331 1332 if (error == 0) { 1333 TMPFS_NODE_LOCK(node); 1334 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1335 TMPFS_NODE_UNLOCK(node); 1336 } 1337 1338 out: 1339 tmpfs_update(vp); 1340 1341 return error; 1342 } 1343 1344 /* --------------------------------------------------------------------- */ 1345 1346 static ino_t 1347 tmpfs_fetch_ino(void) 1348 { 1349 ino_t ret; 1350 1351 spin_lock(&ino_lock); 1352 ret = t_ino++; 1353 spin_unlock(&ino_lock); 1354 1355 return ret; 1356 } 1357