/*	$NetBSD: tmpfs_subr.c,v 1.56 2009/11/11 09:59:41 rmind Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.56 2009/11/11 09:59:41 rmind Exp $");

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/swap.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/proc.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
 * using the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are retrieved from the available list if it has
 * items or, if it is empty, from the node pool as long as there is enough
 * space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	KASSERT(IFF(type == VLNK, target != NULL));
	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	nnode = NULL;
	if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) {
		atomic_dec_uint(&tmp->tm_nodes_cnt);
		return ENOSPC;
	}

	nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
	if (nnode == NULL) {
		atomic_dec_uint(&tmp->tm_nodes_cnt);
		return ENOSPC;
	}

	/*
	 * XXX Where the pool is backed by a map larger than (4GB *
	 * sizeof(*nnode)), this may produce duplicate inode numbers
	 * for applications that do not understand 64-bit ino_t.
	 */
	nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
	nnode->tn_gen = arc4random();

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;

	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_lockf = NULL;
	nnode->tn_vnode = NULL;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent =
		    (parent == NULL) ? nnode : parent;
		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
		nnode->tn_links++;
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		KASSERT(strlen(target) < MAXPATHLEN);
		nnode->tn_size = strlen(target);
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			atomic_dec_uint(&tmp->tm_nodes_cnt);
			TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;

	case VREG:
		nnode->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT32_MAX - PAGE_SIZE, 0);
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;

	default:
		KASSERT(0);
	}

	mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);

	mutex_enter(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	mutex_exit(&tmp->tm_lock);

	*node = nnode;
	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the node pointed to by 'node' from the file system 'tmp'.
 * If the node does not belong to the given mount point, the results are
 * unpredictable.
 *
 * If the node references a directory, it must have no entries, because
 * their removal could require a recursive algorithm, something forbidden
 * in kernel space.
 * Furthermore, there is no need to provide such functionality (recursive
 * removal) because the only primitives offered to the user are the
 * removal of empty directories and the deletion of individual files.
 *
 * Note that nodes are not really deleted; in fact, once a node has been
 * allocated, it cannot be deleted during the whole life of the file
 * system.  Instead, it is moved to the available list and remains there
 * until reused.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{

	if (node->tn_type == VREG) {
		atomic_add_int(&tmp->tm_pages_used,
		    -node->tn_spec.tn_reg.tn_aobj_pages);
	}
	atomic_dec_uint(&tmp->tm_nodes_cnt);
	mutex_enter(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		tmpfs_str_pool_put(&tmp->tm_str_pool,
		    node->tn_spec.tn_lnk.tn_link, node->tn_size);
		break;

	case VREG:
		if (node->tn_spec.tn_reg.tn_aobj != NULL)
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		break;

	default:
		break;
	}

	mutex_destroy(&node->tn_vlock);
	TMPFS_POOL_PUT(&tmp->tm_node_pool, node);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node 'node' with the name 'name'.
 * The new directory entry is returned in *de.
 *
 * The link count of 'node' is increased by one to reflect the new object
 * referencing it.  This takes care of notifying kqueue listeners about
 * this change.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, uint16_t len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
	if (nde->td_name == NULL) {
		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_node = node;

	node->tn_links++;
	if (node->tn_links > 1 && node->tn_vnode != NULL)
		VN_KNOTE(node->tn_vnode, NOTE_LINK);
	*de = nde;

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of the node is decreased by one to reflect the removal of
 * an object that referenced it.  This only happens if 'node_exists' is
 * true; otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 *
 * Interested parties (kqueue) are notified of the link count change; note
 * that this can include both the node pointed to by the directory entry
 * as well as its parent.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    bool node_exists)
{
	if (node_exists) {
		struct tmpfs_node *node;

		node = de->td_node;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
		if (node->tn_vnode != NULL)
			VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
			    NOTE_DELETE : NOTE_LINK);
		if (node->tn_type == VDIR)
			VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
			    NOTE_LINK);
	}

	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new vnode for the node 'node' or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *vp;

	/* If there is already a vnode, then lock it. */
	for (;;) {
		mutex_enter(&node->tn_vlock);
		if ((vp = node->tn_vnode) != NULL) {
			mutex_enter(&vp->v_interlock);
			mutex_exit(&node->tn_vlock);
			error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
			if (error == ENOENT) {
				/* vnode was reclaimed. */
				continue;
			}
			*vpp = vp;
			return error;
		}
		break;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0) {
		mutex_exit(&node->tn_vlock);
		return error;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		mutex_exit(&node->tn_vlock);
		ungetnewvnode(vp);
		return error;
	}

	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
		break;

	case VDIR:
		vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VV_ROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	mutex_exit(&node->tn_vlock);
	*vpp = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	mutex_enter(&node->tn_vlock);
	node->tn_vnode = NULL;
	mutex_exit(&node->tn_vlock);
	vp->v_data = NULL;
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, the parent gains a
	 * new link, so make sure its link count cannot overflow. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de, true);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);
	if (vap->va_type == VDIR) {
		VN_KNOTE(dvp, NOTE_LINK);
		dnode->tn_links++;
		KASSERT(dnode->tn_links <= LINK_MAX);
	}

out:
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);

	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Attaches the directory entry 'de' to the directory represented by 'vp'.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * a write to it.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(vp);

	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}

/* --------------------------------------------------------------------- */

/*
 * Detaches the directory entry 'de' from the directory represented by 'vp'.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_free_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * a write to it.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	KASSERT(VOP_ISLOCKED(vp));

	dnode = VP_TO_TMPFS_DIR(vp);

	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}

	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}

/* --------------------------------------------------------------------- */

/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
{
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	node->tn_status |= TMPFS_NODE_ACCESSED;

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(cnp->cn_namelen < 0xffff);
		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
			break;
		}
	}

	return de;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent *dentp;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	dentp->d_fileno = node->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 1;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0)
			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent *dentp;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 2;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '.';
	dentp->d_name[2] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0) {
			struct tmpfs_dirent *de;

			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			if (de == NULL)
				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
			else
				uio->uio_offset = tmpfs_dircookie(de);
		}
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Lookup a directory entry by its associated cookie.
 */
struct tmpfs_dirent *
tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
{
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
	    node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
		return node->tn_spec.tn_dir.tn_readdir_lastp;
	}

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (tmpfs_dircookie(de) == cookie) {
			break;
		}
	}

	return de;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Returns as many directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry or an appropriate
 * error code if another error happens.
 */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
{
	int error;
	off_t startcookie;
	struct dirent *dentp;
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry. */
	startcookie = uio->uio_offset;
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
		return 0;
	} else {
		de = tmpfs_dir_lookupbycookie(node, startcookie);
	}
	if (de == NULL) {
		return EINVAL;
	}

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	/* Read as many entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		dentp->d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			dentp->d_type = DT_BLK;
			break;

		case VCHR:
			dentp->d_type = DT_CHR;
			break;

		case VDIR:
			dentp->d_type = DT_DIR;
			break;

		case VFIFO:
			dentp->d_type = DT_FIFO;
			break;

		case VLNK:
			dentp->d_type = DT_LNK;
			break;

		case VREG:
			dentp->d_type = DT_REG;
			break;

		case VSOCK:
			dentp->d_type = DT_SOCK;
			break;

		default:
			KASSERT(0);
		}
		dentp->d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dentp->d_name));
		(void)memcpy(dentp->d_name, de->td_name, de->td_namelen);
		dentp->d_name[de->td_namelen] = '\0';
		dentp->d_reclen = _DIRENT_SIZE(dentp);

		/* Stop reading if the directory entry we are processing is
		 * bigger than the amount of data that can be returned. */
		if (dentp->d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(dentp, dentp->d_reclen, uio);

		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset and cache. */
	if (de == NULL) {
		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	} else {
		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
		    tmpfs_dircookie(de);
		node->tn_spec.tn_dir.tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Resizes the aobj associated with the regular file pointed to by vp to
 * the size newsize.  'vp' must point to a vnode that represents a regular
 * file.  'newsize' must not be negative.
 *
 * If the file is extended, the appropriate kevent is raised.  This does
 * not raise a write event, though, because resizing is not the same as
 * writing.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	int error;
	unsigned int newpages, oldpages;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/* Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own. */
	oldsize = node->tn_size;
	oldpages = round_page(oldsize) / PAGE_SIZE;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
	newpages = round_page(newsize) / PAGE_SIZE;

	if (newpages > oldpages &&
	    (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) {
		error = ENOSPC;
		goto out;
	}
	atomic_add_int(&tmp->tm_pages_used, newpages - oldpages);

	if (newsize < oldsize) {
		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;

		/*
		 * Zero out the truncated part of the last page.
		 */

		uvm_vnp_zerorange(vp, newsize, zerolen);
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	/*
	 * Free the backing store.
	 */

	if (newpages < oldpages) {
		struct uvm_object *uobj;

		uobj = node->tn_spec.tn_reg.tn_aobj;

		mutex_enter(&uobj->vmobjlock);
		uao_dropswap_range(uobj, newpages, oldpages);
		mutex_exit(&uobj->vmobjlock);
	}

	error = 0;

	if (newsize > oldsize)
		VN_KNOTE(vp, NOTE_EXTEND);

out:
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Returns information about the number of available memory pages,
 * including physical and virtual ones.
 *
 * If 'total' is true, the value returned is the total amount of memory
 * pages configured for the system (either in use or free).
 * If it is false, the value returned is the amount of free memory pages.
 *
 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
 * excessive memory usage.
 */
size_t
tmpfs_mem_info(bool total)
{
	size_t size;

	size = 0;
	size += uvmexp.swpgavail;
	if (!total) {
		size -= uvmexp.swpgonly;
	}
	size += uvmexp.free;
	size += uvmexp.filepages;
	if (size > uvmexp.wired) {
		size -= uvmexp.wired;
	} else {
		size = 0;
	}

	return size;
}

/* --------------------------------------------------------------------- */

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	int fs_decision = 0;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	if (kauth_cred_geteuid(cred) != node->tn_uid)
		fs_decision = EACCES;

	/*
	 * If the new flags have non-user flags that are different from
	 * those on the node, we need special permission to change them.
	 */
	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
		if (!fs_decision)
			fs_decision = EPERM;
	}

	/*
	 * Indicate that this node's flags have system attributes in them if
	 * that's the case.
	 */
	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
		action |= KAUTH_VNODE_HAS_SYSFLAGS;
	}

	error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision);
	if (error)
		return error;

	/*
	 * Set the flags.  If we're not setting non-user flags, be careful not
	 * to overwrite them.
	 *
	 * XXX: Can't we always assign here?  If the system flags are
	 * different, the code above should catch attempts to change them
	 * without proper permissions, and if we're here it means it's okay
	 * to change them...
	 */
	if (action & KAUTH_VNODE_WRITE_SYSFLAGS) {
		node->tn_flags = flags;
	} else {
		/* Clear all user-settable flags and re-set them. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid,
	    mode);

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
	    NULL, error);
	if (error)
		return (error);

	node->tn_mode = (mode & ALLPERMS);

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different from VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
    struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
	    gid);

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
	    NULL, error);
	if (error)
		return (error);

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred,
    struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * and update tn_status for us; no need to do that here. */

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
    const struct timespec *mtime, const struct timespec *btime,
    int vaflags, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chtimes(vp, vaflags, node->tn_uid, cred);

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
	    error);
	if (error)
		return (error);

	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
		btime = NULL;

	tmpfs_update(vp, atime, mtime, btime, 0);
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/* Sync timestamps. */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth)
{
	struct tmpfs_node *node;
	struct timespec nowtm;

	node = VP_TO_TMPFS_NODE(vp);

	if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
	    TMPFS_NODE_CHANGED)) == 0)
		return;

	if (birth != NULL) {
		node->tn_birthtime = *birth;
	}
	vfs_timestamp(&nowtm);

	if (node->tn_status & TMPFS_NODE_ACCESSED) {
		node->tn_atime = acc ? *acc : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		node->tn_mtime = mod ? *mod : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED) {
		node->tn_ctime = nowtm;
	}

	node->tn_status &=
	    ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
}

/* --------------------------------------------------------------------- */

void
tmpfs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth, int flags)
{

	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

#if 0
	if (flags & UPDATE_CLOSE)
		;	/* XXX Need to do anything special? */
#endif

	tmpfs_itimes(vp, acc, mod, birth);

	KASSERT(VOP_ISLOCKED(vp));
}

/* --------------------------------------------------------------------- */

int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	bool extended;
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);
	extended = length > node->tn_size;

	if (length < 0) {
		error = EINVAL;
		goto out;
	}

	if (node->tn_size == length) {
		error = 0;
		goto out;
	}

	error = tmpfs_reg_resize(vp, length);
	if (error == 0)
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;

out:
	tmpfs_update(vp, NULL, NULL, NULL, 0);

	return error;
}