1 /* $NetBSD: tmpfs_subr.c,v 1.46 2008/02/06 11:23:54 jmmv Exp $ */ 2 3 /* 4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Efficient memory file system supporting functions. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.46 2008/02/06 11:23:54 jmmv Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/dirent.h> 49 #include <sys/event.h> 50 #include <sys/kmem.h> 51 #include <sys/mount.h> 52 #include <sys/namei.h> 53 #include <sys/time.h> 54 #include <sys/stat.h> 55 #include <sys/systm.h> 56 #include <sys/swap.h> 57 #include <sys/vnode.h> 58 #include <sys/kauth.h> 59 #include <sys/proc.h> 60 #include <sys/atomic.h> 61 62 #include <uvm/uvm.h> 63 64 #include <miscfs/specfs/specdev.h> 65 #include <fs/tmpfs/tmpfs.h> 66 #include <fs/tmpfs/tmpfs_fifoops.h> 67 #include <fs/tmpfs/tmpfs_specops.h> 68 #include <fs/tmpfs/tmpfs_vnops.h> 69 70 /* --------------------------------------------------------------------- */ 71 72 /* 73 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 74 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 75 * using the credentials of the process 'p'. 76 * 77 * If the node type is set to 'VDIR', then the parent parameter must point 78 * to the parent directory of the node being created. It may only be NULL 79 * while allocating the root node. 80 * 81 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 82 * specifies the device the node represents. 
83 * 84 * If the node type is set to 'VLNK', then the parameter target specifies 85 * the file name of the target file for the symbolic link that is being 86 * created. 87 * 88 * Note that new nodes are retrieved from the available list if it has 89 * items or, if it is empty, from the node pool as long as there is enough 90 * space to create them. 91 * 92 * Returns zero on success or an appropriate error code on failure. 93 */ 94 int 95 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 96 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 97 char *target, dev_t rdev, struct tmpfs_node **node) 98 { 99 struct tmpfs_node *nnode; 100 101 /* If the root directory of the 'tmp' file system is not yet 102 * allocated, this must be the request to do it. */ 103 KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 104 105 KASSERT(IFF(type == VLNK, target != NULL)); 106 KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 107 108 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 109 110 nnode = NULL; 111 if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) { 112 atomic_dec_uint(&tmp->tm_nodes_cnt); 113 return ENOSPC; 114 } 115 116 nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); 117 if (nnode == NULL) { 118 atomic_dec_uint(&tmp->tm_nodes_cnt); 119 return ENOSPC; 120 } 121 122 /* 123 * XXX Where the pool is backed by a map larger than (4GB * 124 * sizeof(*nnode)), this may produce duplicate inode numbers 125 * for applications that do not understand 64-bit ino_t. 126 */ 127 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); 128 nnode->tn_gen = arc4random(); 129 130 /* Generic initialization. 
*/ 131 nnode->tn_type = type; 132 nnode->tn_size = 0; 133 nnode->tn_status = 0; 134 nnode->tn_flags = 0; 135 nnode->tn_links = 0; 136 getnanotime(&nnode->tn_atime); 137 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 138 nnode->tn_atime; 139 nnode->tn_uid = uid; 140 nnode->tn_gid = gid; 141 nnode->tn_mode = mode; 142 nnode->tn_lockf = NULL; 143 nnode->tn_vnode = NULL; 144 145 /* Type-specific initialization. */ 146 switch (nnode->tn_type) { 147 case VBLK: 148 case VCHR: 149 nnode->tn_spec.tn_dev.tn_rdev = rdev; 150 break; 151 152 case VDIR: 153 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 154 nnode->tn_spec.tn_dir.tn_parent = 155 (parent == NULL) ? nnode : parent; 156 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 157 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 158 nnode->tn_links++; 159 break; 160 161 case VFIFO: 162 /* FALLTHROUGH */ 163 case VSOCK: 164 break; 165 166 case VLNK: 167 KASSERT(strlen(target) < MAXPATHLEN); 168 nnode->tn_size = strlen(target); 169 nnode->tn_spec.tn_lnk.tn_link = 170 tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0); 171 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 172 atomic_dec_uint(&tmp->tm_nodes_cnt); 173 TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode); 174 return ENOSPC; 175 } 176 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 177 break; 178 179 case VREG: 180 nnode->tn_spec.tn_reg.tn_aobj = 181 uao_create(INT32_MAX - PAGE_SIZE, 0); 182 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 183 break; 184 185 default: 186 KASSERT(0); 187 } 188 189 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); 190 191 mutex_enter(&tmp->tm_lock); 192 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 193 mutex_exit(&tmp->tm_lock); 194 195 *node = nnode; 196 return 0; 197 } 198 199 /* --------------------------------------------------------------------- */ 200 201 /* 202 * Destroys the node pointed to by node from the file system 'tmp'. 
203 * If the node does not belong to the given mount point, the results are 204 * unpredicted. 205 * 206 * If the node references a directory; no entries are allowed because 207 * their removal could need a recursive algorithm, something forbidden in 208 * kernel space. Furthermore, there is not need to provide such 209 * functionality (recursive removal) because the only primitives offered 210 * to the user are the removal of empty directories and the deletion of 211 * individual files. 212 * 213 * Note that nodes are not really deleted; in fact, when a node has been 214 * allocated, it cannot be deleted during the whole life of the file 215 * system. Instead, they are moved to the available list and remain there 216 * until reused. 217 */ 218 void 219 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 220 { 221 222 if (node->tn_type == VREG) { 223 atomic_add_int(&tmp->tm_pages_used, 224 -node->tn_spec.tn_reg.tn_aobj_pages); 225 } 226 atomic_dec_uint(&tmp->tm_nodes_cnt); 227 mutex_enter(&tmp->tm_lock); 228 LIST_REMOVE(node, tn_entries); 229 mutex_exit(&tmp->tm_lock); 230 231 switch (node->tn_type) { 232 case VLNK: 233 tmpfs_str_pool_put(&tmp->tm_str_pool, 234 node->tn_spec.tn_lnk.tn_link, node->tn_size); 235 break; 236 237 case VREG: 238 if (node->tn_spec.tn_reg.tn_aobj != NULL) 239 uao_detach(node->tn_spec.tn_reg.tn_aobj); 240 break; 241 242 default: 243 break; 244 } 245 246 mutex_destroy(&node->tn_vlock); 247 TMPFS_POOL_PUT(&tmp->tm_node_pool, node); 248 } 249 250 /* --------------------------------------------------------------------- */ 251 252 /* 253 * Allocates a new directory entry for the node node with a name of name. 254 * The new directory entry is returned in *de. 255 * 256 * The link count of node is increased by one to reflect the new object 257 * referencing it. This takes care of notifying kqueue listeners about 258 * this change. 259 * 260 * Returns zero on success or an appropriate error code on failure. 
261 */ 262 int 263 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 264 const char *name, uint16_t len, struct tmpfs_dirent **de) 265 { 266 struct tmpfs_dirent *nde; 267 268 nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0); 269 if (nde == NULL) 270 return ENOSPC; 271 272 nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0); 273 if (nde->td_name == NULL) { 274 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde); 275 return ENOSPC; 276 } 277 nde->td_namelen = len; 278 memcpy(nde->td_name, name, len); 279 nde->td_node = node; 280 281 node->tn_links++; 282 if (node->tn_links > 1 && node->tn_vnode != NULL) 283 VN_KNOTE(node->tn_vnode, NOTE_LINK); 284 *de = nde; 285 286 return 0; 287 } 288 289 /* --------------------------------------------------------------------- */ 290 291 /* 292 * Frees a directory entry. It is the caller's responsibility to destroy 293 * the node referenced by it if needed. 294 * 295 * The link count of node is decreased by one to reflect the removal of an 296 * object that referenced it. This only happens if 'node_exists' is true; 297 * otherwise the function will not access the node referred to by the 298 * directory entry, as it may already have been released from the outside. 299 * 300 * Interested parties (kqueue) are notified of the link count change; note 301 * that this can include both the node pointed to by the directory entry 302 * as well as its parent. 303 */ 304 void 305 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 306 bool node_exists) 307 { 308 if (node_exists) { 309 struct tmpfs_node *node; 310 311 node = de->td_node; 312 313 KASSERT(node->tn_links > 0); 314 node->tn_links--; 315 if (node->tn_vnode != NULL) 316 VN_KNOTE(node->tn_vnode, node->tn_links == 0 ? 
317 NOTE_DELETE : NOTE_LINK); 318 if (node->tn_type == VDIR) 319 VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode, 320 NOTE_LINK); 321 } 322 323 tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen); 324 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de); 325 } 326 327 /* --------------------------------------------------------------------- */ 328 329 /* 330 * Allocates a new vnode for the node node or returns a new reference to 331 * an existing one if the node had already a vnode referencing it. The 332 * resulting locked vnode is returned in *vpp. 333 * 334 * Returns zero on success or an appropriate error code on failure. 335 */ 336 int 337 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) 338 { 339 int error; 340 struct vnode *vp; 341 342 /* If there is already a vnode, then lock it. */ 343 for (;;) { 344 mutex_enter(&node->tn_vlock); 345 if ((vp = node->tn_vnode) != NULL) { 346 mutex_enter(&vp->v_interlock); 347 mutex_exit(&node->tn_vlock); 348 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK); 349 if (error == ENOENT) { 350 /* vnode was reclaimed. */ 351 continue; 352 } 353 *vpp = vp; 354 return error; 355 } 356 break; 357 } 358 359 /* Get a new vnode and associate it with our node. */ 360 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp); 361 if (error != 0) { 362 mutex_exit(&node->tn_vlock); 363 return error; 364 } 365 366 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 367 if (error != 0) { 368 mutex_exit(&node->tn_vlock); 369 ungetnewvnode(vp); 370 return error; 371 } 372 373 vp->v_type = node->tn_type; 374 375 /* Type-specific initialization. */ 376 switch (node->tn_type) { 377 case VBLK: 378 /* FALLTHROUGH */ 379 case VCHR: 380 vp->v_op = tmpfs_specop_p; 381 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev); 382 break; 383 384 case VDIR: 385 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ? 
386 VV_ROOT : 0; 387 break; 388 389 case VFIFO: 390 vp->v_op = tmpfs_fifoop_p; 391 break; 392 393 case VLNK: 394 /* FALLTHROUGH */ 395 case VREG: 396 /* FALLTHROUGH */ 397 case VSOCK: 398 break; 399 400 default: 401 KASSERT(0); 402 } 403 404 uvm_vnp_setsize(vp, node->tn_size); 405 vp->v_data = node; 406 node->tn_vnode = vp; 407 mutex_exit(&node->tn_vlock); 408 *vpp = vp; 409 410 KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); 411 KASSERT(*vpp == node->tn_vnode); 412 413 return error; 414 } 415 416 /* --------------------------------------------------------------------- */ 417 418 /* 419 * Destroys the association between the vnode vp and the node it 420 * references. 421 */ 422 void 423 tmpfs_free_vp(struct vnode *vp) 424 { 425 struct tmpfs_node *node; 426 427 node = VP_TO_TMPFS_NODE(vp); 428 429 mutex_enter(&node->tn_vlock); 430 node->tn_vnode = NULL; 431 mutex_exit(&node->tn_vlock); 432 vp->v_data = NULL; 433 } 434 435 /* --------------------------------------------------------------------- */ 436 437 /* 438 * Allocates a new file of type 'type' and adds it to the parent directory 439 * 'dvp'; this addition is done using the component name given in 'cnp'. 440 * The ownership of the new file is automatically assigned based on the 441 * credentials of the caller (through 'cnp'), the group is set based on 442 * the parent directory and the mode is determined from the 'vap' argument. 443 * If successful, *vpp holds a vnode to the newly created file and zero 444 * is returned. Otherwise *vpp is NULL and the function returns an 445 * appropriate error code. 
446 */ 447 int 448 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 449 struct componentname *cnp, char *target) 450 { 451 int error; 452 struct tmpfs_dirent *de; 453 struct tmpfs_mount *tmp; 454 struct tmpfs_node *dnode; 455 struct tmpfs_node *node; 456 struct tmpfs_node *parent; 457 458 KASSERT(VOP_ISLOCKED(dvp)); 459 KASSERT(cnp->cn_flags & HASBUF); 460 461 tmp = VFS_TO_TMPFS(dvp->v_mount); 462 dnode = VP_TO_TMPFS_DIR(dvp); 463 *vpp = NULL; 464 465 /* If the entry we are creating is a directory, we cannot overflow 466 * the number of links of its parent, because it will get a new 467 * link. */ 468 if (vap->va_type == VDIR) { 469 /* Ensure that we do not overflow the maximum number of links 470 * imposed by the system. */ 471 KASSERT(dnode->tn_links <= LINK_MAX); 472 if (dnode->tn_links == LINK_MAX) { 473 error = EMLINK; 474 goto out; 475 } 476 477 parent = dnode; 478 } else 479 parent = NULL; 480 481 /* Allocate a node that represents the new file. */ 482 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 483 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 484 if (error != 0) 485 goto out; 486 487 /* Allocate a directory entry that points to the new file. */ 488 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 489 &de); 490 if (error != 0) { 491 tmpfs_free_node(tmp, node); 492 goto out; 493 } 494 495 /* Allocate a vnode for the new file. */ 496 error = tmpfs_alloc_vp(dvp->v_mount, node, vpp); 497 if (error != 0) { 498 tmpfs_free_dirent(tmp, de, true); 499 tmpfs_free_node(tmp, node); 500 goto out; 501 } 502 503 /* Now that all required items are allocated, we can proceed to 504 * insert the new node into the directory, an operation that 505 * cannot fail. 
*/ 506 tmpfs_dir_attach(dvp, de); 507 if (vap->va_type == VDIR) { 508 VN_KNOTE(dvp, NOTE_LINK); 509 dnode->tn_links++; 510 KASSERT(dnode->tn_links <= LINK_MAX); 511 } 512 513 out: 514 if (error != 0 || !(cnp->cn_flags & SAVESTART)) 515 PNBUF_PUT(cnp->cn_pnbuf); 516 vput(dvp); 517 518 KASSERT(IFF(error == 0, *vpp != NULL)); 519 520 return error; 521 } 522 523 /* --------------------------------------------------------------------- */ 524 525 /* 526 * Attaches the directory entry de to the directory represented by vp. 527 * Note that this does not change the link count of the node pointed by 528 * the directory entry, as this is done by tmpfs_alloc_dirent. 529 * 530 * As the "parent" directory changes, interested parties are notified of 531 * a write to it. 532 */ 533 void 534 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 535 { 536 struct tmpfs_node *dnode; 537 538 dnode = VP_TO_TMPFS_DIR(vp); 539 540 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 541 dnode->tn_size += sizeof(struct tmpfs_dirent); 542 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 543 TMPFS_NODE_MODIFIED; 544 uvm_vnp_setsize(vp, dnode->tn_size); 545 546 VN_KNOTE(vp, NOTE_WRITE); 547 } 548 549 /* --------------------------------------------------------------------- */ 550 551 /* 552 * Detaches the directory entry de from the directory represented by vp. 553 * Note that this does not change the link count of the node pointed by 554 * the directory entry, as this is done by tmpfs_free_dirent. 555 * 556 * As the "parent" directory changes, interested parties are notified of 557 * a write to it. 
558 */ 559 void 560 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 561 { 562 struct tmpfs_node *dnode; 563 564 KASSERT(VOP_ISLOCKED(vp)); 565 566 dnode = VP_TO_TMPFS_DIR(vp); 567 568 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 569 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 570 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 571 } 572 573 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 574 dnode->tn_size -= sizeof(struct tmpfs_dirent); 575 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 576 TMPFS_NODE_MODIFIED; 577 uvm_vnp_setsize(vp, dnode->tn_size); 578 579 VN_KNOTE(vp, NOTE_WRITE); 580 } 581 582 /* --------------------------------------------------------------------- */ 583 584 /* 585 * Looks for a directory entry in the directory represented by node. 586 * 'cnp' describes the name of the entry to look for. Note that the . 587 * and .. components are not allowed as they do not physically exist 588 * within directories. 589 * 590 * Returns a pointer to the entry when found, otherwise NULL. 591 */ 592 struct tmpfs_dirent * 593 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp) 594 { 595 bool found; 596 struct tmpfs_dirent *de; 597 598 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 599 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 600 cnp->cn_nameptr[1] == '.'))); 601 TMPFS_VALIDATE_DIR(node); 602 603 node->tn_status |= TMPFS_NODE_ACCESSED; 604 605 found = 0; 606 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 607 KASSERT(cnp->cn_namelen < 0xffff); 608 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 609 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 610 found = 1; 611 break; 612 } 613 } 614 615 return found ? de : NULL; 616 } 617 618 /* --------------------------------------------------------------------- */ 619 620 /* 621 * Helper function for tmpfs_readdir. Creates a '.' 
entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	struct dirent *dp;
	int error;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	/* The entry is built in a zeroed scratch buffer. */
	dp = kmem_zalloc(sizeof(*dp), KM_SLEEP);

	dp->d_fileno = node->tn_id;
	dp->d_type = DT_DIR;
	dp->d_name[0] = '.';
	dp->d_name[1] = '\0';
	dp->d_namlen = 1;
	dp->d_reclen = _DIRENT_SIZE(dp);

	if (dp->d_reclen <= uio->uio_resid) {
		error = uiomove(dp, dp->d_reclen, uio);
		/* Once '.' is out, the next thing to read is '..'. */
		if (error == 0)
			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
	} else {
		error = -1;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dp, sizeof(*dp));
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
667 */ 668 int 669 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 670 { 671 int error; 672 struct dirent *dentp; 673 674 TMPFS_VALIDATE_DIR(node); 675 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 676 677 dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); 678 679 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 680 dentp->d_type = DT_DIR; 681 dentp->d_namlen = 2; 682 dentp->d_name[0] = '.'; 683 dentp->d_name[1] = '.'; 684 dentp->d_name[2] = '\0'; 685 dentp->d_reclen = _DIRENT_SIZE(dentp); 686 687 if (dentp->d_reclen > uio->uio_resid) 688 error = -1; 689 else { 690 error = uiomove(dentp, dentp->d_reclen, uio); 691 if (error == 0) { 692 struct tmpfs_dirent *de; 693 694 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 695 if (de == NULL) 696 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 697 else 698 uio->uio_offset = tmpfs_dircookie(de); 699 } 700 } 701 702 node->tn_status |= TMPFS_NODE_ACCESSED; 703 704 kmem_free(dentp, sizeof(struct dirent)); 705 return error; 706 } 707 708 /* --------------------------------------------------------------------- */ 709 710 /* 711 * Lookup a directory entry by its associated cookie. 712 */ 713 struct tmpfs_dirent * 714 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 715 { 716 struct tmpfs_dirent *de; 717 718 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 719 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 720 return node->tn_spec.tn_dir.tn_readdir_lastp; 721 } 722 723 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 724 if (tmpfs_dircookie(de) == cookie) { 725 break; 726 } 727 } 728 729 return de; 730 } 731 732 /* --------------------------------------------------------------------- */ 733 734 /* 735 * Helper function for tmpfs_readdir. Returns as much directory entries 736 * as can fit in the uio space. The read starts at uio->uio_offset. 
737 * The function returns 0 on success, -1 if there was not enough space 738 * in the uio structure to hold the directory entry or an appropriate 739 * error code if another error happens. 740 */ 741 int 742 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 743 { 744 int error; 745 off_t startcookie; 746 struct dirent *dentp; 747 struct tmpfs_dirent *de; 748 749 TMPFS_VALIDATE_DIR(node); 750 751 /* Locate the first directory entry we have to return. We have cached 752 * the last readdir in the node, so use those values if appropriate. 753 * Otherwise do a linear scan to find the requested entry. */ 754 startcookie = uio->uio_offset; 755 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 756 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 757 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 758 return 0; 759 } else { 760 de = tmpfs_dir_lookupbycookie(node, startcookie); 761 } 762 if (de == NULL) { 763 return EINVAL; 764 } 765 766 dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); 767 768 /* Read as much entries as possible; i.e., until we reach the end of 769 * the directory or we exhaust uio space. */ 770 do { 771 /* Create a dirent structure representing the current 772 * tmpfs_node and fill it. 
*/ 773 dentp->d_fileno = de->td_node->tn_id; 774 switch (de->td_node->tn_type) { 775 case VBLK: 776 dentp->d_type = DT_BLK; 777 break; 778 779 case VCHR: 780 dentp->d_type = DT_CHR; 781 break; 782 783 case VDIR: 784 dentp->d_type = DT_DIR; 785 break; 786 787 case VFIFO: 788 dentp->d_type = DT_FIFO; 789 break; 790 791 case VLNK: 792 dentp->d_type = DT_LNK; 793 break; 794 795 case VREG: 796 dentp->d_type = DT_REG; 797 break; 798 799 case VSOCK: 800 dentp->d_type = DT_SOCK; 801 break; 802 803 default: 804 KASSERT(0); 805 } 806 dentp->d_namlen = de->td_namelen; 807 KASSERT(de->td_namelen < sizeof(dentp->d_name)); 808 (void)memcpy(dentp->d_name, de->td_name, de->td_namelen); 809 dentp->d_name[de->td_namelen] = '\0'; 810 dentp->d_reclen = _DIRENT_SIZE(dentp); 811 812 /* Stop reading if the directory entry we are treating is 813 * bigger than the amount of data that can be returned. */ 814 if (dentp->d_reclen > uio->uio_resid) { 815 error = -1; 816 break; 817 } 818 819 /* Copy the new dirent structure into the output buffer and 820 * advance pointers. */ 821 error = uiomove(dentp, dentp->d_reclen, uio); 822 823 (*cntp)++; 824 de = TAILQ_NEXT(de, td_entries); 825 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 826 827 /* Update the offset and cache. */ 828 if (de == NULL) { 829 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 830 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 831 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 832 } else { 833 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 834 tmpfs_dircookie(de); 835 node->tn_spec.tn_dir.tn_readdir_lastp = de; 836 } 837 838 node->tn_status |= TMPFS_NODE_ACCESSED; 839 840 kmem_free(dentp, sizeof(struct dirent)); 841 return error; 842 } 843 844 /* --------------------------------------------------------------------- */ 845 846 /* 847 * Resizes the aobj associated to the regular file pointed to by vp to 848 * the size newsize. 'vp' must point to a vnode that represents a regular 849 * file. 'newsize' must be positive. 
850 * 851 * If the file is extended, the appropriate kevent is raised. This does 852 * not rise a write event though because resizing is not the same as 853 * writing. 854 * 855 * Returns zero on success or an appropriate error code on failure. 856 */ 857 int 858 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 859 { 860 int error; 861 unsigned int newpages, oldpages; 862 struct tmpfs_mount *tmp; 863 struct tmpfs_node *node; 864 off_t oldsize; 865 866 KASSERT(vp->v_type == VREG); 867 KASSERT(newsize >= 0); 868 869 node = VP_TO_TMPFS_NODE(vp); 870 tmp = VFS_TO_TMPFS(vp->v_mount); 871 872 /* Convert the old and new sizes to the number of pages needed to 873 * store them. It may happen that we do not need to do anything 874 * because the last allocated page can accommodate the change on 875 * its own. */ 876 oldsize = node->tn_size; 877 oldpages = round_page(oldsize) / PAGE_SIZE; 878 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 879 newpages = round_page(newsize) / PAGE_SIZE; 880 881 if (newpages > oldpages && 882 (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) { 883 error = ENOSPC; 884 goto out; 885 } 886 atomic_add_int(&tmp->tm_pages_used, newpages - oldpages); 887 888 if (newsize < oldsize) { 889 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 890 891 /* 892 * zero out the truncated part of the last page. 
893 */ 894 895 uvm_vnp_zerorange(vp, newsize, zerolen); 896 } 897 898 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 899 node->tn_size = newsize; 900 uvm_vnp_setsize(vp, newsize); 901 902 /* 903 * free "backing store" 904 */ 905 906 if (newpages < oldpages) { 907 struct uvm_object *uobj; 908 909 uobj = node->tn_spec.tn_reg.tn_aobj; 910 911 mutex_enter(&uobj->vmobjlock); 912 uao_dropswap_range(uobj, newpages, oldpages); 913 mutex_exit(&uobj->vmobjlock); 914 } 915 916 error = 0; 917 918 if (newsize > oldsize) 919 VN_KNOTE(vp, NOTE_EXTEND); 920 921 out: 922 return error; 923 } 924 925 /* --------------------------------------------------------------------- */ 926 927 /* 928 * Returns information about the number of available memory pages, 929 * including physical and virtual ones. 930 * 931 * If 'total' is true, the value returned is the total amount of memory 932 * pages configured for the system (either in use or free). 933 * If it is FALSE, the value returned is the amount of free memory pages. 934 * 935 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid 936 * excessive memory usage. 937 * 938 */ 939 size_t 940 tmpfs_mem_info(bool total) 941 { 942 size_t size; 943 944 size = 0; 945 size += uvmexp.swpgavail; 946 if (!total) { 947 size -= uvmexp.swpgonly; 948 } 949 size += uvmexp.free; 950 size += uvmexp.filepages; 951 if (size > uvmexp.wired) { 952 size -= uvmexp.wired; 953 } else { 954 size = 0; 955 } 956 957 return size; 958 } 959 960 /* --------------------------------------------------------------------- */ 961 962 /* 963 * Change flags of the given vnode. 964 * Caller should execute tmpfs_update on vp after a successful execution. 965 * The vnode must be locked on entry and remain locked on exit. 
966 */ 967 int 968 tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) 969 { 970 int error; 971 struct tmpfs_node *node; 972 973 KASSERT(VOP_ISLOCKED(vp)); 974 975 node = VP_TO_TMPFS_NODE(vp); 976 977 /* Disallow this operation if the file system is mounted read-only. */ 978 if (vp->v_mount->mnt_flag & MNT_RDONLY) 979 return EROFS; 980 981 /* XXX: The following comes from UFS code, and can be found in 982 * several other file systems. Shouldn't this be centralized 983 * somewhere? */ 984 if (kauth_cred_geteuid(cred) != node->tn_uid && 985 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 986 NULL))) 987 return error; 988 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) { 989 /* The super-user is only allowed to change flags if the file 990 * wasn't protected before and the securelevel is zero. */ 991 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) && 992 kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHSYSFLAGS, 993 0, NULL, NULL, NULL)) 994 return EPERM; 995 node->tn_flags = flags; 996 } else { 997 /* Regular users can change flags provided they only want to 998 * change user-specific ones, not those reserved for the 999 * super-user. */ 1000 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) || 1001 (flags & UF_SETTABLE) != flags) 1002 return EPERM; 1003 if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE)) 1004 return EPERM; 1005 node->tn_flags &= SF_SETTABLE; 1006 node->tn_flags |= (flags & UF_SETTABLE); 1007 } 1008 1009 node->tn_status |= TMPFS_NODE_CHANGED; 1010 VN_KNOTE(vp, NOTE_ATTRIB); 1011 1012 KASSERT(VOP_ISLOCKED(vp)); 1013 1014 return 0; 1015 } 1016 1017 /* --------------------------------------------------------------------- */ 1018 1019 /* 1020 * Change access mode on the given vnode. 1021 * Caller should execute tmpfs_update on vp after a successful execution. 1022 * The vnode must be locked on entry and remain locked on exit. 
1023 */ 1024 int 1025 tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l) 1026 { 1027 int error, ismember = 0; 1028 struct tmpfs_node *node; 1029 1030 KASSERT(VOP_ISLOCKED(vp)); 1031 1032 node = VP_TO_TMPFS_NODE(vp); 1033 1034 /* Disallow this operation if the file system is mounted read-only. */ 1035 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1036 return EROFS; 1037 1038 /* Immutable or append-only files cannot be modified, either. */ 1039 if (node->tn_flags & (IMMUTABLE | APPEND)) 1040 return EPERM; 1041 1042 /* XXX: The following comes from UFS code, and can be found in 1043 * several other file systems. Shouldn't this be centralized 1044 * somewhere? */ 1045 if (kauth_cred_geteuid(cred) != node->tn_uid && 1046 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1047 NULL))) 1048 return error; 1049 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) { 1050 if (vp->v_type != VDIR && (mode & S_ISTXT)) 1051 return EFTYPE; 1052 1053 if ((kauth_cred_ismember_gid(cred, node->tn_gid, 1054 &ismember) != 0 || !ismember) && (mode & S_ISGID)) 1055 return EPERM; 1056 } 1057 1058 node->tn_mode = (mode & ALLPERMS); 1059 1060 node->tn_status |= TMPFS_NODE_CHANGED; 1061 VN_KNOTE(vp, NOTE_ATTRIB); 1062 1063 KASSERT(VOP_ISLOCKED(vp)); 1064 1065 return 0; 1066 } 1067 1068 /* --------------------------------------------------------------------- */ 1069 1070 /* 1071 * Change ownership of the given vnode. At least one of uid or gid must 1072 * be different than VNOVAL. If one is set to that value, the attribute 1073 * is unchanged. 1074 * Caller should execute tmpfs_update on vp after a successful execution. 1075 * The vnode must be locked on entry and remain locked on exit. 
1076 */ 1077 int 1078 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, 1079 struct lwp *l) 1080 { 1081 int error, ismember = 0; 1082 struct tmpfs_node *node; 1083 1084 KASSERT(VOP_ISLOCKED(vp)); 1085 1086 node = VP_TO_TMPFS_NODE(vp); 1087 1088 /* Assign default values if they are unknown. */ 1089 KASSERT(uid != VNOVAL || gid != VNOVAL); 1090 if (uid == VNOVAL) 1091 uid = node->tn_uid; 1092 if (gid == VNOVAL) 1093 gid = node->tn_gid; 1094 KASSERT(uid != VNOVAL && gid != VNOVAL); 1095 1096 /* Disallow this operation if the file system is mounted read-only. */ 1097 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1098 return EROFS; 1099 1100 /* Immutable or append-only files cannot be modified, either. */ 1101 if (node->tn_flags & (IMMUTABLE | APPEND)) 1102 return EPERM; 1103 1104 /* XXX: The following comes from UFS code, and can be found in 1105 * several other file systems. Shouldn't this be centralized 1106 * somewhere? */ 1107 if ((kauth_cred_geteuid(cred) != node->tn_uid || uid != node->tn_uid || 1108 (gid != node->tn_gid && !(kauth_cred_getegid(cred) == node->tn_gid || 1109 (kauth_cred_ismember_gid(cred, gid, &ismember) == 0 && ismember)))) && 1110 ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1111 NULL)) != 0)) 1112 return error; 1113 1114 node->tn_uid = uid; 1115 node->tn_gid = gid; 1116 1117 node->tn_status |= TMPFS_NODE_CHANGED; 1118 VN_KNOTE(vp, NOTE_ATTRIB); 1119 1120 KASSERT(VOP_ISLOCKED(vp)); 1121 1122 return 0; 1123 } 1124 1125 /* --------------------------------------------------------------------- */ 1126 1127 /* 1128 * Change size of the given vnode. 1129 * Caller should execute tmpfs_update on vp after a successful execution. 1130 * The vnode must be locked on entry and remain locked on exit. 
1131 */ 1132 int 1133 tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred, 1134 struct lwp *l) 1135 { 1136 int error; 1137 struct tmpfs_node *node; 1138 1139 KASSERT(VOP_ISLOCKED(vp)); 1140 1141 node = VP_TO_TMPFS_NODE(vp); 1142 1143 /* Decide whether this is a valid operation based on the file type. */ 1144 error = 0; 1145 switch (vp->v_type) { 1146 case VDIR: 1147 return EISDIR; 1148 1149 case VREG: 1150 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1151 return EROFS; 1152 break; 1153 1154 case VBLK: 1155 /* FALLTHROUGH */ 1156 case VCHR: 1157 /* FALLTHROUGH */ 1158 case VFIFO: 1159 /* Allow modifications of special files even if in the file 1160 * system is mounted read-only (we are not modifying the 1161 * files themselves, but the objects they represent). */ 1162 return 0; 1163 1164 default: 1165 /* Anything else is unsupported. */ 1166 return EOPNOTSUPP; 1167 } 1168 1169 /* Immutable or append-only files cannot be modified, either. */ 1170 if (node->tn_flags & (IMMUTABLE | APPEND)) 1171 return EPERM; 1172 1173 error = tmpfs_truncate(vp, size); 1174 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1175 * for us, as will update tn_status; no need to do that here. */ 1176 1177 KASSERT(VOP_ISLOCKED(vp)); 1178 1179 return error; 1180 } 1181 1182 /* --------------------------------------------------------------------- */ 1183 1184 /* 1185 * Change access and modification times of the given vnode. 1186 * Caller should execute tmpfs_update on vp after a successful execution. 1187 * The vnode must be locked on entry and remain locked on exit. 1188 */ 1189 int 1190 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1191 int vaflags, kauth_cred_t cred, struct lwp *l) 1192 { 1193 int error; 1194 struct tmpfs_node *node; 1195 1196 KASSERT(VOP_ISLOCKED(vp)); 1197 1198 node = VP_TO_TMPFS_NODE(vp); 1199 1200 /* Disallow this operation if the file system is mounted read-only. 
*/ 1201 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1202 return EROFS; 1203 1204 /* Immutable or append-only files cannot be modified, either. */ 1205 if (node->tn_flags & (IMMUTABLE | APPEND)) 1206 return EPERM; 1207 1208 /* XXX: The following comes from UFS code, and can be found in 1209 * several other file systems. Shouldn't this be centralized 1210 * somewhere? */ 1211 if (kauth_cred_geteuid(cred) != node->tn_uid && 1212 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1213 NULL)) && ((vaflags & VA_UTIMES_NULL) == 0 || 1214 (error = VOP_ACCESS(vp, VWRITE, cred)))) 1215 return error; 1216 1217 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1218 node->tn_status |= TMPFS_NODE_ACCESSED; 1219 1220 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1221 node->tn_status |= TMPFS_NODE_MODIFIED; 1222 1223 tmpfs_update(vp, atime, mtime, 0); 1224 VN_KNOTE(vp, NOTE_ATTRIB); 1225 1226 KASSERT(VOP_ISLOCKED(vp)); 1227 1228 return 0; 1229 } 1230 1231 /* --------------------------------------------------------------------- */ 1232 1233 /* Sync timestamps */ 1234 void 1235 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1236 const struct timespec *mod) 1237 { 1238 struct timespec now; 1239 struct tmpfs_node *node; 1240 1241 node = VP_TO_TMPFS_NODE(vp); 1242 1243 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1244 TMPFS_NODE_CHANGED)) == 0) 1245 return; 1246 1247 getnanotime(&now); 1248 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1249 if (acc == NULL) 1250 acc = &now; 1251 node->tn_atime = *acc; 1252 } 1253 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1254 if (mod == NULL) 1255 mod = &now; 1256 node->tn_mtime = *mod; 1257 } 1258 if (node->tn_status & TMPFS_NODE_CHANGED) 1259 node->tn_ctime = now; 1260 1261 node->tn_status &= 1262 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1263 } 1264 1265 /* --------------------------------------------------------------------- */ 1266 1267 void 1268 
tmpfs_update(struct vnode *vp, const struct timespec *acc, 1269 const struct timespec *mod, int flags) 1270 { 1271 1272 struct tmpfs_node *node; 1273 1274 KASSERT(VOP_ISLOCKED(vp)); 1275 1276 node = VP_TO_TMPFS_NODE(vp); 1277 1278 #if 0 1279 if (flags & UPDATE_CLOSE) 1280 ; /* XXX Need to do anything special? */ 1281 #endif 1282 1283 tmpfs_itimes(vp, acc, mod); 1284 1285 KASSERT(VOP_ISLOCKED(vp)); 1286 } 1287 1288 /* --------------------------------------------------------------------- */ 1289 1290 int 1291 tmpfs_truncate(struct vnode *vp, off_t length) 1292 { 1293 bool extended; 1294 int error; 1295 struct tmpfs_node *node; 1296 1297 node = VP_TO_TMPFS_NODE(vp); 1298 extended = length > node->tn_size; 1299 1300 if (length < 0) { 1301 error = EINVAL; 1302 goto out; 1303 } 1304 1305 if (node->tn_size == length) { 1306 error = 0; 1307 goto out; 1308 } 1309 1310 error = tmpfs_reg_resize(vp, length); 1311 if (error == 0) 1312 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1313 1314 out: 1315 tmpfs_update(vp, NULL, NULL, 0); 1316 1317 return error; 1318 } 1319