1 /* $NetBSD: tmpfs_subr.c,v 1.63 2011/04/01 17:40:54 hannken Exp $ */ 2 3 /* 4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 
35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.63 2011/04/01 17:40:54 hannken Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/dirent.h> 42 #include <sys/event.h> 43 #include <sys/kmem.h> 44 #include <sys/mount.h> 45 #include <sys/namei.h> 46 #include <sys/time.h> 47 #include <sys/stat.h> 48 #include <sys/systm.h> 49 #include <sys/swap.h> 50 #include <sys/vnode.h> 51 #include <sys/kauth.h> 52 #include <sys/proc.h> 53 #include <sys/atomic.h> 54 55 #include <uvm/uvm.h> 56 57 #include <miscfs/specfs/specdev.h> 58 #include <miscfs/genfs/genfs.h> 59 #include <fs/tmpfs/tmpfs.h> 60 #include <fs/tmpfs/tmpfs_fifoops.h> 61 #include <fs/tmpfs/tmpfs_specops.h> 62 #include <fs/tmpfs/tmpfs_vnops.h> 63 64 /* --------------------------------------------------------------------- */ 65 66 /* 67 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 68 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 69 * using the credentials of the process 'p'. 70 * 71 * If the node type is set to 'VDIR', then the parent parameter must point 72 * to the parent directory of the node being created. It may only be NULL 73 * while allocating the root node. 74 * 75 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 76 * specifies the device the node represents. 77 * 78 * If the node type is set to 'VLNK', then the parameter target specifies 79 * the file name of the target file for the symbolic link that is being 80 * created. 81 * 82 * Note that new nodes are retrieved from the available list if it has 83 * items or, if it is empty, from the node pool as long as there is enough 84 * space to create them. 85 * 86 * Returns zero on success or an appropriate error code on failure. 
87 */ 88 int 89 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 90 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 91 char *target, dev_t rdev, struct tmpfs_node **node) 92 { 93 struct tmpfs_node *nnode; 94 95 /* If the root directory of the 'tmp' file system is not yet 96 * allocated, this must be the request to do it. */ 97 KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 98 99 KASSERT(IFF(type == VLNK, target != NULL)); 100 KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 101 102 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 103 104 nnode = NULL; 105 if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) { 106 atomic_dec_uint(&tmp->tm_nodes_cnt); 107 return ENOSPC; 108 } 109 110 nnode = tmpfs_node_get(tmp); 111 if (nnode == NULL) { 112 atomic_dec_uint(&tmp->tm_nodes_cnt); 113 return ENOSPC; 114 } 115 116 /* 117 * XXX Where the pool is backed by a map larger than (4GB * 118 * sizeof(*nnode)), this may produce duplicate inode numbers 119 * for applications that do not understand 64-bit ino_t. 120 */ 121 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); 122 nnode->tn_gen = arc4random(); 123 124 /* Generic initialization. */ 125 nnode->tn_type = type; 126 nnode->tn_size = 0; 127 nnode->tn_status = 0; 128 nnode->tn_flags = 0; 129 nnode->tn_links = 0; 130 131 vfs_timestamp(&nnode->tn_atime); 132 nnode->tn_birthtime = nnode->tn_atime; 133 nnode->tn_ctime = nnode->tn_atime; 134 nnode->tn_mtime = nnode->tn_atime; 135 136 nnode->tn_uid = uid; 137 nnode->tn_gid = gid; 138 nnode->tn_mode = mode; 139 nnode->tn_lockf = NULL; 140 nnode->tn_vnode = NULL; 141 142 /* Type-specific initialization. */ 143 switch (nnode->tn_type) { 144 case VBLK: 145 case VCHR: 146 nnode->tn_spec.tn_dev.tn_rdev = rdev; 147 break; 148 149 case VDIR: 150 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 151 nnode->tn_spec.tn_dir.tn_parent = 152 (parent == NULL) ? 
nnode : parent; 153 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 154 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 155 nnode->tn_links++; 156 break; 157 158 case VFIFO: 159 /* FALLTHROUGH */ 160 case VSOCK: 161 break; 162 163 case VLNK: 164 KASSERT(strlen(target) < MAXPATHLEN); 165 nnode->tn_size = strlen(target); 166 if (nnode->tn_size == 0) { 167 nnode->tn_spec.tn_lnk.tn_link = NULL; 168 break; 169 } 170 nnode->tn_spec.tn_lnk.tn_link = 171 tmpfs_strname_alloc(tmp, nnode->tn_size); 172 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 173 atomic_dec_uint(&tmp->tm_nodes_cnt); 174 tmpfs_node_put(tmp, nnode); 175 return ENOSPC; 176 } 177 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 178 break; 179 180 case VREG: 181 nnode->tn_spec.tn_reg.tn_aobj = 182 uao_create(INT32_MAX - PAGE_SIZE, 0); 183 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 184 break; 185 186 default: 187 KASSERT(0); 188 } 189 190 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); 191 192 mutex_enter(&tmp->tm_lock); 193 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 194 mutex_exit(&tmp->tm_lock); 195 196 *node = nnode; 197 return 0; 198 } 199 200 /* --------------------------------------------------------------------- */ 201 202 /* 203 * Destroys the node pointed to by node from the file system 'tmp'. 204 * If the node does not belong to the given mount point, the results are 205 * unpredicted. 206 * 207 * If the node references a directory; no entries are allowed because 208 * their removal could need a recursive algorithm, something forbidden in 209 * kernel space. Furthermore, there is not need to provide such 210 * functionality (recursive removal) because the only primitives offered 211 * to the user are the removal of empty directories and the deletion of 212 * individual files. 213 * 214 * Note that nodes are not really deleted; in fact, when a node has been 215 * allocated, it cannot be deleted during the whole life of the file 216 * system. 
* Instead, they are moved to the available list and remain there
 * until reused.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	/* Take the node off the mount's node list and drop the node count. */
	mutex_enter(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);
	atomic_dec_uint(&tmp->tm_nodes_cnt);

	/* Release whatever type-specific storage the node owns. */
	if (node->tn_type == VLNK) {
		/* A zero-length link never had a target string allocated. */
		if (node->tn_size > 0) {
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
	} else if (node->tn_type == VREG) {
		/*
		 * Give back the memory accounted to the file's pages and
		 * drop the reference on the backing object, if there is one.
		 */
		const size_t objsz =
		    PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;

		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		}
	}

	mutex_destroy(&node->tn_vlock);
	tmpfs_node_put(tmp, node);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node node with a name of name.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.  This takes care of notifying kqueue listeners about
 * this change.
 *
 * Returns zero on success or an appropriate error code on failure.
267 */ 268 int 269 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 270 const char *name, uint16_t len, struct tmpfs_dirent **de) 271 { 272 struct tmpfs_dirent *nde; 273 274 nde = tmpfs_dirent_get(tmp); 275 if (nde == NULL) 276 return ENOSPC; 277 278 nde->td_name = tmpfs_strname_alloc(tmp, len); 279 if (nde->td_name == NULL) { 280 tmpfs_dirent_put(tmp, nde); 281 return ENOSPC; 282 } 283 nde->td_namelen = len; 284 memcpy(nde->td_name, name, len); 285 nde->td_node = node; 286 287 if (node != TMPFS_NODE_WHITEOUT) { 288 node->tn_links++; 289 if (node->tn_links > 1 && node->tn_vnode != NULL) 290 VN_KNOTE(node->tn_vnode, NOTE_LINK); 291 } 292 *de = nde; 293 294 return 0; 295 } 296 297 /* --------------------------------------------------------------------- */ 298 299 /* 300 * Frees a directory entry. It is the caller's responsibility to destroy 301 * the node referenced by it if needed. 302 * 303 * The link count of node is decreased by one to reflect the removal of an 304 * object that referenced it. This only happens if 'node_exists' is true; 305 * otherwise the function will not access the node referred to by the 306 * directory entry, as it may already have been released from the outside. 307 * 308 * Interested parties (kqueue) are notified of the link count change; note 309 * that this can include both the node pointed to by the directory entry 310 * as well as its parent. 311 */ 312 void 313 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 314 bool node_exists) 315 { 316 if (node_exists && de->td_node != TMPFS_NODE_WHITEOUT) { 317 struct tmpfs_node *node; 318 319 node = de->td_node; 320 321 KASSERT(node->tn_links > 0); 322 node->tn_links--; 323 if (node->tn_vnode != NULL) 324 VN_KNOTE(node->tn_vnode, node->tn_links == 0 ? 
325 NOTE_DELETE : NOTE_LINK); 326 if (node->tn_type == VDIR) 327 VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode, 328 NOTE_LINK); 329 } 330 331 tmpfs_strname_free(tmp, de->td_name, de->td_namelen); 332 tmpfs_dirent_put(tmp, de); 333 } 334 335 /* --------------------------------------------------------------------- */ 336 337 /* 338 * Allocates a new vnode for the node node or returns a new reference to 339 * an existing one if the node had already a vnode referencing it. The 340 * resulting locked vnode is returned in *vpp. 341 * 342 * Returns zero on success or an appropriate error code on failure. 343 */ 344 int 345 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) 346 { 347 int error; 348 struct vnode *vp; 349 350 /* If there is already a vnode, then lock it. */ 351 for (;;) { 352 mutex_enter(&node->tn_vlock); 353 if ((vp = node->tn_vnode) != NULL) { 354 mutex_enter(&vp->v_interlock); 355 mutex_exit(&node->tn_vlock); 356 error = vget(vp, LK_EXCLUSIVE); 357 if (error == ENOENT) { 358 /* vnode was reclaimed. */ 359 continue; 360 } 361 *vpp = vp; 362 return error; 363 } 364 break; 365 } 366 367 /* Get a new vnode and associate it with our node. */ 368 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp); 369 if (error != 0) { 370 mutex_exit(&node->tn_vlock); 371 return error; 372 } 373 374 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 375 if (error != 0) { 376 mutex_exit(&node->tn_vlock); 377 ungetnewvnode(vp); 378 return error; 379 } 380 381 vp->v_type = node->tn_type; 382 383 /* Type-specific initialization. */ 384 switch (node->tn_type) { 385 case VBLK: 386 /* FALLTHROUGH */ 387 case VCHR: 388 vp->v_op = tmpfs_specop_p; 389 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev); 390 break; 391 392 case VDIR: 393 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ? 
394 VV_ROOT : 0; 395 break; 396 397 case VFIFO: 398 vp->v_op = tmpfs_fifoop_p; 399 break; 400 401 case VLNK: 402 /* FALLTHROUGH */ 403 case VREG: 404 /* FALLTHROUGH */ 405 case VSOCK: 406 break; 407 408 default: 409 KASSERT(0); 410 } 411 412 uvm_vnp_setsize(vp, node->tn_size); 413 vp->v_data = node; 414 node->tn_vnode = vp; 415 mutex_exit(&node->tn_vlock); 416 *vpp = vp; 417 418 KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); 419 KASSERT(*vpp == node->tn_vnode); 420 421 return error; 422 } 423 424 /* --------------------------------------------------------------------- */ 425 426 /* 427 * Destroys the association between the vnode vp and the node it 428 * references. 429 */ 430 void 431 tmpfs_free_vp(struct vnode *vp) 432 { 433 struct tmpfs_node *node; 434 435 node = VP_TO_TMPFS_NODE(vp); 436 437 mutex_enter(&node->tn_vlock); 438 node->tn_vnode = NULL; 439 mutex_exit(&node->tn_vlock); 440 vp->v_data = NULL; 441 } 442 443 /* --------------------------------------------------------------------- */ 444 445 /* 446 * Allocates a new file of type 'type' and adds it to the parent directory 447 * 'dvp'; this addition is done using the component name given in 'cnp'. 448 * The ownership of the new file is automatically assigned based on the 449 * credentials of the caller (through 'cnp'), the group is set based on 450 * the parent directory and the mode is determined from the 'vap' argument. 451 * If successful, *vpp holds a vnode to the newly created file and zero 452 * is returned. Otherwise *vpp is NULL and the function returns an 453 * appropriate error code. 
454 */ 455 int 456 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 457 struct componentname *cnp, char *target) 458 { 459 int error; 460 struct tmpfs_dirent *de; 461 struct tmpfs_mount *tmp; 462 struct tmpfs_node *dnode; 463 struct tmpfs_node *node; 464 struct tmpfs_node *parent; 465 466 KASSERT(VOP_ISLOCKED(dvp)); 467 468 tmp = VFS_TO_TMPFS(dvp->v_mount); 469 dnode = VP_TO_TMPFS_DIR(dvp); 470 *vpp = NULL; 471 472 /* If the entry we are creating is a directory, we cannot overflow 473 * the number of links of its parent, because it will get a new 474 * link. */ 475 if (vap->va_type == VDIR) { 476 /* Ensure that we do not overflow the maximum number of links 477 * imposed by the system. */ 478 KASSERT(dnode->tn_links <= LINK_MAX); 479 if (dnode->tn_links == LINK_MAX) { 480 error = EMLINK; 481 goto out; 482 } 483 484 parent = dnode; 485 } else 486 parent = NULL; 487 488 /* Allocate a node that represents the new file. */ 489 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 490 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 491 if (error != 0) 492 goto out; 493 494 /* Allocate a directory entry that points to the new file. */ 495 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 496 &de); 497 if (error != 0) { 498 tmpfs_free_node(tmp, node); 499 goto out; 500 } 501 502 /* Allocate a vnode for the new file. */ 503 error = tmpfs_alloc_vp(dvp->v_mount, node, vpp); 504 if (error != 0) { 505 tmpfs_free_dirent(tmp, de, true); 506 tmpfs_free_node(tmp, node); 507 goto out; 508 } 509 510 /* Now that all required items are allocated, we can proceed to 511 * insert the new node into the directory, an operation that 512 * cannot fail. 
*/ 513 tmpfs_dir_attach(dvp, de); 514 if (vap->va_type == VDIR) { 515 VN_KNOTE(dvp, NOTE_LINK); 516 dnode->tn_links++; 517 KASSERT(dnode->tn_links <= LINK_MAX); 518 } 519 520 out: 521 vput(dvp); 522 523 KASSERT(IFF(error == 0, *vpp != NULL)); 524 525 return error; 526 } 527 528 /* --------------------------------------------------------------------- */ 529 530 /* 531 * Attaches the directory entry de to the directory represented by vp. 532 * Note that this does not change the link count of the node pointed by 533 * the directory entry, as this is done by tmpfs_alloc_dirent. 534 * 535 * As the "parent" directory changes, interested parties are notified of 536 * a write to it. 537 */ 538 void 539 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 540 { 541 struct tmpfs_node *dnode; 542 543 KASSERT(VOP_ISLOCKED(vp)); 544 dnode = VP_TO_TMPFS_DIR(vp); 545 546 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 547 dnode->tn_size += sizeof(struct tmpfs_dirent); 548 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 549 TMPFS_NODE_MODIFIED; 550 uvm_vnp_setsize(vp, dnode->tn_size); 551 552 VN_KNOTE(vp, NOTE_WRITE); 553 } 554 555 /* --------------------------------------------------------------------- */ 556 557 /* 558 * Detaches the directory entry de from the directory represented by vp. 559 * Note that this does not change the link count of the node pointed by 560 * the directory entry, as this is done by tmpfs_free_dirent. 561 * 562 * As the "parent" directory changes, interested parties are notified of 563 * a write to it. 
564 */ 565 void 566 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 567 { 568 struct tmpfs_node *dnode; 569 570 KASSERT(VOP_ISLOCKED(vp)); 571 dnode = VP_TO_TMPFS_DIR(vp); 572 573 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 574 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 575 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 576 } 577 578 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 579 dnode->tn_size -= sizeof(struct tmpfs_dirent); 580 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 581 TMPFS_NODE_MODIFIED; 582 uvm_vnp_setsize(vp, dnode->tn_size); 583 584 VN_KNOTE(vp, NOTE_WRITE); 585 } 586 587 /* --------------------------------------------------------------------- */ 588 589 /* 590 * Looks for a directory entry in the directory represented by node. 591 * 'cnp' describes the name of the entry to look for. Note that the . 592 * and .. components are not allowed as they do not physically exist 593 * within directories. 594 * 595 * Returns a pointer to the entry when found, otherwise NULL. 596 */ 597 struct tmpfs_dirent * 598 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp) 599 { 600 struct tmpfs_dirent *de; 601 602 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 603 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 604 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 605 cnp->cn_nameptr[1] == '.'))); 606 TMPFS_VALIDATE_DIR(node); 607 608 node->tn_status |= TMPFS_NODE_ACCESSED; 609 610 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 611 KASSERT(cnp->cn_namelen < 0xffff); 612 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 613 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 614 break; 615 } 616 } 617 618 return de; 619 } 620 621 /* --------------------------------------------------------------------- */ 622 623 /* 624 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 625 * directory and returns it in the uio space. 
The function returns 0 626 * on success, -1 if there was not enough space in the uio structure to 627 * hold the directory entry or an appropriate error code if another 628 * error happens. 629 */ 630 int 631 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 632 { 633 int error; 634 struct dirent *dentp; 635 636 TMPFS_VALIDATE_DIR(node); 637 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 638 639 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 640 641 dentp->d_fileno = node->tn_id; 642 dentp->d_type = DT_DIR; 643 dentp->d_namlen = 1; 644 dentp->d_name[0] = '.'; 645 dentp->d_name[1] = '\0'; 646 dentp->d_reclen = _DIRENT_SIZE(dentp); 647 648 if (dentp->d_reclen > uio->uio_resid) 649 error = -1; 650 else { 651 error = uiomove(dentp, dentp->d_reclen, uio); 652 if (error == 0) 653 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 654 } 655 656 node->tn_status |= TMPFS_NODE_ACCESSED; 657 658 kmem_free(dentp, sizeof(struct dirent)); 659 return error; 660 } 661 662 /* --------------------------------------------------------------------- */ 663 664 /* 665 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 666 * directory and returns it in the uio space. The function returns 0 667 * on success, -1 if there was not enough space in the uio structure to 668 * hold the directory entry or an appropriate error code if another 669 * error happens. 
670 */ 671 int 672 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 673 { 674 int error; 675 struct dirent *dentp; 676 677 TMPFS_VALIDATE_DIR(node); 678 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 679 680 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 681 682 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 683 dentp->d_type = DT_DIR; 684 dentp->d_namlen = 2; 685 dentp->d_name[0] = '.'; 686 dentp->d_name[1] = '.'; 687 dentp->d_name[2] = '\0'; 688 dentp->d_reclen = _DIRENT_SIZE(dentp); 689 690 if (dentp->d_reclen > uio->uio_resid) 691 error = -1; 692 else { 693 error = uiomove(dentp, dentp->d_reclen, uio); 694 if (error == 0) { 695 struct tmpfs_dirent *de; 696 697 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 698 if (de == NULL) 699 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 700 else 701 uio->uio_offset = tmpfs_dircookie(de); 702 } 703 } 704 705 node->tn_status |= TMPFS_NODE_ACCESSED; 706 707 kmem_free(dentp, sizeof(struct dirent)); 708 return error; 709 } 710 711 /* --------------------------------------------------------------------- */ 712 713 /* 714 * Lookup a directory entry by its associated cookie. 715 */ 716 struct tmpfs_dirent * 717 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 718 { 719 struct tmpfs_dirent *de; 720 721 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 722 723 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 724 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 725 return node->tn_spec.tn_dir.tn_readdir_lastp; 726 } 727 728 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 729 if (tmpfs_dircookie(de) == cookie) { 730 break; 731 } 732 } 733 734 return de; 735 } 736 737 /* --------------------------------------------------------------------- */ 738 739 /* 740 * Helper function for tmpfs_readdir. Returns as much directory entries 741 * as can fit in the uio space. The read starts at uio->uio_offset. 
742 * The function returns 0 on success, -1 if there was not enough space 743 * in the uio structure to hold the directory entry or an appropriate 744 * error code if another error happens. 745 */ 746 int 747 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 748 { 749 int error; 750 off_t startcookie; 751 struct dirent *dentp; 752 struct tmpfs_dirent *de; 753 754 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 755 TMPFS_VALIDATE_DIR(node); 756 757 /* Locate the first directory entry we have to return. We have cached 758 * the last readdir in the node, so use those values if appropriate. 759 * Otherwise do a linear scan to find the requested entry. */ 760 startcookie = uio->uio_offset; 761 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 762 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 763 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 764 return 0; 765 } else { 766 de = tmpfs_dir_lookupbycookie(node, startcookie); 767 } 768 if (de == NULL) { 769 return EINVAL; 770 } 771 772 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 773 774 /* Read as much entries as possible; i.e., until we reach the end of 775 * the directory or we exhaust uio space. */ 776 do { 777 /* Create a dirent structure representing the current 778 * tmpfs_node and fill it. 
*/ 779 if (de->td_node == TMPFS_NODE_WHITEOUT) { 780 dentp->d_fileno = 1; 781 dentp->d_type = DT_WHT; 782 } else { 783 dentp->d_fileno = de->td_node->tn_id; 784 switch (de->td_node->tn_type) { 785 case VBLK: 786 dentp->d_type = DT_BLK; 787 break; 788 789 case VCHR: 790 dentp->d_type = DT_CHR; 791 break; 792 793 case VDIR: 794 dentp->d_type = DT_DIR; 795 break; 796 797 case VFIFO: 798 dentp->d_type = DT_FIFO; 799 break; 800 801 case VLNK: 802 dentp->d_type = DT_LNK; 803 break; 804 805 case VREG: 806 dentp->d_type = DT_REG; 807 break; 808 809 case VSOCK: 810 dentp->d_type = DT_SOCK; 811 break; 812 813 default: 814 KASSERT(0); 815 } 816 } 817 dentp->d_namlen = de->td_namelen; 818 KASSERT(de->td_namelen < sizeof(dentp->d_name)); 819 (void)memcpy(dentp->d_name, de->td_name, de->td_namelen); 820 dentp->d_name[de->td_namelen] = '\0'; 821 dentp->d_reclen = _DIRENT_SIZE(dentp); 822 823 /* Stop reading if the directory entry we are treating is 824 * bigger than the amount of data that can be returned. */ 825 if (dentp->d_reclen > uio->uio_resid) { 826 error = -1; 827 break; 828 } 829 830 /* Copy the new dirent structure into the output buffer and 831 * advance pointers. */ 832 error = uiomove(dentp, dentp->d_reclen, uio); 833 834 (*cntp)++; 835 de = TAILQ_NEXT(de, td_entries); 836 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 837 838 /* Update the offset and cache. 
*/ 839 if (de == NULL) { 840 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 841 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 842 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 843 } else { 844 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 845 tmpfs_dircookie(de); 846 node->tn_spec.tn_dir.tn_readdir_lastp = de; 847 } 848 849 node->tn_status |= TMPFS_NODE_ACCESSED; 850 851 kmem_free(dentp, sizeof(struct dirent)); 852 return error; 853 } 854 855 /* --------------------------------------------------------------------- */ 856 857 /* 858 * Resizes the aobj associated to the regular file pointed to by vp to 859 * the size newsize. 'vp' must point to a vnode that represents a regular 860 * file. 'newsize' must be positive. 861 * 862 * If the file is extended, the appropriate kevent is raised. This does 863 * not rise a write event though because resizing is not the same as 864 * writing. 865 * 866 * Returns zero on success or an appropriate error code on failure. 867 */ 868 int 869 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 870 { 871 size_t newpages, oldpages; 872 struct tmpfs_mount *tmp; 873 struct tmpfs_node *node; 874 off_t oldsize; 875 876 KASSERT(vp->v_type == VREG); 877 KASSERT(newsize >= 0); 878 879 node = VP_TO_TMPFS_NODE(vp); 880 tmp = VFS_TO_TMPFS(vp->v_mount); 881 882 oldsize = node->tn_size; 883 oldpages = round_page(oldsize) >> PAGE_SHIFT; 884 newpages = round_page(newsize) >> PAGE_SHIFT; 885 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 886 887 if (newpages > oldpages) { 888 /* Increase the used-memory counter if getting extra pages. */ 889 if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) { 890 return ENOSPC; 891 } 892 } else if (newsize < oldsize) { 893 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 894 895 /* Zero out the truncated part of the last page. 
*/ 896 uvm_vnp_zerorange(vp, newsize, zerolen); 897 } 898 899 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 900 node->tn_size = newsize; 901 uvm_vnp_setsize(vp, newsize); 902 903 /* 904 * Free "backing store". 905 */ 906 if (newpages < oldpages) { 907 struct uvm_object *uobj; 908 909 uobj = node->tn_spec.tn_reg.tn_aobj; 910 911 mutex_enter(&uobj->vmobjlock); 912 uao_dropswap_range(uobj, newpages, oldpages); 913 mutex_exit(&uobj->vmobjlock); 914 915 /* Decrease the used-memory counter. */ 916 tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); 917 } 918 919 if (newsize > oldsize) 920 VN_KNOTE(vp, NOTE_EXTEND); 921 922 return 0; 923 } 924 925 /* 926 * Change flags of the given vnode. 927 * Caller should execute tmpfs_update on vp after a successful execution. 928 * The vnode must be locked on entry and remain locked on exit. 929 */ 930 int 931 tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) 932 { 933 int error; 934 struct tmpfs_node *node; 935 kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS; 936 int fs_decision = 0; 937 938 KASSERT(VOP_ISLOCKED(vp)); 939 940 node = VP_TO_TMPFS_NODE(vp); 941 942 /* Disallow this operation if the file system is mounted read-only. */ 943 if (vp->v_mount->mnt_flag & MNT_RDONLY) 944 return EROFS; 945 946 if (kauth_cred_geteuid(cred) != node->tn_uid) 947 fs_decision = EACCES; 948 949 /* 950 * If the new flags have non-user flags that are different than 951 * those on the node, we need special permission to change them. 952 */ 953 if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) { 954 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 955 if (!fs_decision) 956 fs_decision = EPERM; 957 } 958 959 /* 960 * Indicate that this node's flags have system attributes in them if 961 * that's the case. 
962 */ 963 if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) { 964 action |= KAUTH_VNODE_HAS_SYSFLAGS; 965 } 966 967 error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision); 968 if (error) 969 return error; 970 971 /* 972 * Set the flags. If we're not setting non-user flags, be careful not 973 * to overwrite them. 974 * 975 * XXX: Can't we always assign here? if the system flags are different, 976 * the code above should catch attempts to change them without 977 * proper permissions, and if we're here it means it's okay to 978 * change them... 979 */ 980 if (action & KAUTH_VNODE_WRITE_SYSFLAGS) { 981 node->tn_flags = flags; 982 } else { 983 /* Clear all user-settable flags and re-set them. */ 984 node->tn_flags &= SF_SETTABLE; 985 node->tn_flags |= (flags & UF_SETTABLE); 986 } 987 988 node->tn_status |= TMPFS_NODE_CHANGED; 989 VN_KNOTE(vp, NOTE_ATTRIB); 990 991 KASSERT(VOP_ISLOCKED(vp)); 992 993 return 0; 994 } 995 996 /* --------------------------------------------------------------------- */ 997 998 /* 999 * Change access mode on the given vnode. 1000 * Caller should execute tmpfs_update on vp after a successful execution. 1001 * The vnode must be locked on entry and remain locked on exit. 1002 */ 1003 int 1004 tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l) 1005 { 1006 int error; 1007 struct tmpfs_node *node; 1008 1009 KASSERT(VOP_ISLOCKED(vp)); 1010 1011 node = VP_TO_TMPFS_NODE(vp); 1012 1013 /* Disallow this operation if the file system is mounted read-only. */ 1014 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1015 return EROFS; 1016 1017 /* Immutable or append-only files cannot be modified, either. 
*/ 1018 if (node->tn_flags & (IMMUTABLE | APPEND)) 1019 return EPERM; 1020 1021 error = genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid, 1022 mode); 1023 1024 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp, 1025 NULL, error); 1026 if (error) 1027 return (error); 1028 1029 node->tn_mode = (mode & ALLPERMS); 1030 1031 node->tn_status |= TMPFS_NODE_CHANGED; 1032 VN_KNOTE(vp, NOTE_ATTRIB); 1033 1034 KASSERT(VOP_ISLOCKED(vp)); 1035 1036 return 0; 1037 } 1038 1039 /* --------------------------------------------------------------------- */ 1040 1041 /* 1042 * Change ownership of the given vnode. At least one of uid or gid must 1043 * be different than VNOVAL. If one is set to that value, the attribute 1044 * is unchanged. 1045 * Caller should execute tmpfs_update on vp after a successful execution. 1046 * The vnode must be locked on entry and remain locked on exit. 1047 */ 1048 int 1049 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, 1050 struct lwp *l) 1051 { 1052 int error; 1053 struct tmpfs_node *node; 1054 1055 KASSERT(VOP_ISLOCKED(vp)); 1056 1057 node = VP_TO_TMPFS_NODE(vp); 1058 1059 /* Assign default values if they are unknown. */ 1060 KASSERT(uid != VNOVAL || gid != VNOVAL); 1061 if (uid == VNOVAL) 1062 uid = node->tn_uid; 1063 if (gid == VNOVAL) 1064 gid = node->tn_gid; 1065 KASSERT(uid != VNOVAL && gid != VNOVAL); 1066 1067 /* Disallow this operation if the file system is mounted read-only. */ 1068 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1069 return EROFS; 1070 1071 /* Immutable or append-only files cannot be modified, either. 
*/ 1072 if (node->tn_flags & (IMMUTABLE | APPEND)) 1073 return EPERM; 1074 1075 error = genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid, 1076 gid); 1077 1078 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp, 1079 NULL, error); 1080 if (error) 1081 return (error); 1082 1083 node->tn_uid = uid; 1084 node->tn_gid = gid; 1085 1086 node->tn_status |= TMPFS_NODE_CHANGED; 1087 VN_KNOTE(vp, NOTE_ATTRIB); 1088 1089 KASSERT(VOP_ISLOCKED(vp)); 1090 1091 return 0; 1092 } 1093 1094 /* --------------------------------------------------------------------- */ 1095 1096 /* 1097 * Change size of the given vnode. 1098 * Caller should execute tmpfs_update on vp after a successful execution. 1099 * The vnode must be locked on entry and remain locked on exit. 1100 */ 1101 int 1102 tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred, 1103 struct lwp *l) 1104 { 1105 int error; 1106 struct tmpfs_node *node; 1107 1108 KASSERT(VOP_ISLOCKED(vp)); 1109 1110 node = VP_TO_TMPFS_NODE(vp); 1111 1112 /* Decide whether this is a valid operation based on the file type. */ 1113 error = 0; 1114 switch (vp->v_type) { 1115 case VDIR: 1116 return EISDIR; 1117 1118 case VREG: 1119 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1120 return EROFS; 1121 break; 1122 1123 case VBLK: 1124 /* FALLTHROUGH */ 1125 case VCHR: 1126 /* FALLTHROUGH */ 1127 case VFIFO: 1128 /* Allow modifications of special files even if in the file 1129 * system is mounted read-only (we are not modifying the 1130 * files themselves, but the objects they represent). */ 1131 return 0; 1132 1133 default: 1134 /* Anything else is unsupported. */ 1135 return EOPNOTSUPP; 1136 } 1137 1138 /* Immutable or append-only files cannot be modified, either. */ 1139 if (node->tn_flags & (IMMUTABLE | APPEND)) 1140 return EPERM; 1141 1142 error = tmpfs_truncate(vp, size); 1143 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1144 * for us, as will update tn_status; no need to do that here. 
*/ 1145 1146 KASSERT(VOP_ISLOCKED(vp)); 1147 1148 return error; 1149 } 1150 1151 /* --------------------------------------------------------------------- */ 1152 1153 /* 1154 * Change access and modification times of the given vnode. 1155 * Caller should execute tmpfs_update on vp after a successful execution. 1156 * The vnode must be locked on entry and remain locked on exit. 1157 */ 1158 int 1159 tmpfs_chtimes(struct vnode *vp, const struct timespec *atime, 1160 const struct timespec *mtime, const struct timespec *btime, 1161 int vaflags, kauth_cred_t cred, struct lwp *l) 1162 { 1163 int error; 1164 struct tmpfs_node *node; 1165 1166 KASSERT(VOP_ISLOCKED(vp)); 1167 1168 node = VP_TO_TMPFS_NODE(vp); 1169 1170 /* Disallow this operation if the file system is mounted read-only. */ 1171 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1172 return EROFS; 1173 1174 /* Immutable or append-only files cannot be modified, either. */ 1175 if (node->tn_flags & (IMMUTABLE | APPEND)) 1176 return EPERM; 1177 1178 error = genfs_can_chtimes(vp, vaflags, node->tn_uid, cred); 1179 1180 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, 1181 error); 1182 if (error) 1183 return (error); 1184 1185 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1186 node->tn_status |= TMPFS_NODE_ACCESSED; 1187 1188 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1189 node->tn_status |= TMPFS_NODE_MODIFIED; 1190 1191 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL) 1192 btime = NULL; 1193 1194 tmpfs_update(vp, atime, mtime, btime, 0); 1195 VN_KNOTE(vp, NOTE_ATTRIB); 1196 1197 KASSERT(VOP_ISLOCKED(vp)); 1198 1199 return 0; 1200 } 1201 1202 /* --------------------------------------------------------------------- */ 1203 1204 /* Sync timestamps */ 1205 void 1206 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1207 const struct timespec *mod, const struct timespec *birth) 1208 { 1209 struct tmpfs_node *node; 1210 struct timespec nowtm; 1211 1212 node = 
VP_TO_TMPFS_NODE(vp); 1213 1214 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1215 TMPFS_NODE_CHANGED)) == 0) 1216 return; 1217 1218 if (birth != NULL) { 1219 node->tn_birthtime = *birth; 1220 } 1221 vfs_timestamp(&nowtm); 1222 1223 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1224 node->tn_atime = acc ? *acc : nowtm; 1225 } 1226 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1227 node->tn_mtime = mod ? *mod : nowtm; 1228 } 1229 if (node->tn_status & TMPFS_NODE_CHANGED) { 1230 node->tn_ctime = nowtm; 1231 } 1232 1233 node->tn_status &= 1234 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1235 } 1236 1237 /* --------------------------------------------------------------------- */ 1238 1239 void 1240 tmpfs_update(struct vnode *vp, const struct timespec *acc, 1241 const struct timespec *mod, const struct timespec *birth, int flags) 1242 { 1243 1244 struct tmpfs_node *node; 1245 1246 KASSERT(VOP_ISLOCKED(vp)); 1247 1248 node = VP_TO_TMPFS_NODE(vp); 1249 1250 #if 0 1251 if (flags & UPDATE_CLOSE) 1252 ; /* XXX Need to do anything special? */ 1253 #endif 1254 1255 tmpfs_itimes(vp, acc, mod, birth); 1256 1257 KASSERT(VOP_ISLOCKED(vp)); 1258 } 1259 1260 /* --------------------------------------------------------------------- */ 1261 1262 int 1263 tmpfs_truncate(struct vnode *vp, off_t length) 1264 { 1265 bool extended; 1266 int error; 1267 struct tmpfs_node *node; 1268 1269 node = VP_TO_TMPFS_NODE(vp); 1270 extended = length > node->tn_size; 1271 1272 if (length < 0) { 1273 error = EINVAL; 1274 goto out; 1275 } 1276 1277 if (node->tn_size == length) { 1278 error = 0; 1279 goto out; 1280 } 1281 1282 error = tmpfs_reg_resize(vp, length); 1283 if (error == 0) 1284 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1285 1286 out: 1287 tmpfs_update(vp, NULL, NULL, NULL, 0); 1288 1289 return error; 1290 } 1291