1 /* $NetBSD: tmpfs_subr.c,v 1.34 2007/02/22 06:37:00 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.34 2007/02/22 06:37:00 thorpej Exp $");

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/swap.h>
#include <sys/vnode.h>
#include <sys/kauth.h>

#include <uvm/uvm.h>

#include <miscfs/specfs/specdev.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
 * using the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are retrieved from the available list if it has
 * items or, if it is empty, from the node pool as long as there is enough
 * space to create them.
 *
 * Returns zero on success or an appropriate error code on failure
 * (ENOSPC when the node limit is hit or a pool allocation fails).
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	KASSERT(IFF(type == VLNK, target != NULL));
	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	nnode = NULL;
	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
		/* No recycled nodes are available; take a fresh one from
		 * the pool unless that would exceed the mount's configured
		 * maximum number of nodes. */
		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
			return ENOSPC;

		nnode =
		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
		if (nnode == NULL)
			return ENOSPC;
		nnode->tn_id = tmp->tm_nodes_last++;
		nnode->tn_gen = arc4random();
	} else {
		/* Recycle a previously released node; bump its generation
		 * number so stale references to the old incarnation do not
		 * match the new one. */
		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
		LIST_REMOVE(nnode, tn_entries);
		nnode->tn_gen++;
	}
	KASSERT(nnode != NULL);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;
	getnanotime(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_lockf = NULL;
	nnode->tn_vnode = NULL;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		/* The root directory is its own parent. */
		nnode->tn_spec.tn_dir.tn_parent =
		    (parent == NULL) ? nnode : parent;
		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
		/* Account for the '.' entry and the parent's new link
		 * from '..'. */
		nnode->tn_links++;
		nnode->tn_spec.tn_dir.tn_parent->tn_links++;
		if (parent != NULL) {
			KASSERT(parent->tn_vnode != NULL);
			VN_KNOTE(parent->tn_vnode, NOTE_LINK);
		}
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		KASSERT(strlen(target) < MAXPATHLEN);
		nnode->tn_size = strlen(target);
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			/* Mark the node VNON so tmpfs_free_node does not
			 * try to release type-specific data. */
			nnode->tn_type = VNON;
			tmpfs_free_node(tmp, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;

	case VREG:
		/* Regular files are backed by an anonymous UVM object. */
		nnode->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT32_MAX - PAGE_SIZE, 0);
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;

	default:
		KASSERT(0);
	}

	*node = nnode;
	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node does not belong to the given mount point, the results are
 * unpredictable.
 *
 * If the node references a directory; no entries are allowed because
 * their removal could need a recursive algorithm, something forbidden in
 * kernel space.  Furthermore, there is no need to provide such
 * functionality (recursive removal) because the only primitives offered
 * to the user are the removal of empty directories and the deletion of
 * individual files.
 *
 * Note that nodes are not really deleted; in fact, when a node has been
 * allocated, it cannot be deleted during the whole life of the file
 * system.  Instead, they are moved to the available list and remain there
 * until reused.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	ino_t id;
	unsigned long gen;
	size_t pages;

	/* Release type-specific data and determine how many pages of
	 * memory accounting must be given back to the mount. */
	switch (node->tn_type) {
	case VNON:
		/* Do not do anything.  VNON is provided to let the
		 * allocation routine clean itself easily by avoiding
		 * duplicating code in it. */
		/* FALLTHROUGH */
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		pages = 0;
		break;

	case VLNK:
		/* Return the symlink target string to the string pool. */
		tmpfs_str_pool_put(&tmp->tm_str_pool,
		    node->tn_spec.tn_lnk.tn_link, node->tn_size);
		pages = 0;
		break;

	case VREG:
		/* Drop the reference to the backing anonymous object. */
		if (node->tn_spec.tn_reg.tn_aobj != NULL)
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		pages = node->tn_spec.tn_reg.tn_aobj_pages;
		break;

	default:
		KASSERT(0);
		pages = 0;	/* Shut up gcc when !DIAGNOSTIC. */
		break;
	}

	tmp->tm_pages_used -= pages;

	/* Reset the node to a pristine state, preserving only its identity
	 * (inode number and generation), and put it on the available list
	 * for later reuse. */
	LIST_REMOVE(node, tn_entries);
	id = node->tn_id;
	gen = node->tn_gen;
	memset(node, 0, sizeof(struct tmpfs_node));
	node->tn_id = id;
	node->tn_type = VNON;
	node->tn_gen = gen;
	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node node with a name of name.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.  This takes care of notifying kqueue listeners about
 * this change.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, uint16_t len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
	if (nde == NULL)
		return ENOSPC;

	/* The name is not NUL-terminated; td_namelen carries its length. */
	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
	if (nde->td_name == NULL) {
		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_node = node;

	node->tn_links++;
	if (node->tn_links > 1 && node->tn_vnode != NULL)
		VN_KNOTE(node->tn_vnode, NOTE_LINK);
	*de = nde;

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.
This only happens if 'node_exists' is true;
 * otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 *
 * Interested parties (kqueue) are notified of the link count change; note
 * that this can include both the node pointed to by the directory entry
 * as well as its parent.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    bool node_exists)
{
	if (node_exists) {
		struct tmpfs_node *node;

		node = de->td_node;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
		/* A link count of zero means the node is being deleted. */
		if (node->tn_vnode != NULL)
			VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
			    NOTE_DELETE : NOTE_LINK);
		if (node->tn_type == VDIR)
			VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
			    NOTE_LINK);
	}

	/* Return the name string and the entry itself to their pools. */
	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *nvp;
	struct vnode *vp;

	vp = NULL;

	if (node->tn_vnode != NULL) {
		/* The node already has a vnode; take a new, locked
		 * reference to it. */
		vp = node->tn_vnode;
		vget(vp, LK_EXCLUSIVE | LK_RETRY);
		error = 0;
		goto out;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0)
		goto out;
	KASSERT(vp != NULL);

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		vp->v_data = NULL;
		ungetnewvnode(vp);
		vp = NULL;
		goto out;
	}

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp);
		if (nvp != NULL) {
			/* Discard unneeded vnode, but save its inode. */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;

			/* XXX spec_vnodeops has no locking, so we have to
			 * do it explicitly. */
			VOP_UNLOCK(vp, 0);
			vp->v_op = spec_vnodeop_p;
			vp->v_flag &= ~VLOCKSWORK;
			vrele(vp);
			vgone(vp);

			/* Reinitialize aliased node. */
			vp = nvp;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			if (error != 0) {
				vp->v_data = NULL;
				vp = NULL;
				goto out;
			}
		}
		break;

	case VDIR:
		/* The node that is its own parent is the root directory. */
		vp->v_flag = node->tn_spec.tn_dir.tn_parent == node ? VROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);

	error = 0;

out:
	*vpp = node->tn_vnode = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	/* Break the node<->vnode association in both directions. */
	node->tn_vnode = NULL;
	vp->v_data = NULL;
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 *
 * 'dvp' is unlocked (and released) on return, whether or not the call
 * succeeds.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
	    cnp->cn_lwp->l_proc, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		/* Roll back the node allocation. */
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		/* Roll back both the dirent and the node allocations. */
		tmpfs_free_dirent(tmp, de, true);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);

out:
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);

	KASSERT(!VOP_ISLOCKED(dvp));
	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Attaches the directory entry de to the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * a write to it.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(vp);

	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	/* A directory's size is the number of entries it holds, expressed
	 * in bytes of struct tmpfs_dirent. */
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}

/* --------------------------------------------------------------------- */

/*
 * Detaches the directory entry de from the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_free_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * a write to it.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	KASSERT(VOP_ISLOCKED(vp));

	dnode = VP_TO_TMPFS_DIR(vp);

	/* Invalidate the cached readdir position if it points to the
	 * entry being removed. */
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}

	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}

/* --------------------------------------------------------------------- */

/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
{
	bool found;
	struct tmpfs_dirent *de;

	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	node->tn_status |= TMPFS_NODE_ACCESSED;

	/* Linear scan; names are compared by length first. */
	found = 0;
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(cnp->cn_namelen < 0xffff);
		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
			found = 1;
			break;
		}
	}

	return found ? de : NULL;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dent.d_fileno = node->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 1;
	dent.d_name[0] = '.';
	dent.d_name[1] = '\0';
	dent.d_reclen = _DIRENT_SIZE(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error == 0)
			/* Next call will emit the '..' entry. */
			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	dent.d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 2;
	dent.d_name[0] = '.';
	dent.d_name[1] = '.';
	dent.d_name[2] = '\0';
	dent.d_reclen = _DIRENT_SIZE(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error == 0) {
			struct tmpfs_dirent *de;

			/* Advance to the cookie of the first real entry,
			 * or straight to EOF if the directory is empty. */
			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			if (de == NULL)
				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
			else
				uio->uio_offset = tmpfs_dircookie(de);
		}
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Lookup a directory entry by its associated cookie.
 */
struct tmpfs_dirent *
tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
{
	struct tmpfs_dirent *de;

	/* Fast path: the cookie matches the cached last-readdir entry. */
	if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
	    node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
		return node->tn_spec.tn_dir.tn_readdir_lastp;
	}

	/* Slow path: linear scan over the whole directory. */
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (tmpfs_dircookie(de) == cookie) {
			break;
		}
	}

	return de;
}

/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Returns as many directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry or an appropriate
 * error code if another error happens.  *cntp is incremented once per
 * entry copied out.
 */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
{
	int error;
	off_t startcookie;
	struct tmpfs_dirent *de;

	TMPFS_VALIDATE_DIR(node);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry. */
	startcookie = uio->uio_offset;
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
		return 0;
	} else {
		de = tmpfs_dir_lookupbycookie(node, startcookie);
	}
	if (de == NULL) {
		return EINVAL;
	}

	/* Read as many entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		d.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			d.d_type = DT_BLK;
			break;

		case VCHR:
			d.d_type = DT_CHR;
			break;

		case VDIR:
			d.d_type = DT_DIR;
			break;

		case VFIFO:
			d.d_type = DT_FIFO;
			break;

		case VLNK:
			d.d_type = DT_LNK;
			break;

		case VREG:
			d.d_type = DT_REG;
			break;

		case VSOCK:
			d.d_type = DT_SOCK;
			break;

		default:
			KASSERT(0);
		}
		d.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
		d.d_name[de->td_namelen] = '\0';
		d.d_reclen = _DIRENT_SIZE(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (d.d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);

		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset and the readdir cache for the next call. */
	if (de == NULL) {
		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	} else {
		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
		    tmpfs_dircookie(de);
		node->tn_spec.tn_dir.tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Resizes the aobj associated to the regular file pointed to by vp to
 * the size newsize.  'vp' must point to a vnode that represents a regular
 * file.  'newsize' must be positive.
 *
 * If the file is extended, the appropriate kevent is raised.  This does
 * not raise a write event though because resizing is not the same as
 * writing.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	int error;
	size_t newpages, oldpages;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/* Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own. */
	oldsize = node->tn_size;
	oldpages = round_page(oldsize) / PAGE_SIZE;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
	newpages = round_page(newsize) / PAGE_SIZE;

	/* Refuse to grow beyond the pages still available to the mount. */
	if (newpages > oldpages &&
	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
		error = ENOSPC;
		goto out;
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;

	tmp->tm_pages_used += (newpages - oldpages);
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);
	if (newsize < oldsize) {
		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;

		/*
		 * free "backing store"
		 */

		if (newpages < oldpages) {
			struct uvm_object *uobj;

			uobj = node->tn_spec.tn_reg.tn_aobj;

			simple_lock(&uobj->vmobjlock);
			uao_dropswap_range(uobj, newpages, oldpages);
			simple_unlock(&uobj->vmobjlock);
		}

		/*
		 * zero out the truncated part of the last page.
		 */

		uvm_vnp_zerorange(vp, newsize, zerolen);
	}

	error = 0;

	if (newsize > oldsize)
		VN_KNOTE(vp, NOTE_EXTEND);

out:
	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Returns information about the number of available memory pages,
 * including physical and virtual ones.
 *
 * If 'total' is true, the value returned is the total amount of memory
 * pages configured for the system (either in use or free).
 * If it is FALSE, the value returned is the amount of free memory pages.
 *
 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
 * excessive memory usage.
 *
 */
size_t
tmpfs_mem_info(bool total)
{
	size_t size;

	/* Sum up swap, free and file-cache pages, then subtract the pages
	 * that are wired and thus never reclaimable. */
	size = 0;
	size += uvmexp.swpgavail;
	if (!total) {
		size -= uvmexp.swpgonly;
	}
	size += uvmexp.free;
	size += uvmexp.filepages;
	if (size > uvmexp.wired) {
		size -= uvmexp.wired;
	} else {
		size = 0;
	}

	return size;
}

/* --------------------------------------------------------------------- */

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Only the owner or the super-user may change the flags. */
	if (kauth_cred_geteuid(cred) != node->tn_uid &&
	    (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
	    NULL)))
		return error;
	if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
		/* The super-user is only allowed to change flags if the file
		 * wasn't protected before and the securelevel is zero. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
		    kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHSYSFLAGS,
		    0, NULL, NULL, NULL))
			return EPERM;
		node->tn_flags = flags;
	} else {
		/* Regular users can change flags provided they only want to
		 * change user-specific ones, not those reserved for the
		 * super-user. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
			return EPERM;
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l)
{
	int error, ismember = 0;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Only the owner or the super-user may change the mode. */
	if (kauth_cred_geteuid(cred) != node->tn_uid &&
	    (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
	    NULL)))
		return error;
	if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
		/* Non-superusers may not set the sticky bit on
		 * non-directories, nor the setgid bit for a group they do
		 * not belong to. */
		if (vp->v_type != VDIR && (mode & S_ISTXT))
			return EFTYPE;

		if ((kauth_cred_ismember_gid(cred, node->tn_gid,
		    &ismember) != 0 || !ismember) && (mode & S_ISGID))
			return EPERM;
	}

	node->tn_mode = (mode & ALLPERMS);

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
    struct lwp *l)
{
	int error, ismember = 0;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* The caller must be the super-user unless it owns the file, keeps
	 * the same owner, and only changes the group to one it belongs to. */
	if ((kauth_cred_geteuid(cred) != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !(kauth_cred_getegid(cred) == node->tn_gid ||
	    (kauth_cred_ismember_gid(cred, gid, &ismember) == 0 && ismember)))) &&
	    ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred,
    struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if in the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either.
*/ 1193 if (node->tn_flags & (IMMUTABLE | APPEND)) 1194 return EPERM; 1195 1196 error = tmpfs_truncate(vp, size); 1197 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1198 * for us, as will update tn_status; no need to do that here. */ 1199 1200 KASSERT(VOP_ISLOCKED(vp)); 1201 1202 return error; 1203 } 1204 1205 /* --------------------------------------------------------------------- */ 1206 1207 /* 1208 * Change access and modification times of the given vnode. 1209 * Caller should execute tmpfs_update on vp after a successful execution. 1210 * The vnode must be locked on entry and remain locked on exit. 1211 */ 1212 int 1213 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1214 int vaflags, kauth_cred_t cred, struct lwp *l) 1215 { 1216 int error; 1217 struct tmpfs_node *node; 1218 1219 KASSERT(VOP_ISLOCKED(vp)); 1220 1221 node = VP_TO_TMPFS_NODE(vp); 1222 1223 /* Disallow this operation if the file system is mounted read-only. */ 1224 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1225 return EROFS; 1226 1227 /* Immutable or append-only files cannot be modified, either. */ 1228 if (node->tn_flags & (IMMUTABLE | APPEND)) 1229 return EPERM; 1230 1231 /* XXX: The following comes from UFS code, and can be found in 1232 * several other file systems. Shouldn't this be centralized 1233 * somewhere? 
*/ 1234 if (kauth_cred_geteuid(cred) != node->tn_uid && 1235 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1236 NULL)) && ((vaflags & VA_UTIMES_NULL) == 0 || 1237 (error = VOP_ACCESS(vp, VWRITE, cred, l)))) 1238 return error; 1239 1240 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1241 node->tn_status |= TMPFS_NODE_ACCESSED; 1242 1243 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1244 node->tn_status |= TMPFS_NODE_MODIFIED; 1245 1246 tmpfs_update(vp, atime, mtime, 0); 1247 VN_KNOTE(vp, NOTE_ATTRIB); 1248 1249 KASSERT(VOP_ISLOCKED(vp)); 1250 1251 return 0; 1252 } 1253 1254 /* --------------------------------------------------------------------- */ 1255 1256 /* Sync timestamps */ 1257 void 1258 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1259 const struct timespec *mod) 1260 { 1261 struct timespec now; 1262 struct tmpfs_node *node; 1263 1264 node = VP_TO_TMPFS_NODE(vp); 1265 1266 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1267 TMPFS_NODE_CHANGED)) == 0) 1268 return; 1269 1270 getnanotime(&now); 1271 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1272 if (acc == NULL) 1273 acc = &now; 1274 node->tn_atime = *acc; 1275 } 1276 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1277 if (mod == NULL) 1278 mod = &now; 1279 node->tn_mtime = *mod; 1280 } 1281 if (node->tn_status & TMPFS_NODE_CHANGED) 1282 node->tn_ctime = now; 1283 1284 node->tn_status &= 1285 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1286 } 1287 1288 /* --------------------------------------------------------------------- */ 1289 1290 void 1291 tmpfs_update(struct vnode *vp, const struct timespec *acc, 1292 const struct timespec *mod, int flags) 1293 { 1294 1295 struct tmpfs_node *node; 1296 1297 KASSERT(VOP_ISLOCKED(vp)); 1298 1299 node = VP_TO_TMPFS_NODE(vp); 1300 1301 #if 0 1302 if (flags & UPDATE_CLOSE) 1303 ; /* XXX Need to do anything special? 
*/ 1304 #endif 1305 1306 tmpfs_itimes(vp, acc, mod); 1307 1308 KASSERT(VOP_ISLOCKED(vp)); 1309 } 1310 1311 /* --------------------------------------------------------------------- */ 1312 1313 int 1314 tmpfs_truncate(struct vnode *vp, off_t length) 1315 { 1316 bool extended; 1317 int error; 1318 struct tmpfs_node *node; 1319 1320 node = VP_TO_TMPFS_NODE(vp); 1321 extended = length > node->tn_size; 1322 1323 if (length < 0) { 1324 error = EINVAL; 1325 goto out; 1326 } 1327 1328 if (node->tn_size == length) { 1329 error = 0; 1330 goto out; 1331 } 1332 1333 error = tmpfs_reg_resize(vp, length); 1334 if (error == 0) 1335 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1336 1337 out: 1338 tmpfs_update(vp, NULL, NULL, 0); 1339 1340 return error; 1341 } 1342