1 /* $NetBSD: tmpfs_subr.c,v 1.36 2007/08/06 16:08:55 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Efficient memory file system supporting functions. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.36 2007/08/06 16:08:55 pooka Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/dirent.h> 49 #include <sys/event.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/namei.h> 53 #include <sys/time.h> 54 #include <sys/stat.h> 55 #include <sys/systm.h> 56 #include <sys/swap.h> 57 #include <sys/vnode.h> 58 #include <sys/kauth.h> 59 #include <sys/proc.h> 60 61 #include <uvm/uvm.h> 62 63 #include <miscfs/specfs/specdev.h> 64 #include <fs/tmpfs/tmpfs.h> 65 #include <fs/tmpfs/tmpfs_fifoops.h> 66 #include <fs/tmpfs/tmpfs_specops.h> 67 #include <fs/tmpfs/tmpfs_vnops.h> 68 69 /* --------------------------------------------------------------------- */ 70 71 /* 72 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 73 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 74 * using the credentials of the process 'p'. 75 * 76 * If the node type is set to 'VDIR', then the parent parameter must point 77 * to the parent directory of the node being created. It may only be NULL 78 * while allocating the root node. 79 * 80 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 81 * specifies the device the node represents. 82 * 83 * If the node type is set to 'VLNK', then the parameter target specifies 84 * the file name of the target file for the symbolic link that is being 85 * created. 86 * 87 * Note that new nodes are retrieved from the available list if it has 88 * items or, if it is empty, from the node pool as long as there is enough 89 * space to create them. 90 * 91 * Returns zero on success or an appropriate error code on failure. 92 */ 93 int 94 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 95 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 96 char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node) 97 { 98 struct tmpfs_node *nnode; 99 100 /* If the root directory of the 'tmp' file system is not yet 101 * allocated, this must be the request to do it. */ 102 KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 103 104 KASSERT(IFF(type == VLNK, target != NULL)); 105 KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 106 107 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 108 109 nnode = NULL; 110 if (LIST_EMPTY(&tmp->tm_nodes_avail)) { 111 KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max); 112 if (tmp->tm_nodes_last == tmp->tm_nodes_max) 113 return ENOSPC; 114 115 nnode = 116 (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); 117 if (nnode == NULL) 118 return ENOSPC; 119 nnode->tn_id = tmp->tm_nodes_last++; 120 nnode->tn_gen = arc4random(); 121 } else { 122 nnode = LIST_FIRST(&tmp->tm_nodes_avail); 123 LIST_REMOVE(nnode, tn_entries); 124 nnode->tn_gen++; 125 } 126 KASSERT(nnode != NULL); 127 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 128 129 /* Generic initialization. */ 130 nnode->tn_type = type; 131 nnode->tn_size = 0; 132 nnode->tn_status = 0; 133 nnode->tn_flags = 0; 134 nnode->tn_links = 0; 135 getnanotime(&nnode->tn_atime); 136 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 137 nnode->tn_atime; 138 nnode->tn_uid = uid; 139 nnode->tn_gid = gid; 140 nnode->tn_mode = mode; 141 nnode->tn_lockf = NULL; 142 nnode->tn_vnode = NULL; 143 144 /* Type-specific initialization. */ 145 switch (nnode->tn_type) { 146 case VBLK: 147 case VCHR: 148 nnode->tn_spec.tn_dev.tn_rdev = rdev; 149 break; 150 151 case VDIR: 152 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 153 nnode->tn_spec.tn_dir.tn_parent = 154 (parent == NULL) ? nnode : parent; 155 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 156 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 157 nnode->tn_links++; 158 nnode->tn_spec.tn_dir.tn_parent->tn_links++; 159 if (parent != NULL) { 160 KASSERT(parent->tn_vnode != NULL); 161 VN_KNOTE(parent->tn_vnode, NOTE_LINK); 162 } 163 break; 164 165 case VFIFO: 166 /* FALLTHROUGH */ 167 case VSOCK: 168 break; 169 170 case VLNK: 171 KASSERT(strlen(target) < MAXPATHLEN); 172 nnode->tn_size = strlen(target); 173 nnode->tn_spec.tn_lnk.tn_link = 174 tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0); 175 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 176 nnode->tn_type = VNON; 177 tmpfs_free_node(tmp, nnode); 178 return ENOSPC; 179 } 180 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 181 break; 182 183 case VREG: 184 nnode->tn_spec.tn_reg.tn_aobj = 185 uao_create(INT32_MAX - PAGE_SIZE, 0); 186 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 187 break; 188 189 default: 190 KASSERT(0); 191 } 192 193 *node = nnode; 194 return 0; 195 } 196 197 /* --------------------------------------------------------------------- */ 198 199 /* 200 * Destroys the node pointed to by node from the file system 'tmp'. 201 * If the node does not belong to the given mount point, the results are 202 * unpredicted. 203 * 204 * If the node references a directory; no entries are allowed because 205 * their removal could need a recursive algorithm, something forbidden in 206 * kernel space. Furthermore, there is not need to provide such 207 * functionality (recursive removal) because the only primitives offered 208 * to the user are the removal of empty directories and the deletion of 209 * individual files. 210 * 211 * Note that nodes are not really deleted; in fact, when a node has been 212 * allocated, it cannot be deleted during the whole life of the file 213 * system. Instead, they are moved to the available list and remain there 214 * until reused. 215 */ 216 void 217 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 218 { 219 ino_t id; 220 unsigned long gen; 221 size_t pages; 222 223 switch (node->tn_type) { 224 case VNON: 225 /* Do not do anything. VNON is provided to let the 226 * allocation routine clean itself easily by avoiding 227 * duplicating code in it. */ 228 /* FALLTHROUGH */ 229 case VBLK: 230 /* FALLTHROUGH */ 231 case VCHR: 232 /* FALLTHROUGH */ 233 case VDIR: 234 /* FALLTHROUGH */ 235 case VFIFO: 236 /* FALLTHROUGH */ 237 case VSOCK: 238 pages = 0; 239 break; 240 241 case VLNK: 242 tmpfs_str_pool_put(&tmp->tm_str_pool, 243 node->tn_spec.tn_lnk.tn_link, node->tn_size); 244 pages = 0; 245 break; 246 247 case VREG: 248 if (node->tn_spec.tn_reg.tn_aobj != NULL) 249 uao_detach(node->tn_spec.tn_reg.tn_aobj); 250 pages = node->tn_spec.tn_reg.tn_aobj_pages; 251 break; 252 253 default: 254 KASSERT(0); 255 pages = 0; /* Shut up gcc when !DIAGNOSTIC. */ 256 break; 257 } 258 259 tmp->tm_pages_used -= pages; 260 261 LIST_REMOVE(node, tn_entries); 262 id = node->tn_id; 263 gen = node->tn_gen; 264 memset(node, 0, sizeof(struct tmpfs_node)); 265 node->tn_id = id; 266 node->tn_type = VNON; 267 node->tn_gen = gen; 268 LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries); 269 } 270 271 /* --------------------------------------------------------------------- */ 272 273 /* 274 * Allocates a new directory entry for the node node with a name of name. 275 * The new directory entry is returned in *de. 276 * 277 * The link count of node is increased by one to reflect the new object 278 * referencing it. This takes care of notifying kqueue listeners about 279 * this change. 280 * 281 * Returns zero on success or an appropriate error code on failure. 282 */ 283 int 284 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 285 const char *name, uint16_t len, struct tmpfs_dirent **de) 286 { 287 struct tmpfs_dirent *nde; 288 289 nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0); 290 if (nde == NULL) 291 return ENOSPC; 292 293 nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0); 294 if (nde->td_name == NULL) { 295 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde); 296 return ENOSPC; 297 } 298 nde->td_namelen = len; 299 memcpy(nde->td_name, name, len); 300 nde->td_node = node; 301 302 node->tn_links++; 303 if (node->tn_links > 1 && node->tn_vnode != NULL) 304 VN_KNOTE(node->tn_vnode, NOTE_LINK); 305 *de = nde; 306 307 return 0; 308 } 309 310 /* --------------------------------------------------------------------- */ 311 312 /* 313 * Frees a directory entry. It is the caller's responsibility to destroy 314 * the node referenced by it if needed. 315 * 316 * The link count of node is decreased by one to reflect the removal of an 317 * object that referenced it. This only happens if 'node_exists' is true; 318 * otherwise the function will not access the node referred to by the 319 * directory entry, as it may already have been released from the outside. 320 * 321 * Interested parties (kqueue) are notified of the link count change; note 322 * that this can include both the node pointed to by the directory entry 323 * as well as its parent. 324 */ 325 void 326 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 327 bool node_exists) 328 { 329 if (node_exists) { 330 struct tmpfs_node *node; 331 332 node = de->td_node; 333 334 KASSERT(node->tn_links > 0); 335 node->tn_links--; 336 if (node->tn_vnode != NULL) 337 VN_KNOTE(node->tn_vnode, node->tn_links == 0 ? 338 NOTE_DELETE : NOTE_LINK); 339 if (node->tn_type == VDIR) 340 VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode, 341 NOTE_LINK); 342 } 343 344 tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen); 345 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de); 346 } 347 348 /* --------------------------------------------------------------------- */ 349 350 /* 351 * Allocates a new vnode for the node node or returns a new reference to 352 * an existing one if the node had already a vnode referencing it. The 353 * resulting locked vnode is returned in *vpp. 354 * 355 * Returns zero on success or an appropriate error code on failure. 356 */ 357 int 358 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) 359 { 360 int error; 361 struct vnode *nvp; 362 struct vnode *vp; 363 364 vp = NULL; 365 366 if (node->tn_vnode != NULL) { 367 vp = node->tn_vnode; 368 vget(vp, LK_EXCLUSIVE | LK_RETRY); 369 error = 0; 370 goto out; 371 } 372 373 /* Get a new vnode and associate it with our node. */ 374 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp); 375 if (error != 0) 376 goto out; 377 KASSERT(vp != NULL); 378 379 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 380 if (error != 0) { 381 vp->v_data = NULL; 382 ungetnewvnode(vp); 383 vp = NULL; 384 goto out; 385 } 386 387 vp->v_data = node; 388 vp->v_type = node->tn_type; 389 390 /* Type-specific initialization. */ 391 switch (node->tn_type) { 392 case VBLK: 393 /* FALLTHROUGH */ 394 case VCHR: 395 vp->v_op = tmpfs_specop_p; 396 nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp); 397 if (nvp != NULL) { 398 /* Discard unneeded vnode, but save its inode. */ 399 nvp->v_data = vp->v_data; 400 vp->v_data = NULL; 401 402 /* XXX spec_vnodeops has no locking, so we have to 403 * do it explicitly. */ 404 VOP_UNLOCK(vp, 0); 405 vp->v_op = spec_vnodeop_p; 406 vp->v_flag &= ~VLOCKSWORK; 407 vrele(vp); 408 vgone(vp); 409 410 /* Reinitialize aliased node. */ 411 vp = nvp; 412 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 413 if (error != 0) { 414 vp->v_data = NULL; 415 vp = NULL; 416 goto out; 417 } 418 } 419 break; 420 421 case VDIR: 422 vp->v_flag = node->tn_spec.tn_dir.tn_parent == node ? VROOT : 0; 423 break; 424 425 case VFIFO: 426 vp->v_op = tmpfs_fifoop_p; 427 break; 428 429 case VLNK: 430 /* FALLTHROUGH */ 431 case VREG: 432 /* FALLTHROUGH */ 433 case VSOCK: 434 break; 435 436 default: 437 KASSERT(0); 438 } 439 440 uvm_vnp_setsize(vp, node->tn_size); 441 442 error = 0; 443 444 out: 445 *vpp = node->tn_vnode = vp; 446 447 KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); 448 KASSERT(*vpp == node->tn_vnode); 449 450 return error; 451 } 452 453 /* --------------------------------------------------------------------- */ 454 455 /* 456 * Destroys the association between the vnode vp and the node it 457 * references. 458 */ 459 void 460 tmpfs_free_vp(struct vnode *vp) 461 { 462 struct tmpfs_node *node; 463 464 node = VP_TO_TMPFS_NODE(vp); 465 466 node->tn_vnode = NULL; 467 vp->v_data = NULL; 468 } 469 470 /* --------------------------------------------------------------------- */ 471 472 /* 473 * Allocates a new file of type 'type' and adds it to the parent directory 474 * 'dvp'; this addition is done using the component name given in 'cnp'. 475 * The ownership of the new file is automatically assigned based on the 476 * credentials of the caller (through 'cnp'), the group is set based on 477 * the parent directory and the mode is determined from the 'vap' argument. 478 * If successful, *vpp holds a vnode to the newly created file and zero 479 * is returned. Otherwise *vpp is NULL and the function returns an 480 * appropriate error code. 481 */ 482 int 483 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 484 struct componentname *cnp, char *target) 485 { 486 int error; 487 struct tmpfs_dirent *de; 488 struct tmpfs_mount *tmp; 489 struct tmpfs_node *dnode; 490 struct tmpfs_node *node; 491 struct tmpfs_node *parent; 492 493 KASSERT(VOP_ISLOCKED(dvp)); 494 KASSERT(cnp->cn_flags & HASBUF); 495 496 tmp = VFS_TO_TMPFS(dvp->v_mount); 497 dnode = VP_TO_TMPFS_DIR(dvp); 498 *vpp = NULL; 499 500 /* If the entry we are creating is a directory, we cannot overflow 501 * the number of links of its parent, because it will get a new 502 * link. */ 503 if (vap->va_type == VDIR) { 504 /* Ensure that we do not overflow the maximum number of links 505 * imposed by the system. */ 506 KASSERT(dnode->tn_links <= LINK_MAX); 507 if (dnode->tn_links == LINK_MAX) { 508 error = EMLINK; 509 goto out; 510 } 511 512 parent = dnode; 513 } else 514 parent = NULL; 515 516 /* Allocate a node that represents the new file. */ 517 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 518 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, 519 cnp->cn_lwp->l_proc, &node); 520 if (error != 0) 521 goto out; 522 523 /* Allocate a directory entry that points to the new file. */ 524 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 525 &de); 526 if (error != 0) { 527 tmpfs_free_node(tmp, node); 528 goto out; 529 } 530 531 /* Allocate a vnode for the new file. */ 532 error = tmpfs_alloc_vp(dvp->v_mount, node, vpp); 533 if (error != 0) { 534 tmpfs_free_dirent(tmp, de, true); 535 tmpfs_free_node(tmp, node); 536 goto out; 537 } 538 539 /* Now that all required items are allocated, we can proceed to 540 * insert the new node into the directory, an operation that 541 * cannot fail. */ 542 tmpfs_dir_attach(dvp, de); 543 544 out: 545 if (error != 0 || !(cnp->cn_flags & SAVESTART)) 546 PNBUF_PUT(cnp->cn_pnbuf); 547 vput(dvp); 548 549 KASSERT(!VOP_ISLOCKED(dvp)); 550 KASSERT(IFF(error == 0, *vpp != NULL)); 551 552 return error; 553 } 554 555 /* --------------------------------------------------------------------- */ 556 557 /* 558 * Attaches the directory entry de to the directory represented by vp. 559 * Note that this does not change the link count of the node pointed by 560 * the directory entry, as this is done by tmpfs_alloc_dirent. 561 * 562 * As the "parent" directory changes, interested parties are notified of 563 * a write to it. 564 */ 565 void 566 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 567 { 568 struct tmpfs_node *dnode; 569 570 dnode = VP_TO_TMPFS_DIR(vp); 571 572 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 573 dnode->tn_size += sizeof(struct tmpfs_dirent); 574 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 575 TMPFS_NODE_MODIFIED; 576 uvm_vnp_setsize(vp, dnode->tn_size); 577 578 VN_KNOTE(vp, NOTE_WRITE); 579 } 580 581 /* --------------------------------------------------------------------- */ 582 583 /* 584 * Detaches the directory entry de from the directory represented by vp. 585 * Note that this does not change the link count of the node pointed by 586 * the directory entry, as this is done by tmpfs_free_dirent. 587 * 588 * As the "parent" directory changes, interested parties are notified of 589 * a write to it. 590 */ 591 void 592 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 593 { 594 struct tmpfs_node *dnode; 595 596 KASSERT(VOP_ISLOCKED(vp)); 597 598 dnode = VP_TO_TMPFS_DIR(vp); 599 600 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 601 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 602 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 603 } 604 605 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 606 dnode->tn_size -= sizeof(struct tmpfs_dirent); 607 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 608 TMPFS_NODE_MODIFIED; 609 uvm_vnp_setsize(vp, dnode->tn_size); 610 611 VN_KNOTE(vp, NOTE_WRITE); 612 } 613 614 /* --------------------------------------------------------------------- */ 615 616 /* 617 * Looks for a directory entry in the directory represented by node. 618 * 'cnp' describes the name of the entry to look for. Note that the . 619 * and .. components are not allowed as they do not physically exist 620 * within directories. 621 * 622 * Returns a pointer to the entry when found, otherwise NULL. 623 */ 624 struct tmpfs_dirent * 625 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp) 626 { 627 bool found; 628 struct tmpfs_dirent *de; 629 630 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 631 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 632 cnp->cn_nameptr[1] == '.'))); 633 TMPFS_VALIDATE_DIR(node); 634 635 node->tn_status |= TMPFS_NODE_ACCESSED; 636 637 found = 0; 638 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 639 KASSERT(cnp->cn_namelen < 0xffff); 640 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 641 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 642 found = 1; 643 break; 644 } 645 } 646 647 return found ? de : NULL; 648 } 649 650 /* --------------------------------------------------------------------- */ 651 652 /* 653 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 654 * directory and returns it in the uio space. The function returns 0 655 * on success, -1 if there was not enough space in the uio structure to 656 * hold the directory entry or an appropriate error code if another 657 * error happens. 658 */ 659 int 660 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 661 { 662 int error; 663 struct dirent dent; 664 665 TMPFS_VALIDATE_DIR(node); 666 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 667 668 dent.d_fileno = node->tn_id; 669 dent.d_type = DT_DIR; 670 dent.d_namlen = 1; 671 dent.d_name[0] = '.'; 672 dent.d_name[1] = '\0'; 673 dent.d_reclen = _DIRENT_SIZE(&dent); 674 675 if (dent.d_reclen > uio->uio_resid) 676 error = -1; 677 else { 678 error = uiomove(&dent, dent.d_reclen, uio); 679 if (error == 0) 680 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 681 } 682 683 node->tn_status |= TMPFS_NODE_ACCESSED; 684 685 return error; 686 } 687 688 /* --------------------------------------------------------------------- */ 689 690 /* 691 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 692 * directory and returns it in the uio space. The function returns 0 693 * on success, -1 if there was not enough space in the uio structure to 694 * hold the directory entry or an appropriate error code if another 695 * error happens. 696 */ 697 int 698 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 699 { 700 int error; 701 struct dirent dent; 702 703 TMPFS_VALIDATE_DIR(node); 704 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 705 706 dent.d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 707 dent.d_type = DT_DIR; 708 dent.d_namlen = 2; 709 dent.d_name[0] = '.'; 710 dent.d_name[1] = '.'; 711 dent.d_name[2] = '\0'; 712 dent.d_reclen = _DIRENT_SIZE(&dent); 713 714 if (dent.d_reclen > uio->uio_resid) 715 error = -1; 716 else { 717 error = uiomove(&dent, dent.d_reclen, uio); 718 if (error == 0) { 719 struct tmpfs_dirent *de; 720 721 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 722 if (de == NULL) 723 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 724 else 725 uio->uio_offset = tmpfs_dircookie(de); 726 } 727 } 728 729 node->tn_status |= TMPFS_NODE_ACCESSED; 730 731 return error; 732 } 733 734 /* --------------------------------------------------------------------- */ 735 736 /* 737 * Lookup a directory entry by its associated cookie. 738 */ 739 struct tmpfs_dirent * 740 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 741 { 742 struct tmpfs_dirent *de; 743 744 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 745 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 746 return node->tn_spec.tn_dir.tn_readdir_lastp; 747 } 748 749 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 750 if (tmpfs_dircookie(de) == cookie) { 751 break; 752 } 753 } 754 755 return de; 756 } 757 758 /* --------------------------------------------------------------------- */ 759 760 /* 761 * Helper function for tmpfs_readdir. Returns as much directory entries 762 * as can fit in the uio space. The read starts at uio->uio_offset. 763 * The function returns 0 on success, -1 if there was not enough space 764 * in the uio structure to hold the directory entry or an appropriate 765 * error code if another error happens. 766 */ 767 int 768 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 769 { 770 int error; 771 off_t startcookie; 772 struct tmpfs_dirent *de; 773 774 TMPFS_VALIDATE_DIR(node); 775 776 /* Locate the first directory entry we have to return. We have cached 777 * the last readdir in the node, so use those values if appropriate. 778 * Otherwise do a linear scan to find the requested entry. */ 779 startcookie = uio->uio_offset; 780 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 781 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 782 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 783 return 0; 784 } else { 785 de = tmpfs_dir_lookupbycookie(node, startcookie); 786 } 787 if (de == NULL) { 788 return EINVAL; 789 } 790 791 /* Read as much entries as possible; i.e., until we reach the end of 792 * the directory or we exhaust uio space. */ 793 do { 794 struct dirent d; 795 796 /* Create a dirent structure representing the current 797 * tmpfs_node and fill it. */ 798 d.d_fileno = de->td_node->tn_id; 799 switch (de->td_node->tn_type) { 800 case VBLK: 801 d.d_type = DT_BLK; 802 break; 803 804 case VCHR: 805 d.d_type = DT_CHR; 806 break; 807 808 case VDIR: 809 d.d_type = DT_DIR; 810 break; 811 812 case VFIFO: 813 d.d_type = DT_FIFO; 814 break; 815 816 case VLNK: 817 d.d_type = DT_LNK; 818 break; 819 820 case VREG: 821 d.d_type = DT_REG; 822 break; 823 824 case VSOCK: 825 d.d_type = DT_SOCK; 826 break; 827 828 default: 829 KASSERT(0); 830 } 831 d.d_namlen = de->td_namelen; 832 KASSERT(de->td_namelen < sizeof(d.d_name)); 833 (void)memcpy(d.d_name, de->td_name, de->td_namelen); 834 d.d_name[de->td_namelen] = '\0'; 835 d.d_reclen = _DIRENT_SIZE(&d); 836 837 /* Stop reading if the directory entry we are treating is 838 * bigger than the amount of data that can be returned. */ 839 if (d.d_reclen > uio->uio_resid) { 840 error = -1; 841 break; 842 } 843 844 /* Copy the new dirent structure into the output buffer and 845 * advance pointers. */ 846 error = uiomove(&d, d.d_reclen, uio); 847 848 (*cntp)++; 849 de = TAILQ_NEXT(de, td_entries); 850 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 851 852 /* Update the offset and cache. */ 853 if (de == NULL) { 854 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 855 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 856 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 857 } else { 858 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 859 tmpfs_dircookie(de); 860 node->tn_spec.tn_dir.tn_readdir_lastp = de; 861 } 862 863 node->tn_status |= TMPFS_NODE_ACCESSED; 864 865 return error; 866 } 867 868 /* --------------------------------------------------------------------- */ 869 870 /* 871 * Resizes the aobj associated to the regular file pointed to by vp to 872 * the size newsize. 'vp' must point to a vnode that represents a regular 873 * file. 'newsize' must be positive. 874 * 875 * If the file is extended, the appropriate kevent is raised. This does 876 * not rise a write event though because resizing is not the same as 877 * writing. 878 * 879 * Returns zero on success or an appropriate error code on failure. 880 */ 881 int 882 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 883 { 884 int error; 885 size_t newpages, oldpages; 886 struct tmpfs_mount *tmp; 887 struct tmpfs_node *node; 888 off_t oldsize; 889 890 KASSERT(vp->v_type == VREG); 891 KASSERT(newsize >= 0); 892 893 node = VP_TO_TMPFS_NODE(vp); 894 tmp = VFS_TO_TMPFS(vp->v_mount); 895 896 /* Convert the old and new sizes to the number of pages needed to 897 * store them. It may happen that we do not need to do anything 898 * because the last allocated page can accommodate the change on 899 * its own. */ 900 oldsize = node->tn_size; 901 oldpages = round_page(oldsize) / PAGE_SIZE; 902 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 903 newpages = round_page(newsize) / PAGE_SIZE; 904 905 if (newpages > oldpages && 906 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { 907 error = ENOSPC; 908 goto out; 909 } 910 911 if (newsize < oldsize) { 912 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 913 914 /* 915 * free "backing store" 916 */ 917 918 if (newpages < oldpages) { 919 struct uvm_object *uobj; 920 921 uobj = node->tn_spec.tn_reg.tn_aobj; 922 923 simple_lock(&uobj->vmobjlock); 924 uao_dropswap_range(uobj, newpages, oldpages); 925 simple_unlock(&uobj->vmobjlock); 926 } 927 928 /* 929 * zero out the truncated part of the last page. 930 */ 931 932 uvm_vnp_zerorange(vp, newsize, zerolen); 933 } 934 935 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 936 node->tn_size = newsize; 937 uvm_vnp_setsize(vp, newsize); 938 939 tmp->tm_pages_used += (newpages - oldpages); 940 941 error = 0; 942 943 if (newsize > oldsize) 944 VN_KNOTE(vp, NOTE_EXTEND); 945 946 out: 947 return error; 948 } 949 950 /* --------------------------------------------------------------------- */ 951 952 /* 953 * Returns information about the number of available memory pages, 954 * including physical and virtual ones. 955 * 956 * If 'total' is true, the value returned is the total amount of memory 957 * pages configured for the system (either in use or free). 958 * If it is FALSE, the value returned is the amount of free memory pages. 959 * 960 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid 961 * excessive memory usage. 962 * 963 */ 964 size_t 965 tmpfs_mem_info(bool total) 966 { 967 size_t size; 968 969 size = 0; 970 size += uvmexp.swpgavail; 971 if (!total) { 972 size -= uvmexp.swpgonly; 973 } 974 size += uvmexp.free; 975 size += uvmexp.filepages; 976 if (size > uvmexp.wired) { 977 size -= uvmexp.wired; 978 } else { 979 size = 0; 980 } 981 982 return size; 983 } 984 985 /* --------------------------------------------------------------------- */ 986 987 /* 988 * Change flags of the given vnode. 989 * Caller should execute tmpfs_update on vp after a successful execution. 990 * The vnode must be locked on entry and remain locked on exit. 991 */ 992 int 993 tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) 994 { 995 int error; 996 struct tmpfs_node *node; 997 998 KASSERT(VOP_ISLOCKED(vp)); 999 1000 node = VP_TO_TMPFS_NODE(vp); 1001 1002 /* Disallow this operation if the file system is mounted read-only. */ 1003 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1004 return EROFS; 1005 1006 /* XXX: The following comes from UFS code, and can be found in 1007 * several other file systems. Shouldn't this be centralized 1008 * somewhere? */ 1009 if (kauth_cred_geteuid(cred) != node->tn_uid && 1010 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1011 NULL))) 1012 return error; 1013 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) { 1014 /* The super-user is only allowed to change flags if the file 1015 * wasn't protected before and the securelevel is zero. */ 1016 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) && 1017 kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHSYSFLAGS, 1018 0, NULL, NULL, NULL)) 1019 return EPERM; 1020 node->tn_flags = flags; 1021 } else { 1022 /* Regular users can change flags provided they only want to 1023 * change user-specific ones, not those reserved for the 1024 * super-user. */ 1025 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) || 1026 (flags & UF_SETTABLE) != flags) 1027 return EPERM; 1028 if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE)) 1029 return EPERM; 1030 node->tn_flags &= SF_SETTABLE; 1031 node->tn_flags |= (flags & UF_SETTABLE); 1032 } 1033 1034 node->tn_status |= TMPFS_NODE_CHANGED; 1035 VN_KNOTE(vp, NOTE_ATTRIB); 1036 1037 KASSERT(VOP_ISLOCKED(vp)); 1038 1039 return 0; 1040 } 1041 1042 /* --------------------------------------------------------------------- */ 1043 1044 /* 1045 * Change access mode on the given vnode. 1046 * Caller should execute tmpfs_update on vp after a successful execution. 1047 * The vnode must be locked on entry and remain locked on exit. 1048 */ 1049 int 1050 tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l) 1051 { 1052 int error, ismember = 0; 1053 struct tmpfs_node *node; 1054 1055 KASSERT(VOP_ISLOCKED(vp)); 1056 1057 node = VP_TO_TMPFS_NODE(vp); 1058 1059 /* Disallow this operation if the file system is mounted read-only. */ 1060 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1061 return EROFS; 1062 1063 /* Immutable or append-only files cannot be modified, either. */ 1064 if (node->tn_flags & (IMMUTABLE | APPEND)) 1065 return EPERM; 1066 1067 /* XXX: The following comes from UFS code, and can be found in 1068 * several other file systems. Shouldn't this be centralized 1069 * somewhere? */ 1070 if (kauth_cred_geteuid(cred) != node->tn_uid && 1071 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1072 NULL))) 1073 return error; 1074 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) { 1075 if (vp->v_type != VDIR && (mode & S_ISTXT)) 1076 return EFTYPE; 1077 1078 if ((kauth_cred_ismember_gid(cred, node->tn_gid, 1079 &ismember) != 0 || !ismember) && (mode & S_ISGID)) 1080 return EPERM; 1081 } 1082 1083 node->tn_mode = (mode & ALLPERMS); 1084 1085 node->tn_status |= TMPFS_NODE_CHANGED; 1086 VN_KNOTE(vp, NOTE_ATTRIB); 1087 1088 KASSERT(VOP_ISLOCKED(vp)); 1089 1090 return 0; 1091 } 1092 1093 /* --------------------------------------------------------------------- */ 1094 1095 /* 1096 * Change ownership of the given vnode. At least one of uid or gid must 1097 * be different than VNOVAL. If one is set to that value, the attribute 1098 * is unchanged. 1099 * Caller should execute tmpfs_update on vp after a successful execution. 1100 * The vnode must be locked on entry and remain locked on exit. 1101 */ 1102 int 1103 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, 1104 struct lwp *l) 1105 { 1106 int error, ismember = 0; 1107 struct tmpfs_node *node; 1108 1109 KASSERT(VOP_ISLOCKED(vp)); 1110 1111 node = VP_TO_TMPFS_NODE(vp); 1112 1113 /* Assign default values if they are unknown. */ 1114 KASSERT(uid != VNOVAL || gid != VNOVAL); 1115 if (uid == VNOVAL) 1116 uid = node->tn_uid; 1117 if (gid == VNOVAL) 1118 gid = node->tn_gid; 1119 KASSERT(uid != VNOVAL && gid != VNOVAL); 1120 1121 /* Disallow this operation if the file system is mounted read-only. */ 1122 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1123 return EROFS; 1124 1125 /* Immutable or append-only files cannot be modified, either. */ 1126 if (node->tn_flags & (IMMUTABLE | APPEND)) 1127 return EPERM; 1128 1129 /* XXX: The following comes from UFS code, and can be found in 1130 * several other file systems. Shouldn't this be centralized 1131 * somewhere? */ 1132 if ((kauth_cred_geteuid(cred) != node->tn_uid || uid != node->tn_uid || 1133 (gid != node->tn_gid && !(kauth_cred_getegid(cred) == node->tn_gid || 1134 (kauth_cred_ismember_gid(cred, gid, &ismember) == 0 && ismember)))) && 1135 ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1136 NULL)) != 0)) 1137 return error; 1138 1139 node->tn_uid = uid; 1140 node->tn_gid = gid; 1141 1142 node->tn_status |= TMPFS_NODE_CHANGED; 1143 VN_KNOTE(vp, NOTE_ATTRIB); 1144 1145 KASSERT(VOP_ISLOCKED(vp)); 1146 1147 return 0; 1148 } 1149 1150 /* --------------------------------------------------------------------- */ 1151 1152 /* 1153 * Change size of the given vnode. 1154 * Caller should execute tmpfs_update on vp after a successful execution. 1155 * The vnode must be locked on entry and remain locked on exit. 1156 */ 1157 int 1158 tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred, 1159 struct lwp *l) 1160 { 1161 int error; 1162 struct tmpfs_node *node; 1163 1164 KASSERT(VOP_ISLOCKED(vp)); 1165 1166 node = VP_TO_TMPFS_NODE(vp); 1167 1168 /* Decide whether this is a valid operation based on the file type. */ 1169 error = 0; 1170 switch (vp->v_type) { 1171 case VDIR: 1172 return EISDIR; 1173 1174 case VREG: 1175 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1176 return EROFS; 1177 break; 1178 1179 case VBLK: 1180 /* FALLTHROUGH */ 1181 case VCHR: 1182 /* FALLTHROUGH */ 1183 case VFIFO: 1184 /* Allow modifications of special files even if in the file 1185 * system is mounted read-only (we are not modifying the 1186 * files themselves, but the objects they represent). */ 1187 return 0; 1188 1189 default: 1190 /* Anything else is unsupported. */ 1191 return EOPNOTSUPP; 1192 } 1193 1194 /* Immutable or append-only files cannot be modified, either. */ 1195 if (node->tn_flags & (IMMUTABLE | APPEND)) 1196 return EPERM; 1197 1198 error = tmpfs_truncate(vp, size); 1199 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1200 * for us, as will update tn_status; no need to do that here. */ 1201 1202 KASSERT(VOP_ISLOCKED(vp)); 1203 1204 return error; 1205 } 1206 1207 /* --------------------------------------------------------------------- */ 1208 1209 /* 1210 * Change access and modification times of the given vnode. 1211 * Caller should execute tmpfs_update on vp after a successful execution. 1212 * The vnode must be locked on entry and remain locked on exit. 1213 */ 1214 int 1215 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1216 int vaflags, kauth_cred_t cred, struct lwp *l) 1217 { 1218 int error; 1219 struct tmpfs_node *node; 1220 1221 KASSERT(VOP_ISLOCKED(vp)); 1222 1223 node = VP_TO_TMPFS_NODE(vp); 1224 1225 /* Disallow this operation if the file system is mounted read-only. */ 1226 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1227 return EROFS; 1228 1229 /* Immutable or append-only files cannot be modified, either. */ 1230 if (node->tn_flags & (IMMUTABLE | APPEND)) 1231 return EPERM; 1232 1233 /* XXX: The following comes from UFS code, and can be found in 1234 * several other file systems. Shouldn't this be centralized 1235 * somewhere? */ 1236 if (kauth_cred_geteuid(cred) != node->tn_uid && 1237 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 1238 NULL)) && ((vaflags & VA_UTIMES_NULL) == 0 || 1239 (error = VOP_ACCESS(vp, VWRITE, cred, l)))) 1240 return error; 1241 1242 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1243 node->tn_status |= TMPFS_NODE_ACCESSED; 1244 1245 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1246 node->tn_status |= TMPFS_NODE_MODIFIED; 1247 1248 tmpfs_update(vp, atime, mtime, 0); 1249 VN_KNOTE(vp, NOTE_ATTRIB); 1250 1251 KASSERT(VOP_ISLOCKED(vp)); 1252 1253 return 0; 1254 } 1255 1256 /* --------------------------------------------------------------------- */ 1257 1258 /* Sync timestamps */ 1259 void 1260 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1261 const struct timespec *mod) 1262 { 1263 struct timespec now; 1264 struct tmpfs_node *node; 1265 1266 node = VP_TO_TMPFS_NODE(vp); 1267 1268 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1269 TMPFS_NODE_CHANGED)) == 0) 1270 return; 1271 1272 getnanotime(&now); 1273 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1274 if (acc == NULL) 1275 acc = &now; 1276 node->tn_atime = *acc; 1277 } 1278 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1279 if (mod == NULL) 1280 mod = &now; 1281 node->tn_mtime = *mod; 1282 } 1283 if (node->tn_status & TMPFS_NODE_CHANGED) 1284 node->tn_ctime = now; 1285 1286 node->tn_status &= 1287 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1288 } 1289 1290 /* --------------------------------------------------------------------- */ 1291 1292 void 1293 tmpfs_update(struct vnode *vp, const struct timespec *acc, 1294 const struct timespec *mod, int flags) 1295 { 1296 1297 struct tmpfs_node *node; 1298 1299 KASSERT(VOP_ISLOCKED(vp)); 1300 1301 node = VP_TO_TMPFS_NODE(vp); 1302 1303 #if 0 1304 if (flags & UPDATE_CLOSE) 1305 ; /* XXX Need to do anything special? */ 1306 #endif 1307 1308 tmpfs_itimes(vp, acc, mod); 1309 1310 KASSERT(VOP_ISLOCKED(vp)); 1311 } 1312 1313 /* --------------------------------------------------------------------- */ 1314 1315 int 1316 tmpfs_truncate(struct vnode *vp, off_t length) 1317 { 1318 bool extended; 1319 int error; 1320 struct tmpfs_node *node; 1321 1322 node = VP_TO_TMPFS_NODE(vp); 1323 extended = length > node->tn_size; 1324 1325 if (length < 0) { 1326 error = EINVAL; 1327 goto out; 1328 } 1329 1330 if (node->tn_size == length) { 1331 error = 0; 1332 goto out; 1333 } 1334 1335 error = tmpfs_reg_resize(vp, length); 1336 if (error == 0) 1337 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1338 1339 out: 1340 tmpfs_update(vp, NULL, NULL, 0); 1341 1342 return error; 1343 } 1344