/*	$NetBSD: tmpfs_subr.c,v 1.80 2013/10/04 15:14:11 rmind Exp $	*/

/*
 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system: interfaces for inode and directory entry
 * construction, destruction and manipulation.
 *
 * Reference counting
 *
 *	The link count of inode (tmpfs_node_t::tn_links) is used as a
 *	reference counter.  However, it has slightly different semantics.
 *
 *	For directories - link count represents directory entries, which
 *	refer to the directories.  In other words, it represents the count
 *	of sub-directories.  It also takes into account the virtual '.'
 *	entry (which has no real entry in the list).  For files - link count
 *	represents the hard links.  Since only empty directories can be
 *	removed - link count aligns the reference counting requirements
 *	enough.  Note: to check whether directory is not empty, the inode
 *	size (tmpfs_node_t::tn_size) can be used.
 *
 *	The inode itself, as an object, gathers its first reference when
 *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
 *	after regular tmpfs_create(), a file would have a link count of 1,
 *	while directory after tmpfs_mkdir() would have 2 (due to '.').
 *
 * Reclamation
 *
 *	It should be noted that tmpfs inodes rely on a combination of vnode
 *	reference counting and link counting.  That is, an inode can only be
 *	destroyed if its associated vnode is inactive.  The destruction is
 *	done on vnode reclamation i.e. tmpfs_reclaim().  It should be noted
 *	that tmpfs_node_t::tn_links being 0 is a destruction criterion.
 *
 *	If an inode has references within the file system (tn_links > 0) and
 *	its inactive vnode gets reclaimed/recycled - then the association is
 *	broken in tmpfs_reclaim().  In such case, an inode will always pass
 *	tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
68 * 69 * Lock order 70 * 71 * tmpfs_node_t::tn_vlock -> 72 * vnode_t::v_vlock -> 73 * vnode_t::v_interlock 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.80 2013/10/04 15:14:11 rmind Exp $"); 78 79 #include <sys/param.h> 80 #include <sys/dirent.h> 81 #include <sys/event.h> 82 #include <sys/kmem.h> 83 #include <sys/mount.h> 84 #include <sys/namei.h> 85 #include <sys/time.h> 86 #include <sys/stat.h> 87 #include <sys/systm.h> 88 #include <sys/vnode.h> 89 #include <sys/kauth.h> 90 #include <sys/atomic.h> 91 92 #include <uvm/uvm.h> 93 94 #include <miscfs/specfs/specdev.h> 95 #include <miscfs/genfs/genfs.h> 96 #include <fs/tmpfs/tmpfs.h> 97 #include <fs/tmpfs/tmpfs_fifoops.h> 98 #include <fs/tmpfs/tmpfs_specops.h> 99 #include <fs/tmpfs/tmpfs_vnops.h> 100 101 /* 102 * tmpfs_alloc_node: allocate a new inode of a specified type and 103 * insert it into the list of specified mount point. 104 */ 105 int 106 tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid, 107 mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node) 108 { 109 tmpfs_node_t *nnode; 110 111 nnode = tmpfs_node_get(tmp); 112 if (nnode == NULL) { 113 return ENOSPC; 114 } 115 116 /* Initially, no references and no associations. */ 117 nnode->tn_links = 0; 118 nnode->tn_vnode = NULL; 119 nnode->tn_dirent_hint = NULL; 120 121 /* 122 * XXX Where the pool is backed by a map larger than (4GB * 123 * sizeof(*nnode)), this may produce duplicate inode numbers 124 * for applications that do not understand 64-bit ino_t. 125 */ 126 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); 127 nnode->tn_gen = TMPFS_NODE_GEN_MASK & random(); 128 129 /* Generic initialization. 
*/ 130 nnode->tn_type = type; 131 nnode->tn_size = 0; 132 nnode->tn_status = 0; 133 nnode->tn_flags = 0; 134 nnode->tn_lockf = NULL; 135 136 vfs_timestamp(&nnode->tn_atime); 137 nnode->tn_birthtime = nnode->tn_atime; 138 nnode->tn_ctime = nnode->tn_atime; 139 nnode->tn_mtime = nnode->tn_atime; 140 141 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 142 nnode->tn_uid = uid; 143 nnode->tn_gid = gid; 144 nnode->tn_mode = mode; 145 146 /* Type-specific initialization. */ 147 switch (nnode->tn_type) { 148 case VBLK: 149 case VCHR: 150 /* Character/block special device. */ 151 KASSERT(rdev != VNOVAL); 152 nnode->tn_spec.tn_dev.tn_rdev = rdev; 153 break; 154 case VDIR: 155 /* Directory. */ 156 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 157 nnode->tn_spec.tn_dir.tn_parent = NULL; 158 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 159 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 160 161 /* Extra link count for the virtual '.' entry. */ 162 nnode->tn_links++; 163 break; 164 case VFIFO: 165 case VSOCK: 166 break; 167 case VLNK: 168 /* Symbolic link. Target specifies the file name. */ 169 KASSERT(target && strlen(target) < MAXPATHLEN); 170 171 nnode->tn_size = strlen(target); 172 if (nnode->tn_size == 0) { 173 nnode->tn_spec.tn_lnk.tn_link = NULL; 174 break; 175 } 176 nnode->tn_spec.tn_lnk.tn_link = 177 tmpfs_strname_alloc(tmp, nnode->tn_size); 178 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 179 tmpfs_node_put(tmp, nnode); 180 return ENOSPC; 181 } 182 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 183 break; 184 case VREG: 185 /* Regular file. Create an underlying UVM object. 
*/ 186 nnode->tn_spec.tn_reg.tn_aobj = 187 uao_create(INT32_MAX - PAGE_SIZE, 0); 188 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 189 break; 190 default: 191 KASSERT(false); 192 } 193 194 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); 195 196 mutex_enter(&tmp->tm_lock); 197 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 198 mutex_exit(&tmp->tm_lock); 199 200 *node = nnode; 201 return 0; 202 } 203 204 /* 205 * tmpfs_free_node: remove the inode from a list in the mount point and 206 * destroy the inode structures. 207 */ 208 void 209 tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node) 210 { 211 size_t objsz; 212 213 mutex_enter(&tmp->tm_lock); 214 LIST_REMOVE(node, tn_entries); 215 mutex_exit(&tmp->tm_lock); 216 217 switch (node->tn_type) { 218 case VLNK: 219 if (node->tn_size > 0) { 220 tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link, 221 node->tn_size); 222 } 223 break; 224 case VREG: 225 /* 226 * Calculate the size of inode data, decrease the used-memory 227 * counter, and destroy the unerlying UVM object (if any). 228 */ 229 objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages; 230 if (objsz != 0) { 231 tmpfs_mem_decr(tmp, objsz); 232 } 233 if (node->tn_spec.tn_reg.tn_aobj != NULL) { 234 uao_detach(node->tn_spec.tn_reg.tn_aobj); 235 } 236 break; 237 case VDIR: 238 /* 239 * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); 240 * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || 241 * node == tmp->tm_root); 242 */ 243 break; 244 default: 245 break; 246 } 247 248 mutex_destroy(&node->tn_vlock); 249 tmpfs_node_put(tmp, node); 250 } 251 252 /* 253 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode. 254 * 255 * => Must be called with tmpfs_node_t::tn_vlock held. 256 * => Returns vnode (*vpp) locked. 257 */ 258 int 259 tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp) 260 { 261 vnode_t *vp; 262 kmutex_t *slock; 263 int error; 264 again: 265 /* If there is already a vnode, try to reclaim it. 
*/ 266 if ((vp = node->tn_vnode) != NULL) { 267 atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); 268 mutex_enter(vp->v_interlock); 269 mutex_exit(&node->tn_vlock); 270 error = vget(vp, LK_EXCLUSIVE); 271 if (error == ENOENT) { 272 mutex_enter(&node->tn_vlock); 273 goto again; 274 } 275 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 276 *vpp = vp; 277 return error; 278 } 279 if (TMPFS_NODE_RECLAIMING(node)) { 280 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 281 } 282 283 /* 284 * Get a new vnode and associate it with our inode. Share the 285 * lock with underlying UVM object, if there is one (VREG case). 286 */ 287 if (node->tn_type == VREG) { 288 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 289 slock = uobj->vmobjlock; 290 } else { 291 slock = NULL; 292 } 293 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, slock, &vp); 294 if (error) { 295 mutex_exit(&node->tn_vlock); 296 return error; 297 } 298 299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 300 vp->v_type = node->tn_type; 301 302 /* Type-specific initialization. */ 303 switch (node->tn_type) { 304 case VBLK: 305 case VCHR: 306 vp->v_op = tmpfs_specop_p; 307 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev); 308 break; 309 case VDIR: 310 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ? 311 VV_ROOT : 0; 312 break; 313 case VFIFO: 314 vp->v_op = tmpfs_fifoop_p; 315 break; 316 case VLNK: 317 case VREG: 318 case VSOCK: 319 break; 320 default: 321 KASSERT(false); 322 } 323 324 uvm_vnp_setsize(vp, node->tn_size); 325 vp->v_data = node; 326 node->tn_vnode = vp; 327 mutex_exit(&node->tn_vlock); 328 329 KASSERT(VOP_ISLOCKED(vp)); 330 *vpp = vp; 331 return 0; 332 } 333 334 /* 335 * tmpfs_alloc_file: allocate a new file of specified type and adds it 336 * into the parent directory. 337 * 338 * => Credentials of the caller are used. 
339 */ 340 int 341 tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap, 342 struct componentname *cnp, char *target) 343 { 344 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); 345 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node; 346 tmpfs_dirent_t *de, *wde; 347 int error; 348 349 KASSERT(VOP_ISLOCKED(dvp)); 350 *vpp = NULL; 351 352 /* Check for the maximum number of links limit. */ 353 if (vap->va_type == VDIR) { 354 /* Check for maximum links limit. */ 355 if (dnode->tn_links == LINK_MAX) { 356 error = EMLINK; 357 goto out; 358 } 359 KASSERT(dnode->tn_links < LINK_MAX); 360 } 361 362 /* Allocate a node that represents the new file. */ 363 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 364 dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node); 365 if (error) 366 goto out; 367 368 /* Allocate a directory entry that points to the new file. */ 369 error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de); 370 if (error) { 371 tmpfs_free_node(tmp, node); 372 goto out; 373 } 374 375 /* Get a vnode for the new file. */ 376 mutex_enter(&node->tn_vlock); 377 error = tmpfs_vnode_get(dvp->v_mount, node, vpp); 378 if (error) { 379 tmpfs_free_dirent(tmp, de); 380 tmpfs_free_node(tmp, node); 381 goto out; 382 } 383 384 /* Remove whiteout before adding the new entry. */ 385 if (cnp->cn_flags & ISWHITEOUT) { 386 wde = tmpfs_dir_lookup(dnode, cnp); 387 KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT); 388 tmpfs_dir_detach(dvp, wde); 389 tmpfs_free_dirent(tmp, wde); 390 } 391 392 /* Associate inode and attach the entry into the directory. */ 393 tmpfs_dir_attach(dvp, de, node); 394 395 /* Make node opaque if requested. */ 396 if (cnp->cn_flags & ISWHITEOUT) 397 node->tn_flags |= UF_OPAQUE; 398 out: 399 vput(dvp); 400 return error; 401 } 402 403 /* 404 * tmpfs_alloc_dirent: allocates a new directory entry for the inode. 405 * The directory entry contains a path name component. 
406 */ 407 int 408 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len, 409 tmpfs_dirent_t **de) 410 { 411 tmpfs_dirent_t *nde; 412 413 nde = tmpfs_dirent_get(tmp); 414 if (nde == NULL) 415 return ENOSPC; 416 417 nde->td_name = tmpfs_strname_alloc(tmp, len); 418 if (nde->td_name == NULL) { 419 tmpfs_dirent_put(tmp, nde); 420 return ENOSPC; 421 } 422 nde->td_namelen = len; 423 memcpy(nde->td_name, name, len); 424 425 *de = nde; 426 return 0; 427 } 428 429 /* 430 * tmpfs_free_dirent: free a directory entry. 431 */ 432 void 433 tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) 434 { 435 436 /* KASSERT(de->td_node == NULL); */ 437 tmpfs_strname_free(tmp, de->td_name, de->td_namelen); 438 tmpfs_dirent_put(tmp, de); 439 } 440 441 /* 442 * tmpfs_dir_attach: associate directory entry with a specified inode, 443 * and attach the entry into the directory, specified by vnode. 444 * 445 * => Increases link count on the associated node. 446 * => Increases link count on directory node, if our node is VDIR. 447 * It is caller's responsibility to check for the LINK_MAX limit. 448 * => Triggers kqueue events here. 449 */ 450 void 451 tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node) 452 { 453 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 454 int events = NOTE_WRITE; 455 456 KASSERT(VOP_ISLOCKED(dvp)); 457 458 /* Associate directory entry and the inode. */ 459 de->td_node = node; 460 if (node != TMPFS_NODE_WHITEOUT) { 461 KASSERT(node->tn_links < LINK_MAX); 462 node->tn_links++; 463 464 /* Save the hint (might overwrite). */ 465 node->tn_dirent_hint = de; 466 } 467 468 /* Insert the entry to the directory (parent of inode). */ 469 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 470 dnode->tn_size += sizeof(tmpfs_dirent_t); 471 dnode->tn_status |= TMPFS_NODE_STATUSALL; 472 uvm_vnp_setsize(dvp, dnode->tn_size); 473 474 if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) { 475 /* Set parent. 
*/ 476 KASSERT(node->tn_spec.tn_dir.tn_parent == NULL); 477 node->tn_spec.tn_dir.tn_parent = dnode; 478 479 /* Increase the link count of parent. */ 480 KASSERT(dnode->tn_links < LINK_MAX); 481 dnode->tn_links++; 482 events |= NOTE_LINK; 483 484 TMPFS_VALIDATE_DIR(node); 485 } 486 VN_KNOTE(dvp, events); 487 } 488 489 /* 490 * tmpfs_dir_detach: disassociate directory entry and its inode, 491 * and detach the entry from the directory, specified by vnode. 492 * 493 * => Decreases link count on the associated node. 494 * => Decreases the link count on directory node, if our node is VDIR. 495 * => Triggers kqueue events here. 496 */ 497 void 498 tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de) 499 { 500 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 501 tmpfs_node_t *node = de->td_node; 502 int events = NOTE_WRITE; 503 504 KASSERT(VOP_ISLOCKED(dvp)); 505 506 if (node != TMPFS_NODE_WHITEOUT) { 507 vnode_t *vp = node->tn_vnode; 508 509 KASSERT(VOP_ISLOCKED(vp)); 510 511 /* Deassociate the inode and entry. */ 512 de->td_node = NULL; 513 node->tn_dirent_hint = NULL; 514 515 KASSERT(node->tn_links > 0); 516 node->tn_links--; 517 VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE); 518 519 /* If directory - decrease the link count of parent. */ 520 if (node->tn_type == VDIR) { 521 KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); 522 node->tn_spec.tn_dir.tn_parent = NULL; 523 524 KASSERT(dnode->tn_links > 0); 525 dnode->tn_links--; 526 events |= NOTE_LINK; 527 } 528 } 529 530 /* Remove the entry from the directory. 
*/ 531 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 532 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 533 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 534 } 535 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 536 537 dnode->tn_size -= sizeof(tmpfs_dirent_t); 538 dnode->tn_status |= TMPFS_NODE_STATUSALL; 539 uvm_vnp_setsize(dvp, dnode->tn_size); 540 VN_KNOTE(dvp, events); 541 } 542 543 /* 544 * tmpfs_dir_lookup: find a directory entry in the specified inode. 545 * 546 * Note that the . and .. components are not allowed as they do not 547 * physically exist within directories. 548 */ 549 tmpfs_dirent_t * 550 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp) 551 { 552 const char *name = cnp->cn_nameptr; 553 const uint16_t nlen = cnp->cn_namelen; 554 tmpfs_dirent_t *de; 555 556 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 557 KASSERT(nlen != 1 || !(name[0] == '.')); 558 KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.')); 559 TMPFS_VALIDATE_DIR(node); 560 561 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 562 if (de->td_namelen != nlen) 563 continue; 564 if (memcmp(de->td_name, name, nlen) != 0) 565 continue; 566 break; 567 } 568 node->tn_status |= TMPFS_NODE_ACCESSED; 569 return de; 570 } 571 572 /* 573 * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to 574 * avoid unnecessary tmpds_dir_lookup(). 575 * 576 * => The vnode must be locked. 577 */ 578 tmpfs_dirent_t * 579 tmpfs_dir_cached(tmpfs_node_t *node) 580 { 581 tmpfs_dirent_t *de = node->tn_dirent_hint; 582 583 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 584 585 if (de == NULL) { 586 return NULL; 587 } 588 KASSERT(de->td_node == node); 589 590 /* 591 * Directories always have a valid hint. For files, check if there 592 * are any hard links. If there are - hint might be invalid. 593 */ 594 return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de; 595 } 596 597 /* 598 * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. 
Creates a 599 * '.' entry for the given directory and returns it in the uio space. 600 */ 601 int 602 tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio) 603 { 604 struct dirent *dentp; 605 int error; 606 607 TMPFS_VALIDATE_DIR(node); 608 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 609 610 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 611 dentp->d_fileno = node->tn_id; 612 dentp->d_type = DT_DIR; 613 dentp->d_namlen = 1; 614 dentp->d_name[0] = '.'; 615 dentp->d_name[1] = '\0'; 616 dentp->d_reclen = _DIRENT_SIZE(dentp); 617 618 if (dentp->d_reclen > uio->uio_resid) 619 error = -1; 620 else { 621 error = uiomove(dentp, dentp->d_reclen, uio); 622 if (error == 0) 623 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 624 } 625 node->tn_status |= TMPFS_NODE_ACCESSED; 626 kmem_free(dentp, sizeof(struct dirent)); 627 return error; 628 } 629 630 /* 631 * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a 632 * '..' entry for the given directory and returns it in the uio space. 
633 */ 634 int 635 tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio) 636 { 637 struct dirent *dentp; 638 int error; 639 640 TMPFS_VALIDATE_DIR(node); 641 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 642 643 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 644 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 645 dentp->d_type = DT_DIR; 646 dentp->d_namlen = 2; 647 dentp->d_name[0] = '.'; 648 dentp->d_name[1] = '.'; 649 dentp->d_name[2] = '\0'; 650 dentp->d_reclen = _DIRENT_SIZE(dentp); 651 652 if (dentp->d_reclen > uio->uio_resid) 653 error = -1; 654 else { 655 error = uiomove(dentp, dentp->d_reclen, uio); 656 if (error == 0) { 657 tmpfs_dirent_t *de; 658 659 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 660 if (de == NULL) 661 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 662 else 663 uio->uio_offset = tmpfs_dircookie(de); 664 } 665 } 666 node->tn_status |= TMPFS_NODE_ACCESSED; 667 kmem_free(dentp, sizeof(struct dirent)); 668 return error; 669 } 670 671 /* 672 * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie. 673 */ 674 tmpfs_dirent_t * 675 tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie) 676 { 677 tmpfs_dirent_t *de; 678 679 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 680 681 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 682 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 683 return node->tn_spec.tn_dir.tn_readdir_lastp; 684 } 685 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 686 if (tmpfs_dircookie(de) == cookie) { 687 break; 688 } 689 } 690 return de; 691 } 692 693 /* 694 * tmpfs_dir_getdents: relper function for tmpfs_readdir. 695 * 696 * => Returns as much directory entries as can fit in the uio space. 697 * => The read starts at uio->uio_offset. 
698 */ 699 int 700 tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp) 701 { 702 tmpfs_dirent_t *de; 703 struct dirent *dentp; 704 off_t startcookie; 705 int error; 706 707 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 708 TMPFS_VALIDATE_DIR(node); 709 710 /* 711 * Locate the first directory entry we have to return. We have cached 712 * the last readdir in the node, so use those values if appropriate. 713 * Otherwise do a linear scan to find the requested entry. 714 */ 715 startcookie = uio->uio_offset; 716 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 717 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 718 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 719 return 0; 720 } else { 721 de = tmpfs_dir_lookupbycookie(node, startcookie); 722 } 723 if (de == NULL) { 724 return EINVAL; 725 } 726 727 /* 728 * Read as much entries as possible; i.e., until we reach the end 729 * of the directory or we exhaust uio space. 730 */ 731 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 732 do { 733 /* 734 * Create a dirent structure representing the current 735 * inode and fill it. 
736 */ 737 if (de->td_node == TMPFS_NODE_WHITEOUT) { 738 dentp->d_fileno = 1; 739 dentp->d_type = DT_WHT; 740 } else { 741 dentp->d_fileno = de->td_node->tn_id; 742 switch (de->td_node->tn_type) { 743 case VBLK: 744 dentp->d_type = DT_BLK; 745 break; 746 case VCHR: 747 dentp->d_type = DT_CHR; 748 break; 749 case VDIR: 750 dentp->d_type = DT_DIR; 751 break; 752 case VFIFO: 753 dentp->d_type = DT_FIFO; 754 break; 755 case VLNK: 756 dentp->d_type = DT_LNK; 757 break; 758 case VREG: 759 dentp->d_type = DT_REG; 760 break; 761 case VSOCK: 762 dentp->d_type = DT_SOCK; 763 break; 764 default: 765 KASSERT(false); 766 } 767 } 768 dentp->d_namlen = de->td_namelen; 769 KASSERT(de->td_namelen < sizeof(dentp->d_name)); 770 memcpy(dentp->d_name, de->td_name, de->td_namelen); 771 dentp->d_name[de->td_namelen] = '\0'; 772 dentp->d_reclen = _DIRENT_SIZE(dentp); 773 774 /* Stop reading if the directory entry we are treating is 775 * bigger than the amount of data that can be returned. */ 776 if (dentp->d_reclen > uio->uio_resid) { 777 error = -1; 778 break; 779 } 780 781 /* 782 * Copy the new dirent structure into the output buffer and 783 * advance pointers. 784 */ 785 error = uiomove(dentp, dentp->d_reclen, uio); 786 787 (*cntp)++; 788 de = TAILQ_NEXT(de, td_entries); 789 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 790 791 /* Update the offset and cache. */ 792 if (de == NULL) { 793 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 794 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 795 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 796 } else { 797 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 798 tmpfs_dircookie(de); 799 node->tn_spec.tn_dir.tn_readdir_lastp = de; 800 } 801 node->tn_status |= TMPFS_NODE_ACCESSED; 802 kmem_free(dentp, sizeof(struct dirent)); 803 return error; 804 } 805 806 /* 807 * tmpfs_reg_resize: resize the underlying UVM object associated with the 808 * specified regular file. 
809 */ 810 int 811 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 812 { 813 tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); 814 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 815 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 816 size_t newpages, oldpages; 817 off_t oldsize; 818 819 KASSERT(vp->v_type == VREG); 820 KASSERT(newsize >= 0); 821 822 oldsize = node->tn_size; 823 oldpages = round_page(oldsize) >> PAGE_SHIFT; 824 newpages = round_page(newsize) >> PAGE_SHIFT; 825 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 826 827 if (newpages > oldpages) { 828 /* Increase the used-memory counter if getting extra pages. */ 829 if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) { 830 return ENOSPC; 831 } 832 } else if (newsize < oldsize) { 833 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 834 835 ubc_zerorange(uobj, newsize, zerolen, UBC_UNMAP_FLAG(vp)); 836 } 837 838 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 839 node->tn_size = newsize; 840 uvm_vnp_setsize(vp, newsize); 841 842 /* 843 * Free "backing store". 844 */ 845 if (newpages < oldpages) { 846 KASSERT(uobj->vmobjlock == vp->v_interlock); 847 848 mutex_enter(uobj->vmobjlock); 849 uao_dropswap_range(uobj, newpages, oldpages); 850 mutex_exit(uobj->vmobjlock); 851 852 /* Decrease the used-memory counter. */ 853 tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); 854 } 855 if (newsize > oldsize) { 856 VN_KNOTE(vp, NOTE_EXTEND); 857 } 858 return 0; 859 } 860 861 /* 862 * tmpfs_chflags: change flags of the given vnode. 863 * 864 * => Caller should perform tmpfs_update(). 865 */ 866 int 867 tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l) 868 { 869 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 870 kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS; 871 int error; 872 bool changing_sysflags = false; 873 874 KASSERT(VOP_ISLOCKED(vp)); 875 876 /* Disallow this operation if the file system is mounted read-only. 
*/ 877 if (vp->v_mount->mnt_flag & MNT_RDONLY) 878 return EROFS; 879 880 /* 881 * If the new flags have non-user flags that are different than 882 * those on the node, we need special permission to change them. 883 */ 884 if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) { 885 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 886 changing_sysflags = true; 887 } 888 889 /* 890 * Indicate that this node's flags have system attributes in them if 891 * that's the case. 892 */ 893 if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) { 894 action |= KAUTH_VNODE_HAS_SYSFLAGS; 895 } 896 897 error = kauth_authorize_vnode(cred, action, vp, NULL, 898 genfs_can_chflags(cred, vp->v_type, node->tn_uid, 899 changing_sysflags)); 900 if (error) 901 return error; 902 903 /* 904 * Set the flags. If we're not setting non-user flags, be careful not 905 * to overwrite them. 906 * 907 * XXX: Can't we always assign here? if the system flags are different, 908 * the code above should catch attempts to change them without 909 * proper permissions, and if we're here it means it's okay to 910 * change them... 911 */ 912 if (!changing_sysflags) { 913 /* Clear all user-settable flags and re-set them. */ 914 node->tn_flags &= SF_SETTABLE; 915 node->tn_flags |= (flags & UF_SETTABLE); 916 } else { 917 node->tn_flags = flags; 918 } 919 node->tn_status |= TMPFS_NODE_CHANGED; 920 VN_KNOTE(vp, NOTE_ATTRIB); 921 return 0; 922 } 923 924 /* 925 * tmpfs_chmod: change access mode on the given vnode. 926 * 927 * => Caller should perform tmpfs_update(). 928 */ 929 int 930 tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l) 931 { 932 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 933 int error; 934 935 KASSERT(VOP_ISLOCKED(vp)); 936 937 /* Disallow this operation if the file system is mounted read-only. */ 938 if (vp->v_mount->mnt_flag & MNT_RDONLY) 939 return EROFS; 940 941 /* Immutable or append-only files cannot be modified, either. 
*/ 942 if (node->tn_flags & (IMMUTABLE | APPEND)) 943 return EPERM; 944 945 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp, 946 NULL, genfs_can_chmod(vp->v_type, cred, node->tn_uid, node->tn_gid, mode)); 947 if (error) { 948 return error; 949 } 950 node->tn_mode = (mode & ALLPERMS); 951 node->tn_status |= TMPFS_NODE_CHANGED; 952 VN_KNOTE(vp, NOTE_ATTRIB); 953 return 0; 954 } 955 956 /* 957 * tmpfs_chown: change ownership of the given vnode. 958 * 959 * => At least one of uid or gid must be different than VNOVAL. 960 * => Attribute is unchanged for VNOVAL case. 961 * => Caller should perform tmpfs_update(). 962 */ 963 int 964 tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l) 965 { 966 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 967 int error; 968 969 KASSERT(VOP_ISLOCKED(vp)); 970 971 /* Assign default values if they are unknown. */ 972 KASSERT(uid != VNOVAL || gid != VNOVAL); 973 if (uid == VNOVAL) { 974 uid = node->tn_uid; 975 } 976 if (gid == VNOVAL) { 977 gid = node->tn_gid; 978 } 979 980 /* Disallow this operation if the file system is mounted read-only. */ 981 if (vp->v_mount->mnt_flag & MNT_RDONLY) 982 return EROFS; 983 984 /* Immutable or append-only files cannot be modified, either. */ 985 if (node->tn_flags & (IMMUTABLE | APPEND)) 986 return EPERM; 987 988 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp, 989 NULL, genfs_can_chown(cred, node->tn_uid, node->tn_gid, uid, 990 gid)); 991 if (error) { 992 return error; 993 } 994 node->tn_uid = uid; 995 node->tn_gid = gid; 996 node->tn_status |= TMPFS_NODE_CHANGED; 997 VN_KNOTE(vp, NOTE_ATTRIB); 998 return 0; 999 } 1000 1001 /* 1002 * tmpfs_chsize: change size of the given vnode. 1003 */ 1004 int 1005 tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l) 1006 { 1007 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1008 1009 KASSERT(VOP_ISLOCKED(vp)); 1010 1011 /* Decide whether this is a valid operation based on the file type. 
*/ 1012 switch (vp->v_type) { 1013 case VDIR: 1014 return EISDIR; 1015 case VREG: 1016 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 1017 return EROFS; 1018 } 1019 break; 1020 case VBLK: 1021 case VCHR: 1022 case VFIFO: 1023 /* 1024 * Allow modifications of special files even if in the file 1025 * system is mounted read-only (we are not modifying the 1026 * files themselves, but the objects they represent). 1027 */ 1028 return 0; 1029 default: 1030 return EOPNOTSUPP; 1031 } 1032 1033 /* Immutable or append-only files cannot be modified, either. */ 1034 if (node->tn_flags & (IMMUTABLE | APPEND)) { 1035 return EPERM; 1036 } 1037 1038 /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */ 1039 return tmpfs_truncate(vp, size); 1040 } 1041 1042 /* 1043 * tmpfs_chtimes: change access and modification times for vnode. 1044 */ 1045 int 1046 tmpfs_chtimes(vnode_t *vp, const struct timespec *atime, 1047 const struct timespec *mtime, const struct timespec *btime, 1048 int vaflags, kauth_cred_t cred, lwp_t *l) 1049 { 1050 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1051 int error; 1052 1053 KASSERT(VOP_ISLOCKED(vp)); 1054 1055 /* Disallow this operation if the file system is mounted read-only. */ 1056 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1057 return EROFS; 1058 1059 /* Immutable or append-only files cannot be modified, either. 
*/ 1060 if (node->tn_flags & (IMMUTABLE | APPEND)) 1061 return EPERM; 1062 1063 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, 1064 genfs_can_chtimes(vp, vaflags, node->tn_uid, cred)); 1065 if (error) 1066 return error; 1067 1068 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1069 node->tn_status |= TMPFS_NODE_ACCESSED; 1070 1071 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1072 node->tn_status |= TMPFS_NODE_MODIFIED; 1073 1074 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL) 1075 btime = NULL; 1076 1077 tmpfs_update(vp, atime, mtime, btime, 0); 1078 VN_KNOTE(vp, NOTE_ATTRIB); 1079 return 0; 1080 } 1081 1082 /* 1083 * tmpfs_update: update timestamps, et al. 1084 */ 1085 void 1086 tmpfs_update(vnode_t *vp, const struct timespec *acc, 1087 const struct timespec *mod, const struct timespec *birth, int flags) 1088 { 1089 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1090 struct timespec nowtm; 1091 1092 /* KASSERT(VOP_ISLOCKED(vp)); */ 1093 1094 if (flags & UPDATE_CLOSE) { 1095 /* XXX Need to do anything special? */ 1096 } 1097 if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) { 1098 return; 1099 } 1100 if (birth != NULL) { 1101 node->tn_birthtime = *birth; 1102 } 1103 vfs_timestamp(&nowtm); 1104 1105 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1106 node->tn_atime = acc ? *acc : nowtm; 1107 } 1108 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1109 node->tn_mtime = mod ? 
*mod : nowtm; 1110 } 1111 if (node->tn_status & TMPFS_NODE_CHANGED) { 1112 node->tn_ctime = nowtm; 1113 } 1114 1115 node->tn_status &= ~TMPFS_NODE_STATUSALL; 1116 } 1117 1118 int 1119 tmpfs_truncate(vnode_t *vp, off_t length) 1120 { 1121 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1122 int error; 1123 1124 if (length < 0) { 1125 error = EINVAL; 1126 goto out; 1127 } 1128 if (node->tn_size == length) { 1129 error = 0; 1130 goto out; 1131 } 1132 error = tmpfs_reg_resize(vp, length); 1133 if (error == 0) { 1134 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1135 } 1136 out: 1137 tmpfs_update(vp, NULL, NULL, NULL, 0); 1138 return error; 1139 } 1140