1 /* $NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $ */ 2 3 /* 4 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program, and by Mindaugas Rasiukevicius. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system: interfaces for inode and directory entry 35 * construction, destruction and manipulation. 36 * 37 * Reference counting 38 * 39 * The link count of inode (tmpfs_node_t::tn_links) is used as a 40 * reference counter. However, it has slightly different semantics. 41 * 42 * For directories - link count represents directory entries, which 43 * refer to the directories. In other words, it represents the count 44 * of sub-directories. It also takes into account the virtual '.' 45 * entry (which has no real entry in the list). For files - link count 46 * represents the hard links. Since only empty directories can be 47 * removed - link count aligns the reference counting requirements 48 * enough. Note: to check whether directory is not empty, the inode 49 * size (tmpfs_node_t::tn_size) can be used. 50 * 51 * The inode itself, as an object, gathers its first reference when 52 * directory entry is attached via tmpfs_dir_attach(9). For instance, 53 * after regular tmpfs_create(), a file would have a link count of 1, 54 * while directory after tmpfs_mkdir() would have 2 (due to '.'). 55 * 56 * Reclamation 57 * 58 * It should be noted that tmpfs inodes rely on a combination of vnode 59 * reference counting and link counting. That is, an inode can only be 60 * destroyed if its associated vnode is inactive. The destruction is 61 * done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted 62 * that tmpfs_node_t::tn_links being 0 is a destruction criterion. 63 * 64 * If an inode has references within the file system (tn_links > 0) and 65 * its inactive vnode gets reclaimed/recycled - then the association is 66 * broken in tmpfs_reclaim(). In such case, an inode will always pass 67 * tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode. 68 * 69 * Lock order 70 * 71 * tmpfs_node_t::tn_vlock -> 72 * vnode_t::v_vlock -> 73 * vnode_t::v_interlock 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $"); 78 79 #include <sys/param.h> 80 #include <sys/dirent.h> 81 #include <sys/event.h> 82 #include <sys/kmem.h> 83 #include <sys/mount.h> 84 #include <sys/namei.h> 85 #include <sys/time.h> 86 #include <sys/stat.h> 87 #include <sys/systm.h> 88 #include <sys/vnode.h> 89 #include <sys/kauth.h> 90 #include <sys/atomic.h> 91 92 #include <uvm/uvm.h> 93 94 #include <miscfs/specfs/specdev.h> 95 #include <miscfs/genfs/genfs.h> 96 #include <fs/tmpfs/tmpfs.h> 97 #include <fs/tmpfs/tmpfs_fifoops.h> 98 #include <fs/tmpfs/tmpfs_specops.h> 99 #include <fs/tmpfs/tmpfs_vnops.h> 100 101 /* 102 * tmpfs_alloc_node: allocate a new inode of a specified type and 103 * insert it into the list of specified mount point. 104 */ 105 int 106 tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid, 107 mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node) 108 { 109 tmpfs_node_t *nnode; 110 111 nnode = tmpfs_node_get(tmp); 112 if (nnode == NULL) { 113 return ENOSPC; 114 } 115 116 /* Initially, no references and no associations. */ 117 nnode->tn_links = 0; 118 nnode->tn_vnode = NULL; 119 nnode->tn_dirent_hint = NULL; 120 121 /* 122 * XXX Where the pool is backed by a map larger than (4GB * 123 * sizeof(*nnode)), this may produce duplicate inode numbers 124 * for applications that do not understand 64-bit ino_t. 125 */ 126 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); 127 nnode->tn_gen = TMPFS_NODE_GEN_MASK & random(); 128 129 /* Generic initialization. */ 130 nnode->tn_type = type; 131 nnode->tn_size = 0; 132 nnode->tn_status = 0; 133 nnode->tn_flags = 0; 134 nnode->tn_lockf = NULL; 135 136 vfs_timestamp(&nnode->tn_atime); 137 nnode->tn_birthtime = nnode->tn_atime; 138 nnode->tn_ctime = nnode->tn_atime; 139 nnode->tn_mtime = nnode->tn_atime; 140 141 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 142 nnode->tn_uid = uid; 143 nnode->tn_gid = gid; 144 nnode->tn_mode = mode; 145 146 /* Type-specific initialization. */ 147 switch (nnode->tn_type) { 148 case VBLK: 149 case VCHR: 150 /* Character/block special device. */ 151 KASSERT(rdev != VNOVAL); 152 nnode->tn_spec.tn_dev.tn_rdev = rdev; 153 break; 154 case VDIR: 155 /* Directory. */ 156 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 157 nnode->tn_spec.tn_dir.tn_parent = NULL; 158 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 159 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 160 161 /* Extra link count for the virtual '.' entry. */ 162 nnode->tn_links++; 163 break; 164 case VFIFO: 165 case VSOCK: 166 break; 167 case VLNK: 168 /* Symbolic link. Target specifies the file name. */ 169 KASSERT(target && strlen(target) < MAXPATHLEN); 170 171 nnode->tn_size = strlen(target); 172 if (nnode->tn_size == 0) { 173 nnode->tn_spec.tn_lnk.tn_link = NULL; 174 break; 175 } 176 nnode->tn_spec.tn_lnk.tn_link = 177 tmpfs_strname_alloc(tmp, nnode->tn_size); 178 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 179 tmpfs_node_put(tmp, nnode); 180 return ENOSPC; 181 } 182 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 183 break; 184 case VREG: 185 /* Regular file. Create an underlying UVM object. */ 186 nnode->tn_spec.tn_reg.tn_aobj = 187 uao_create(INT32_MAX - PAGE_SIZE, 0); 188 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 189 break; 190 default: 191 KASSERT(false); 192 } 193 194 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); 195 196 mutex_enter(&tmp->tm_lock); 197 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 198 mutex_exit(&tmp->tm_lock); 199 200 *node = nnode; 201 return 0; 202 } 203 204 /* 205 * tmpfs_free_node: remove the inode from a list in the mount point and 206 * destroy the inode structures. 207 */ 208 void 209 tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node) 210 { 211 size_t objsz; 212 213 mutex_enter(&tmp->tm_lock); 214 LIST_REMOVE(node, tn_entries); 215 mutex_exit(&tmp->tm_lock); 216 217 switch (node->tn_type) { 218 case VLNK: 219 if (node->tn_size > 0) { 220 tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link, 221 node->tn_size); 222 } 223 break; 224 case VREG: 225 /* 226 * Calculate the size of inode data, decrease the used-memory 227 * counter, and destroy the unerlying UVM object (if any). 228 */ 229 objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages; 230 if (objsz != 0) { 231 tmpfs_mem_decr(tmp, objsz); 232 } 233 if (node->tn_spec.tn_reg.tn_aobj != NULL) { 234 uao_detach(node->tn_spec.tn_reg.tn_aobj); 235 } 236 break; 237 case VDIR: 238 /* 239 * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); 240 * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || 241 * node == tmp->tm_root); 242 */ 243 break; 244 default: 245 break; 246 } 247 248 mutex_destroy(&node->tn_vlock); 249 tmpfs_node_put(tmp, node); 250 } 251 252 /* 253 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode. 254 * 255 * => Must be called with tmpfs_node_t::tn_vlock held. 256 * => Returns vnode (*vpp) locked. 257 */ 258 int 259 tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp) 260 { 261 vnode_t *vp; 262 kmutex_t *slock; 263 int error; 264 again: 265 /* If there is already a vnode, try to reclaim it. */ 266 if ((vp = node->tn_vnode) != NULL) { 267 atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); 268 mutex_enter(vp->v_interlock); 269 mutex_exit(&node->tn_vlock); 270 error = vget(vp, LK_EXCLUSIVE); 271 if (error == ENOENT) { 272 mutex_enter(&node->tn_vlock); 273 goto again; 274 } 275 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 276 *vpp = vp; 277 return error; 278 } 279 if (TMPFS_NODE_RECLAIMING(node)) { 280 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 281 } 282 283 /* 284 * Get a new vnode and associate it with our inode. Share the 285 * lock with underlying UVM object, if there is one (VREG case). 286 */ 287 if (node->tn_type == VREG) { 288 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 289 slock = uobj->vmobjlock; 290 } else { 291 slock = NULL; 292 } 293 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, slock, &vp); 294 if (error) { 295 mutex_exit(&node->tn_vlock); 296 return error; 297 } 298 299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 300 vp->v_type = node->tn_type; 301 302 /* Type-specific initialization. */ 303 switch (node->tn_type) { 304 case VBLK: 305 case VCHR: 306 vp->v_op = tmpfs_specop_p; 307 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev); 308 break; 309 case VDIR: 310 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ? 311 VV_ROOT : 0; 312 break; 313 case VFIFO: 314 vp->v_op = tmpfs_fifoop_p; 315 break; 316 case VLNK: 317 case VREG: 318 case VSOCK: 319 break; 320 default: 321 KASSERT(false); 322 } 323 324 uvm_vnp_setsize(vp, node->tn_size); 325 vp->v_data = node; 326 node->tn_vnode = vp; 327 mutex_exit(&node->tn_vlock); 328 329 KASSERT(VOP_ISLOCKED(vp)); 330 *vpp = vp; 331 return 0; 332 } 333 334 /* 335 * tmpfs_alloc_file: allocate a new file of specified type and adds it 336 * into the parent directory. 337 * 338 * => Credentials of the caller are used. 339 */ 340 int 341 tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap, 342 struct componentname *cnp, char *target) 343 { 344 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); 345 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node; 346 tmpfs_dirent_t *de, *wde; 347 int error; 348 349 KASSERT(VOP_ISLOCKED(dvp)); 350 *vpp = NULL; 351 352 /* Check for the maximum number of links limit. */ 353 if (vap->va_type == VDIR) { 354 /* Check for maximum links limit. */ 355 if (dnode->tn_links == LINK_MAX) { 356 error = EMLINK; 357 goto out; 358 } 359 KASSERT(dnode->tn_links < LINK_MAX); 360 } 361 362 /* Allocate a node that represents the new file. */ 363 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 364 dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node); 365 if (error) 366 goto out; 367 368 /* Allocate a directory entry that points to the new file. */ 369 error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de); 370 if (error) { 371 tmpfs_free_node(tmp, node); 372 goto out; 373 } 374 375 /* Get a vnode for the new file. */ 376 mutex_enter(&node->tn_vlock); 377 error = tmpfs_vnode_get(dvp->v_mount, node, vpp); 378 if (error) { 379 tmpfs_free_dirent(tmp, de); 380 tmpfs_free_node(tmp, node); 381 goto out; 382 } 383 384 /* Remove whiteout before adding the new entry. */ 385 if (cnp->cn_flags & ISWHITEOUT) { 386 wde = tmpfs_dir_lookup(dnode, cnp); 387 KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT); 388 tmpfs_dir_detach(dvp, wde); 389 tmpfs_free_dirent(tmp, wde); 390 } 391 392 /* Associate inode and attach the entry into the directory. */ 393 tmpfs_dir_attach(dvp, de, node); 394 395 /* Make node opaque if requested. */ 396 if (cnp->cn_flags & ISWHITEOUT) 397 node->tn_flags |= UF_OPAQUE; 398 out: 399 vput(dvp); 400 return error; 401 } 402 403 /* 404 * tmpfs_alloc_dirent: allocates a new directory entry for the inode. 405 * The directory entry contains a path name component. 406 */ 407 int 408 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len, 409 tmpfs_dirent_t **de) 410 { 411 tmpfs_dirent_t *nde; 412 413 nde = tmpfs_dirent_get(tmp); 414 if (nde == NULL) 415 return ENOSPC; 416 417 nde->td_name = tmpfs_strname_alloc(tmp, len); 418 if (nde->td_name == NULL) { 419 tmpfs_dirent_put(tmp, nde); 420 return ENOSPC; 421 } 422 nde->td_namelen = len; 423 memcpy(nde->td_name, name, len); 424 425 *de = nde; 426 return 0; 427 } 428 429 /* 430 * tmpfs_free_dirent: free a directory entry. 431 */ 432 void 433 tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) 434 { 435 436 /* KASSERT(de->td_node == NULL); */ 437 tmpfs_strname_free(tmp, de->td_name, de->td_namelen); 438 tmpfs_dirent_put(tmp, de); 439 } 440 441 /* 442 * tmpfs_dir_attach: associate directory entry with a specified inode, 443 * and attach the entry into the directory, specified by vnode. 444 * 445 * => Increases link count on the associated node. 446 * => Increases link count on directory node, if our node is VDIR. 447 * It is caller's responsibility to check for the LINK_MAX limit. 448 * => Triggers kqueue events here. 449 */ 450 void 451 tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node) 452 { 453 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 454 int events = NOTE_WRITE; 455 456 KASSERT(VOP_ISLOCKED(dvp)); 457 458 /* Associate directory entry and the inode. */ 459 de->td_node = node; 460 if (node != TMPFS_NODE_WHITEOUT) { 461 KASSERT(node->tn_links < LINK_MAX); 462 node->tn_links++; 463 464 /* Save the hint (might overwrite). */ 465 node->tn_dirent_hint = de; 466 } 467 468 /* Insert the entry to the directory (parent of inode). */ 469 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 470 dnode->tn_size += sizeof(tmpfs_dirent_t); 471 dnode->tn_status |= TMPFS_NODE_STATUSALL; 472 uvm_vnp_setsize(dvp, dnode->tn_size); 473 474 if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) { 475 /* Set parent. */ 476 KASSERT(node->tn_spec.tn_dir.tn_parent == NULL); 477 node->tn_spec.tn_dir.tn_parent = dnode; 478 479 /* Increase the link count of parent. */ 480 KASSERT(dnode->tn_links < LINK_MAX); 481 dnode->tn_links++; 482 events |= NOTE_LINK; 483 484 TMPFS_VALIDATE_DIR(node); 485 } 486 VN_KNOTE(dvp, events); 487 } 488 489 /* 490 * tmpfs_dir_detach: disassociate directory entry and its inode, 491 * and detach the entry from the directory, specified by vnode. 492 * 493 * => Decreases link count on the associated node. 494 * => Decreases the link count on directory node, if our node is VDIR. 495 * => Triggers kqueue events here. 496 */ 497 void 498 tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de) 499 { 500 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 501 tmpfs_node_t *node = de->td_node; 502 int events = NOTE_WRITE; 503 504 KASSERT(VOP_ISLOCKED(dvp)); 505 506 if (node != TMPFS_NODE_WHITEOUT) { 507 vnode_t *vp = node->tn_vnode; 508 509 KASSERT(VOP_ISLOCKED(vp)); 510 511 /* Deassociate the inode and entry. */ 512 de->td_node = NULL; 513 node->tn_dirent_hint = NULL; 514 515 KASSERT(node->tn_links > 0); 516 node->tn_links--; 517 if (vp) { 518 VN_KNOTE(vp, node->tn_links ? 519 NOTE_LINK : NOTE_DELETE); 520 } 521 522 /* If directory - decrease the link count of parent. */ 523 if (node->tn_type == VDIR) { 524 KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); 525 node->tn_spec.tn_dir.tn_parent = NULL; 526 527 KASSERT(dnode->tn_links > 0); 528 dnode->tn_links--; 529 events |= NOTE_LINK; 530 } 531 } 532 533 /* Remove the entry from the directory. */ 534 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 535 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 536 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 537 } 538 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 539 540 dnode->tn_size -= sizeof(tmpfs_dirent_t); 541 dnode->tn_status |= TMPFS_NODE_STATUSALL; 542 uvm_vnp_setsize(dvp, dnode->tn_size); 543 VN_KNOTE(dvp, events); 544 } 545 546 /* 547 * tmpfs_dir_lookup: find a directory entry in the specified inode. 548 * 549 * Note that the . and .. components are not allowed as they do not 550 * physically exist within directories. 551 */ 552 tmpfs_dirent_t * 553 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp) 554 { 555 const char *name = cnp->cn_nameptr; 556 const uint16_t nlen = cnp->cn_namelen; 557 tmpfs_dirent_t *de; 558 559 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 560 KASSERT(nlen != 1 || !(name[0] == '.')); 561 KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.')); 562 TMPFS_VALIDATE_DIR(node); 563 564 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 565 if (de->td_namelen != nlen) 566 continue; 567 if (memcmp(de->td_name, name, nlen) != 0) 568 continue; 569 break; 570 } 571 node->tn_status |= TMPFS_NODE_ACCESSED; 572 return de; 573 } 574 575 /* 576 * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to 577 * avoid unnecessary tmpds_dir_lookup(). 578 * 579 * => The vnode must be locked. 580 */ 581 tmpfs_dirent_t * 582 tmpfs_dir_cached(tmpfs_node_t *node) 583 { 584 tmpfs_dirent_t *de = node->tn_dirent_hint; 585 586 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 587 588 if (de == NULL) { 589 return NULL; 590 } 591 KASSERT(de->td_node == node); 592 593 /* 594 * Directories always have a valid hint. For files, check if there 595 * are any hard links. If there are - hint might be invalid. 596 */ 597 return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de; 598 } 599 600 /* 601 * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a 602 * '.' entry for the given directory and returns it in the uio space. 603 */ 604 int 605 tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio) 606 { 607 struct dirent *dentp; 608 int error; 609 610 TMPFS_VALIDATE_DIR(node); 611 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 612 613 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 614 dentp->d_fileno = node->tn_id; 615 dentp->d_type = DT_DIR; 616 dentp->d_namlen = 1; 617 dentp->d_name[0] = '.'; 618 dentp->d_name[1] = '\0'; 619 dentp->d_reclen = _DIRENT_SIZE(dentp); 620 621 if (dentp->d_reclen > uio->uio_resid) 622 error = -1; 623 else { 624 error = uiomove(dentp, dentp->d_reclen, uio); 625 if (error == 0) 626 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 627 } 628 node->tn_status |= TMPFS_NODE_ACCESSED; 629 kmem_free(dentp, sizeof(struct dirent)); 630 return error; 631 } 632 633 /* 634 * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a 635 * '..' entry for the given directory and returns it in the uio space. 636 */ 637 int 638 tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio) 639 { 640 struct dirent *dentp; 641 int error; 642 643 TMPFS_VALIDATE_DIR(node); 644 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 645 646 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 647 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 648 dentp->d_type = DT_DIR; 649 dentp->d_namlen = 2; 650 dentp->d_name[0] = '.'; 651 dentp->d_name[1] = '.'; 652 dentp->d_name[2] = '\0'; 653 dentp->d_reclen = _DIRENT_SIZE(dentp); 654 655 if (dentp->d_reclen > uio->uio_resid) 656 error = -1; 657 else { 658 error = uiomove(dentp, dentp->d_reclen, uio); 659 if (error == 0) { 660 tmpfs_dirent_t *de; 661 662 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 663 if (de == NULL) 664 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 665 else 666 uio->uio_offset = tmpfs_dircookie(de); 667 } 668 } 669 node->tn_status |= TMPFS_NODE_ACCESSED; 670 kmem_free(dentp, sizeof(struct dirent)); 671 return error; 672 } 673 674 /* 675 * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie. 676 */ 677 tmpfs_dirent_t * 678 tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie) 679 { 680 tmpfs_dirent_t *de; 681 682 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 683 684 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 685 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 686 return node->tn_spec.tn_dir.tn_readdir_lastp; 687 } 688 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 689 if (tmpfs_dircookie(de) == cookie) { 690 break; 691 } 692 } 693 return de; 694 } 695 696 /* 697 * tmpfs_dir_getdents: relper function for tmpfs_readdir. 698 * 699 * => Returns as much directory entries as can fit in the uio space. 700 * => The read starts at uio->uio_offset. 701 */ 702 int 703 tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp) 704 { 705 tmpfs_dirent_t *de; 706 struct dirent *dentp; 707 off_t startcookie; 708 int error; 709 710 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 711 TMPFS_VALIDATE_DIR(node); 712 713 /* 714 * Locate the first directory entry we have to return. We have cached 715 * the last readdir in the node, so use those values if appropriate. 716 * Otherwise do a linear scan to find the requested entry. 717 */ 718 startcookie = uio->uio_offset; 719 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 720 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 721 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 722 return 0; 723 } else { 724 de = tmpfs_dir_lookupbycookie(node, startcookie); 725 } 726 if (de == NULL) { 727 return EINVAL; 728 } 729 730 /* 731 * Read as much entries as possible; i.e., until we reach the end 732 * of the directory or we exhaust uio space. 733 */ 734 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 735 do { 736 /* 737 * Create a dirent structure representing the current 738 * inode and fill it. 739 */ 740 if (de->td_node == TMPFS_NODE_WHITEOUT) { 741 dentp->d_fileno = 1; 742 dentp->d_type = DT_WHT; 743 } else { 744 dentp->d_fileno = de->td_node->tn_id; 745 switch (de->td_node->tn_type) { 746 case VBLK: 747 dentp->d_type = DT_BLK; 748 break; 749 case VCHR: 750 dentp->d_type = DT_CHR; 751 break; 752 case VDIR: 753 dentp->d_type = DT_DIR; 754 break; 755 case VFIFO: 756 dentp->d_type = DT_FIFO; 757 break; 758 case VLNK: 759 dentp->d_type = DT_LNK; 760 break; 761 case VREG: 762 dentp->d_type = DT_REG; 763 break; 764 case VSOCK: 765 dentp->d_type = DT_SOCK; 766 break; 767 default: 768 KASSERT(false); 769 } 770 } 771 dentp->d_namlen = de->td_namelen; 772 KASSERT(de->td_namelen < sizeof(dentp->d_name)); 773 memcpy(dentp->d_name, de->td_name, de->td_namelen); 774 dentp->d_name[de->td_namelen] = '\0'; 775 dentp->d_reclen = _DIRENT_SIZE(dentp); 776 777 /* Stop reading if the directory entry we are treating is 778 * bigger than the amount of data that can be returned. */ 779 if (dentp->d_reclen > uio->uio_resid) { 780 error = -1; 781 break; 782 } 783 784 /* 785 * Copy the new dirent structure into the output buffer and 786 * advance pointers. 787 */ 788 error = uiomove(dentp, dentp->d_reclen, uio); 789 790 (*cntp)++; 791 de = TAILQ_NEXT(de, td_entries); 792 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 793 794 /* Update the offset and cache. */ 795 if (de == NULL) { 796 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 797 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 798 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 799 } else { 800 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 801 tmpfs_dircookie(de); 802 node->tn_spec.tn_dir.tn_readdir_lastp = de; 803 } 804 node->tn_status |= TMPFS_NODE_ACCESSED; 805 kmem_free(dentp, sizeof(struct dirent)); 806 return error; 807 } 808 809 /* 810 * tmpfs_reg_resize: resize the underlying UVM object associated with the 811 * specified regular file. 812 */ 813 int 814 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 815 { 816 tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); 817 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 818 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 819 size_t newpages, oldpages; 820 off_t oldsize; 821 822 KASSERT(vp->v_type == VREG); 823 KASSERT(newsize >= 0); 824 825 oldsize = node->tn_size; 826 oldpages = round_page(oldsize) >> PAGE_SHIFT; 827 newpages = round_page(newsize) >> PAGE_SHIFT; 828 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 829 830 if (newpages > oldpages) { 831 /* Increase the used-memory counter if getting extra pages. */ 832 if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) { 833 return ENOSPC; 834 } 835 } else if (newsize < oldsize) { 836 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 837 838 ubc_zerorange(uobj, newsize, zerolen, UBC_UNMAP_FLAG(vp)); 839 } 840 841 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 842 node->tn_size = newsize; 843 uvm_vnp_setsize(vp, newsize); 844 845 /* 846 * Free "backing store". 847 */ 848 if (newpages < oldpages) { 849 KASSERT(uobj->vmobjlock == vp->v_interlock); 850 851 mutex_enter(uobj->vmobjlock); 852 uao_dropswap_range(uobj, newpages, oldpages); 853 mutex_exit(uobj->vmobjlock); 854 855 /* Decrease the used-memory counter. */ 856 tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); 857 } 858 if (newsize > oldsize) { 859 VN_KNOTE(vp, NOTE_EXTEND); 860 } 861 return 0; 862 } 863 864 /* 865 * tmpfs_chflags: change flags of the given vnode. 866 * 867 * => Caller should perform tmpfs_update(). 868 */ 869 int 870 tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l) 871 { 872 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 873 kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS; 874 int error; 875 bool changing_sysflags = false; 876 877 KASSERT(VOP_ISLOCKED(vp)); 878 879 /* Disallow this operation if the file system is mounted read-only. */ 880 if (vp->v_mount->mnt_flag & MNT_RDONLY) 881 return EROFS; 882 883 /* 884 * If the new flags have non-user flags that are different than 885 * those on the node, we need special permission to change them. 886 */ 887 if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) { 888 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 889 changing_sysflags = true; 890 } 891 892 /* 893 * Indicate that this node's flags have system attributes in them if 894 * that's the case. 895 */ 896 if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) { 897 action |= KAUTH_VNODE_HAS_SYSFLAGS; 898 } 899 900 error = kauth_authorize_vnode(cred, action, vp, NULL, 901 genfs_can_chflags(cred, vp->v_type, node->tn_uid, 902 changing_sysflags)); 903 if (error) 904 return error; 905 906 /* 907 * Set the flags. If we're not setting non-user flags, be careful not 908 * to overwrite them. 909 * 910 * XXX: Can't we always assign here? if the system flags are different, 911 * the code above should catch attempts to change them without 912 * proper permissions, and if we're here it means it's okay to 913 * change them... 914 */ 915 if (!changing_sysflags) { 916 /* Clear all user-settable flags and re-set them. */ 917 node->tn_flags &= SF_SETTABLE; 918 node->tn_flags |= (flags & UF_SETTABLE); 919 } else { 920 node->tn_flags = flags; 921 } 922 node->tn_status |= TMPFS_NODE_CHANGED; 923 VN_KNOTE(vp, NOTE_ATTRIB); 924 return 0; 925 } 926 927 /* 928 * tmpfs_chmod: change access mode on the given vnode. 929 * 930 * => Caller should perform tmpfs_update(). 931 */ 932 int 933 tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l) 934 { 935 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 936 int error; 937 938 KASSERT(VOP_ISLOCKED(vp)); 939 940 /* Disallow this operation if the file system is mounted read-only. */ 941 if (vp->v_mount->mnt_flag & MNT_RDONLY) 942 return EROFS; 943 944 /* Immutable or append-only files cannot be modified, either. */ 945 if (node->tn_flags & (IMMUTABLE | APPEND)) 946 return EPERM; 947 948 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp, 949 NULL, genfs_can_chmod(vp->v_type, cred, node->tn_uid, node->tn_gid, mode)); 950 if (error) { 951 return error; 952 } 953 node->tn_mode = (mode & ALLPERMS); 954 node->tn_status |= TMPFS_NODE_CHANGED; 955 VN_KNOTE(vp, NOTE_ATTRIB); 956 return 0; 957 } 958 959 /* 960 * tmpfs_chown: change ownership of the given vnode. 961 * 962 * => At least one of uid or gid must be different than VNOVAL. 963 * => Attribute is unchanged for VNOVAL case. 964 * => Caller should perform tmpfs_update(). 965 */ 966 int 967 tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l) 968 { 969 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 970 int error; 971 972 KASSERT(VOP_ISLOCKED(vp)); 973 974 /* Assign default values if they are unknown. */ 975 KASSERT(uid != VNOVAL || gid != VNOVAL); 976 if (uid == VNOVAL) { 977 uid = node->tn_uid; 978 } 979 if (gid == VNOVAL) { 980 gid = node->tn_gid; 981 } 982 983 /* Disallow this operation if the file system is mounted read-only. */ 984 if (vp->v_mount->mnt_flag & MNT_RDONLY) 985 return EROFS; 986 987 /* Immutable or append-only files cannot be modified, either. */ 988 if (node->tn_flags & (IMMUTABLE | APPEND)) 989 return EPERM; 990 991 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp, 992 NULL, genfs_can_chown(cred, node->tn_uid, node->tn_gid, uid, 993 gid)); 994 if (error) { 995 return error; 996 } 997 node->tn_uid = uid; 998 node->tn_gid = gid; 999 node->tn_status |= TMPFS_NODE_CHANGED; 1000 VN_KNOTE(vp, NOTE_ATTRIB); 1001 return 0; 1002 } 1003 1004 /* 1005 * tmpfs_chsize: change size of the given vnode. 1006 */ 1007 int 1008 tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l) 1009 { 1010 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1011 1012 KASSERT(VOP_ISLOCKED(vp)); 1013 1014 /* Decide whether this is a valid operation based on the file type. */ 1015 switch (vp->v_type) { 1016 case VDIR: 1017 return EISDIR; 1018 case VREG: 1019 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 1020 return EROFS; 1021 } 1022 break; 1023 case VBLK: 1024 case VCHR: 1025 case VFIFO: 1026 /* 1027 * Allow modifications of special files even if in the file 1028 * system is mounted read-only (we are not modifying the 1029 * files themselves, but the objects they represent). 1030 */ 1031 return 0; 1032 default: 1033 return EOPNOTSUPP; 1034 } 1035 1036 /* Immutable or append-only files cannot be modified, either. */ 1037 if (node->tn_flags & (IMMUTABLE | APPEND)) { 1038 return EPERM; 1039 } 1040 1041 /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */ 1042 return tmpfs_truncate(vp, size); 1043 } 1044 1045 /* 1046 * tmpfs_chtimes: change access and modification times for vnode. 1047 */ 1048 int 1049 tmpfs_chtimes(vnode_t *vp, const struct timespec *atime, 1050 const struct timespec *mtime, const struct timespec *btime, 1051 int vaflags, kauth_cred_t cred, lwp_t *l) 1052 { 1053 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1054 int error; 1055 1056 KASSERT(VOP_ISLOCKED(vp)); 1057 1058 /* Disallow this operation if the file system is mounted read-only. */ 1059 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1060 return EROFS; 1061 1062 /* Immutable or append-only files cannot be modified, either. */ 1063 if (node->tn_flags & (IMMUTABLE | APPEND)) 1064 return EPERM; 1065 1066 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, 1067 genfs_can_chtimes(vp, vaflags, node->tn_uid, cred)); 1068 if (error) 1069 return error; 1070 1071 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1072 node->tn_status |= TMPFS_NODE_ACCESSED; 1073 1074 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1075 node->tn_status |= TMPFS_NODE_MODIFIED; 1076 1077 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL) 1078 btime = NULL; 1079 1080 tmpfs_update(vp, atime, mtime, btime, 0); 1081 VN_KNOTE(vp, NOTE_ATTRIB); 1082 return 0; 1083 } 1084 1085 /* 1086 * tmpfs_update: update timestamps, et al. 1087 */ 1088 void 1089 tmpfs_update(vnode_t *vp, const struct timespec *acc, 1090 const struct timespec *mod, const struct timespec *birth, int flags) 1091 { 1092 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1093 struct timespec nowtm; 1094 1095 /* KASSERT(VOP_ISLOCKED(vp)); */ 1096 1097 if (flags & UPDATE_CLOSE) { 1098 /* XXX Need to do anything special? */ 1099 } 1100 if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) { 1101 return; 1102 } 1103 if (birth != NULL) { 1104 node->tn_birthtime = *birth; 1105 } 1106 vfs_timestamp(&nowtm); 1107 1108 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1109 node->tn_atime = acc ? *acc : nowtm; 1110 } 1111 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1112 node->tn_mtime = mod ? *mod : nowtm; 1113 } 1114 if (node->tn_status & TMPFS_NODE_CHANGED) { 1115 node->tn_ctime = nowtm; 1116 } 1117 1118 node->tn_status &= ~TMPFS_NODE_STATUSALL; 1119 } 1120 1121 int 1122 tmpfs_truncate(vnode_t *vp, off_t length) 1123 { 1124 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1125 int error; 1126 1127 if (length < 0) { 1128 error = EINVAL; 1129 goto out; 1130 } 1131 if (node->tn_size == length) { 1132 error = 0; 1133 goto out; 1134 } 1135 error = tmpfs_reg_resize(vp, length); 1136 if (error == 0) { 1137 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1138 } 1139 out: 1140 tmpfs_update(vp, NULL, NULL, NULL, 0); 1141 return error; 1142 } 1143