1 /* $NetBSD: tmpfs_subr.c,v 1.76 2011/06/30 00:37:07 enami Exp $ */ 2 3 /* 4 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program, and by Mindaugas Rasiukevicius. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system: interfaces for inode and directory entry 35 * construction, destruction and manipulation. 36 * 37 * Reference counting 38 * 39 * The link count of inode (tmpfs_node_t::tn_links) is used as a 40 * reference counter. However, it has slightly different semantics. 41 * 42 * For directories - link count represents directory entries, which 43 * refer to the directories. In other words, it represents the count 44 * of sub-directories. It also takes into account the virtual '.' 45 * entry (which has no real entry in the list). For files - link count 46 * represents the hard links. Since only empty directories can be 47 * removed - link count aligns the reference counting requirements 48 * enough. Note: to check whether directory is not empty, the inode 49 * size (tmpfs_node_t::tn_size) can be used. 50 * 51 * The inode itself, as an object, gathers its first reference when 52 * directory entry is attached via tmpfs_dir_attach(9). For instance, 53 * after regular tmpfs_create(), a file would have a link count of 1, 54 * while directory after tmpfs_mkdir() would have 2 (due to '.'). 55 * 56 * Reclamation 57 * 58 * It should be noted that tmpfs inodes rely on a combination of vnode 59 * reference counting and link counting. That is, an inode can only be 60 * destroyed if its associated vnode is inactive. The destruction is 61 * done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted 62 * that tmpfs_node_t::tn_links being 0 is a destruction criterion. 63 * 64 * If an inode has references within the file system (tn_links > 0) and 65 * its inactive vnode gets reclaimed/recycled - then the association is 66 * broken in tmpfs_reclaim(). In such case, an inode will always pass 67 * tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode. 68 * 69 * Lock order 70 * 71 * tmpfs_node_t::tn_vlock -> 72 * vnode_t::v_vlock -> 73 * vnode_t::v_interlock 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.76 2011/06/30 00:37:07 enami Exp $"); 78 79 #include <sys/param.h> 80 #include <sys/dirent.h> 81 #include <sys/event.h> 82 #include <sys/kmem.h> 83 #include <sys/mount.h> 84 #include <sys/namei.h> 85 #include <sys/time.h> 86 #include <sys/stat.h> 87 #include <sys/systm.h> 88 #include <sys/vnode.h> 89 #include <sys/kauth.h> 90 #include <sys/atomic.h> 91 92 #include <uvm/uvm.h> 93 94 #include <miscfs/specfs/specdev.h> 95 #include <miscfs/genfs/genfs.h> 96 #include <fs/tmpfs/tmpfs.h> 97 #include <fs/tmpfs/tmpfs_fifoops.h> 98 #include <fs/tmpfs/tmpfs_specops.h> 99 #include <fs/tmpfs/tmpfs_vnops.h> 100 101 /* 102 * tmpfs_alloc_node: allocate a new inode of a specified type and 103 * insert it into the list of specified mount point. 104 */ 105 int 106 tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid, 107 mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node) 108 { 109 tmpfs_node_t *nnode; 110 111 nnode = tmpfs_node_get(tmp); 112 if (nnode == NULL) { 113 return ENOSPC; 114 } 115 116 /* Initially, no references and no associations. */ 117 nnode->tn_links = 0; 118 nnode->tn_vnode = NULL; 119 nnode->tn_dirent_hint = NULL; 120 121 /* 122 * XXX Where the pool is backed by a map larger than (4GB * 123 * sizeof(*nnode)), this may produce duplicate inode numbers 124 * for applications that do not understand 64-bit ino_t. 125 */ 126 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); 127 nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random(); 128 129 /* Generic initialization. */ 130 nnode->tn_type = type; 131 nnode->tn_size = 0; 132 nnode->tn_status = 0; 133 nnode->tn_flags = 0; 134 nnode->tn_lockf = NULL; 135 136 vfs_timestamp(&nnode->tn_atime); 137 nnode->tn_birthtime = nnode->tn_atime; 138 nnode->tn_ctime = nnode->tn_atime; 139 nnode->tn_mtime = nnode->tn_atime; 140 141 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 142 nnode->tn_uid = uid; 143 nnode->tn_gid = gid; 144 nnode->tn_mode = mode; 145 146 /* Type-specific initialization. */ 147 switch (nnode->tn_type) { 148 case VBLK: 149 case VCHR: 150 /* Character/block special device. */ 151 KASSERT(rdev != VNOVAL); 152 nnode->tn_spec.tn_dev.tn_rdev = rdev; 153 break; 154 case VDIR: 155 /* Directory. */ 156 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 157 nnode->tn_spec.tn_dir.tn_parent = NULL; 158 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 159 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 160 161 /* Extra link count for the virtual '.' entry. */ 162 nnode->tn_links++; 163 break; 164 case VFIFO: 165 case VSOCK: 166 break; 167 case VLNK: 168 /* Symbolic link. Target specifies the file name. */ 169 KASSERT(target && strlen(target) < MAXPATHLEN); 170 171 nnode->tn_size = strlen(target); 172 if (nnode->tn_size == 0) { 173 nnode->tn_spec.tn_lnk.tn_link = NULL; 174 break; 175 } 176 nnode->tn_spec.tn_lnk.tn_link = 177 tmpfs_strname_alloc(tmp, nnode->tn_size); 178 if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 179 tmpfs_node_put(tmp, nnode); 180 return ENOSPC; 181 } 182 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 183 break; 184 case VREG: 185 /* Regular file. Create an underlying UVM object. */ 186 nnode->tn_spec.tn_reg.tn_aobj = 187 uao_create(INT32_MAX - PAGE_SIZE, 0); 188 nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 189 break; 190 default: 191 KASSERT(false); 192 } 193 194 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); 195 196 mutex_enter(&tmp->tm_lock); 197 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 198 mutex_exit(&tmp->tm_lock); 199 200 *node = nnode; 201 return 0; 202 } 203 204 /* 205 * tmpfs_free_node: remove the inode from a list in the mount point and 206 * destroy the inode structures. 207 */ 208 void 209 tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node) 210 { 211 size_t objsz; 212 213 mutex_enter(&tmp->tm_lock); 214 LIST_REMOVE(node, tn_entries); 215 mutex_exit(&tmp->tm_lock); 216 217 switch (node->tn_type) { 218 case VLNK: 219 if (node->tn_size > 0) { 220 tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link, 221 node->tn_size); 222 } 223 break; 224 case VREG: 225 /* 226 * Calculate the size of inode data, decrease the used-memory 227 * counter, and destroy the unerlying UVM object (if any). 228 */ 229 objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages; 230 if (objsz != 0) { 231 tmpfs_mem_decr(tmp, objsz); 232 } 233 if (node->tn_spec.tn_reg.tn_aobj != NULL) { 234 uao_detach(node->tn_spec.tn_reg.tn_aobj); 235 } 236 break; 237 case VDIR: 238 /* 239 * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); 240 * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || 241 * node == tmp->tm_root); 242 */ 243 break; 244 default: 245 break; 246 } 247 248 mutex_destroy(&node->tn_vlock); 249 tmpfs_node_put(tmp, node); 250 } 251 252 /* 253 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode. 254 * 255 * => Must be called with tmpfs_node_t::tn_vlock held. 256 * => Returns vnode (*vpp) locked. 257 */ 258 int 259 tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp) 260 { 261 vnode_t *vp; 262 kmutex_t *slock; 263 int error; 264 again: 265 /* If there is already a vnode, try to reclaim it. */ 266 if ((vp = node->tn_vnode) != NULL) { 267 atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); 268 mutex_enter(vp->v_interlock); 269 mutex_exit(&node->tn_vlock); 270 error = vget(vp, LK_EXCLUSIVE); 271 if (error == ENOENT) { 272 mutex_enter(&node->tn_vlock); 273 goto again; 274 } 275 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 276 *vpp = vp; 277 return error; 278 } 279 if (TMPFS_NODE_RECLAIMING(node)) { 280 atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); 281 } 282 283 /* 284 * Get a new vnode and associate it with our inode. Share the 285 * lock with underlying UVM object, if there is one (VREG case). 286 */ 287 if (node->tn_type == VREG) { 288 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 289 slock = uobj->vmobjlock; 290 } else { 291 slock = NULL; 292 } 293 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, slock, &vp); 294 if (error) { 295 mutex_exit(&node->tn_vlock); 296 return error; 297 } 298 299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 300 vp->v_type = node->tn_type; 301 302 /* Type-specific initialization. */ 303 switch (node->tn_type) { 304 case VBLK: 305 case VCHR: 306 vp->v_op = tmpfs_specop_p; 307 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev); 308 break; 309 case VDIR: 310 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ? 311 VV_ROOT : 0; 312 break; 313 case VFIFO: 314 vp->v_op = tmpfs_fifoop_p; 315 break; 316 case VLNK: 317 case VREG: 318 case VSOCK: 319 break; 320 default: 321 KASSERT(false); 322 } 323 324 uvm_vnp_setsize(vp, node->tn_size); 325 vp->v_data = node; 326 node->tn_vnode = vp; 327 mutex_exit(&node->tn_vlock); 328 329 KASSERT(VOP_ISLOCKED(vp)); 330 *vpp = vp; 331 return 0; 332 } 333 334 /* 335 * tmpfs_alloc_file: allocate a new file of specified type and adds it 336 * into the parent directory. 337 * 338 * => Credentials of the caller are used. 339 */ 340 int 341 tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap, 342 struct componentname *cnp, char *target) 343 { 344 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); 345 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node; 346 tmpfs_dirent_t *de; 347 int error; 348 349 KASSERT(VOP_ISLOCKED(dvp)); 350 *vpp = NULL; 351 352 /* Check for the maximum number of links limit. */ 353 if (vap->va_type == VDIR) { 354 /* Check for maximum links limit. */ 355 if (dnode->tn_links == LINK_MAX) { 356 error = EMLINK; 357 goto out; 358 } 359 KASSERT(dnode->tn_links < LINK_MAX); 360 } 361 362 /* Allocate a node that represents the new file. */ 363 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred), 364 dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node); 365 if (error) 366 goto out; 367 368 /* Allocate a directory entry that points to the new file. */ 369 error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de); 370 if (error) { 371 tmpfs_free_node(tmp, node); 372 goto out; 373 } 374 375 /* Get a vnode for the new file. */ 376 mutex_enter(&node->tn_vlock); 377 error = tmpfs_vnode_get(dvp->v_mount, node, vpp); 378 if (error) { 379 tmpfs_free_dirent(tmp, de); 380 tmpfs_free_node(tmp, node); 381 goto out; 382 } 383 384 /* Associate inode and attach the entry into the directory. */ 385 tmpfs_dir_attach(dvp, de, node); 386 out: 387 vput(dvp); 388 return error; 389 } 390 391 /* 392 * tmpfs_alloc_dirent: allocates a new directory entry for the inode. 393 * The directory entry contains a path name component. 394 */ 395 int 396 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len, 397 tmpfs_dirent_t **de) 398 { 399 tmpfs_dirent_t *nde; 400 401 nde = tmpfs_dirent_get(tmp); 402 if (nde == NULL) 403 return ENOSPC; 404 405 nde->td_name = tmpfs_strname_alloc(tmp, len); 406 if (nde->td_name == NULL) { 407 tmpfs_dirent_put(tmp, nde); 408 return ENOSPC; 409 } 410 nde->td_namelen = len; 411 memcpy(nde->td_name, name, len); 412 413 *de = nde; 414 return 0; 415 } 416 417 /* 418 * tmpfs_free_dirent: free a directory entry. 419 */ 420 void 421 tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) 422 { 423 424 /* KASSERT(de->td_node == NULL); */ 425 tmpfs_strname_free(tmp, de->td_name, de->td_namelen); 426 tmpfs_dirent_put(tmp, de); 427 } 428 429 /* 430 * tmpfs_dir_attach: associate directory entry with a specified inode, 431 * and attach the entry into the directory, specified by vnode. 432 * 433 * => Increases link count on the associated node. 434 * => Increases link count on directory node, if our node is VDIR. 435 * It is caller's responsibility to check for the LINK_MAX limit. 436 * => Triggers kqueue events here. 437 */ 438 void 439 tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node) 440 { 441 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 442 int events = NOTE_WRITE; 443 444 KASSERT(VOP_ISLOCKED(dvp)); 445 446 /* Associate directory entry and the inode. */ 447 if (node != TMPFS_NODE_WHITEOUT) { 448 de->td_node = node; 449 KASSERT(node->tn_links < LINK_MAX); 450 node->tn_links++; 451 452 /* Save the hint (might overwrite). */ 453 node->tn_dirent_hint = de; 454 } 455 456 /* Insert the entry to the directory (parent of inode). */ 457 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 458 dnode->tn_size += sizeof(tmpfs_dirent_t); 459 dnode->tn_status |= TMPFS_NODE_STATUSALL; 460 uvm_vnp_setsize(dvp, dnode->tn_size); 461 462 if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) { 463 /* Set parent. */ 464 KASSERT(node->tn_spec.tn_dir.tn_parent == NULL); 465 node->tn_spec.tn_dir.tn_parent = dnode; 466 467 /* Increase the link count of parent. */ 468 KASSERT(dnode->tn_links < LINK_MAX); 469 dnode->tn_links++; 470 events |= NOTE_LINK; 471 472 TMPFS_VALIDATE_DIR(node); 473 } 474 VN_KNOTE(dvp, events); 475 } 476 477 /* 478 * tmpfs_dir_detach: disassociate directory entry and its inode, 479 * and detach the entry from the directory, specified by vnode. 480 * 481 * => Decreases link count on the associated node. 482 * => Decreases the link count on directory node, if our node is VDIR. 483 * => Triggers kqueue events here. 484 */ 485 void 486 tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de) 487 { 488 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); 489 tmpfs_node_t *node = de->td_node; 490 int events = NOTE_WRITE; 491 492 KASSERT(VOP_ISLOCKED(dvp)); 493 494 if (node != TMPFS_NODE_WHITEOUT) { 495 vnode_t *vp = node->tn_vnode; 496 497 KASSERT(VOP_ISLOCKED(vp)); 498 499 /* Deassociate the inode and entry. */ 500 de->td_node = NULL; 501 node->tn_dirent_hint = NULL; 502 503 KASSERT(node->tn_links > 0); 504 node->tn_links--; 505 if (vp) { 506 VN_KNOTE(vp, node->tn_links ? 507 NOTE_LINK : NOTE_DELETE); 508 } 509 510 /* If directory - decrease the link count of parent. */ 511 if (node->tn_type == VDIR) { 512 KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); 513 node->tn_spec.tn_dir.tn_parent = NULL; 514 515 KASSERT(dnode->tn_links > 0); 516 dnode->tn_links--; 517 events |= NOTE_LINK; 518 } 519 } 520 521 /* Remove the entry from the directory. */ 522 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 523 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; 524 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 525 } 526 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 527 528 dnode->tn_size -= sizeof(tmpfs_dirent_t); 529 dnode->tn_status |= TMPFS_NODE_STATUSALL; 530 uvm_vnp_setsize(dvp, dnode->tn_size); 531 VN_KNOTE(dvp, events); 532 } 533 534 /* 535 * tmpfs_dir_lookup: find a directory entry in the specified inode. 536 * 537 * Note that the . and .. components are not allowed as they do not 538 * physically exist within directories. 539 */ 540 tmpfs_dirent_t * 541 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp) 542 { 543 const char *name = cnp->cn_nameptr; 544 const uint16_t nlen = cnp->cn_namelen; 545 tmpfs_dirent_t *de; 546 547 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 548 KASSERT(nlen != 1 || !(name[0] == '.')); 549 KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.')); 550 TMPFS_VALIDATE_DIR(node); 551 552 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 553 if (de->td_namelen != nlen) 554 continue; 555 if (memcmp(de->td_name, name, nlen) != 0) 556 continue; 557 break; 558 } 559 node->tn_status |= TMPFS_NODE_ACCESSED; 560 return de; 561 } 562 563 /* 564 * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to 565 * avoid unnecessary tmpds_dir_lookup(). 566 * 567 * => The vnode must be locked. 568 */ 569 tmpfs_dirent_t * 570 tmpfs_dir_cached(tmpfs_node_t *node) 571 { 572 tmpfs_dirent_t *de = node->tn_dirent_hint; 573 574 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 575 576 if (de == NULL) { 577 return NULL; 578 } 579 KASSERT(de->td_node == node); 580 581 /* 582 * Directories always have a valid hint. For files, check if there 583 * are any hard links. If there are - hint might be invalid. 584 */ 585 return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de; 586 } 587 588 /* 589 * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a 590 * '.' entry for the given directory and returns it in the uio space. 591 */ 592 int 593 tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio) 594 { 595 struct dirent *dentp; 596 int error; 597 598 TMPFS_VALIDATE_DIR(node); 599 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 600 601 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 602 dentp->d_fileno = node->tn_id; 603 dentp->d_type = DT_DIR; 604 dentp->d_namlen = 1; 605 dentp->d_name[0] = '.'; 606 dentp->d_name[1] = '\0'; 607 dentp->d_reclen = _DIRENT_SIZE(dentp); 608 609 if (dentp->d_reclen > uio->uio_resid) 610 error = -1; 611 else { 612 error = uiomove(dentp, dentp->d_reclen, uio); 613 if (error == 0) 614 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 615 } 616 node->tn_status |= TMPFS_NODE_ACCESSED; 617 kmem_free(dentp, sizeof(struct dirent)); 618 return error; 619 } 620 621 /* 622 * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a 623 * '..' entry for the given directory and returns it in the uio space. 624 */ 625 int 626 tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio) 627 { 628 struct dirent *dentp; 629 int error; 630 631 TMPFS_VALIDATE_DIR(node); 632 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 633 634 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 635 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; 636 dentp->d_type = DT_DIR; 637 dentp->d_namlen = 2; 638 dentp->d_name[0] = '.'; 639 dentp->d_name[1] = '.'; 640 dentp->d_name[2] = '\0'; 641 dentp->d_reclen = _DIRENT_SIZE(dentp); 642 643 if (dentp->d_reclen > uio->uio_resid) 644 error = -1; 645 else { 646 error = uiomove(dentp, dentp->d_reclen, uio); 647 if (error == 0) { 648 tmpfs_dirent_t *de; 649 650 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); 651 if (de == NULL) 652 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 653 else 654 uio->uio_offset = tmpfs_dircookie(de); 655 } 656 } 657 node->tn_status |= TMPFS_NODE_ACCESSED; 658 kmem_free(dentp, sizeof(struct dirent)); 659 return error; 660 } 661 662 /* 663 * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie. 664 */ 665 tmpfs_dirent_t * 666 tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie) 667 { 668 tmpfs_dirent_t *de; 669 670 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 671 672 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && 673 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { 674 return node->tn_spec.tn_dir.tn_readdir_lastp; 675 } 676 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 677 if (tmpfs_dircookie(de) == cookie) { 678 break; 679 } 680 } 681 return de; 682 } 683 684 /* 685 * tmpfs_dir_getdents: relper function for tmpfs_readdir. 686 * 687 * => Returns as much directory entries as can fit in the uio space. 688 * => The read starts at uio->uio_offset. 689 */ 690 int 691 tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp) 692 { 693 tmpfs_dirent_t *de; 694 struct dirent *dentp; 695 off_t startcookie; 696 int error; 697 698 KASSERT(VOP_ISLOCKED(node->tn_vnode)); 699 TMPFS_VALIDATE_DIR(node); 700 701 /* 702 * Locate the first directory entry we have to return. We have cached 703 * the last readdir in the node, so use those values if appropriate. 704 * Otherwise do a linear scan to find the requested entry. 705 */ 706 startcookie = uio->uio_offset; 707 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 708 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 709 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 710 return 0; 711 } else { 712 de = tmpfs_dir_lookupbycookie(node, startcookie); 713 } 714 if (de == NULL) { 715 return EINVAL; 716 } 717 718 /* 719 * Read as much entries as possible; i.e., until we reach the end 720 * of the directory or we exhaust uio space. 721 */ 722 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); 723 do { 724 /* 725 * Create a dirent structure representing the current 726 * inode and fill it. 727 */ 728 if (de->td_node == TMPFS_NODE_WHITEOUT) { 729 dentp->d_fileno = 1; 730 dentp->d_type = DT_WHT; 731 } else { 732 dentp->d_fileno = de->td_node->tn_id; 733 switch (de->td_node->tn_type) { 734 case VBLK: 735 dentp->d_type = DT_BLK; 736 break; 737 case VCHR: 738 dentp->d_type = DT_CHR; 739 break; 740 case VDIR: 741 dentp->d_type = DT_DIR; 742 break; 743 case VFIFO: 744 dentp->d_type = DT_FIFO; 745 break; 746 case VLNK: 747 dentp->d_type = DT_LNK; 748 break; 749 case VREG: 750 dentp->d_type = DT_REG; 751 break; 752 case VSOCK: 753 dentp->d_type = DT_SOCK; 754 break; 755 default: 756 KASSERT(false); 757 } 758 } 759 dentp->d_namlen = de->td_namelen; 760 KASSERT(de->td_namelen < sizeof(dentp->d_name)); 761 memcpy(dentp->d_name, de->td_name, de->td_namelen); 762 dentp->d_name[de->td_namelen] = '\0'; 763 dentp->d_reclen = _DIRENT_SIZE(dentp); 764 765 /* Stop reading if the directory entry we are treating is 766 * bigger than the amount of data that can be returned. */ 767 if (dentp->d_reclen > uio->uio_resid) { 768 error = -1; 769 break; 770 } 771 772 /* 773 * Copy the new dirent structure into the output buffer and 774 * advance pointers. 775 */ 776 error = uiomove(dentp, dentp->d_reclen, uio); 777 778 (*cntp)++; 779 de = TAILQ_NEXT(de, td_entries); 780 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 781 782 /* Update the offset and cache. */ 783 if (de == NULL) { 784 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 785 node->tn_spec.tn_dir.tn_readdir_lastn = 0; 786 node->tn_spec.tn_dir.tn_readdir_lastp = NULL; 787 } else { 788 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = 789 tmpfs_dircookie(de); 790 node->tn_spec.tn_dir.tn_readdir_lastp = de; 791 } 792 node->tn_status |= TMPFS_NODE_ACCESSED; 793 kmem_free(dentp, sizeof(struct dirent)); 794 return error; 795 } 796 797 /* 798 * tmpfs_reg_resize: resize the underlying UVM object associated with the 799 * specified regular file. 800 */ 801 int 802 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 803 { 804 tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); 805 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 806 struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 807 size_t newpages, oldpages; 808 off_t oldsize; 809 810 KASSERT(vp->v_type == VREG); 811 KASSERT(newsize >= 0); 812 813 oldsize = node->tn_size; 814 oldpages = round_page(oldsize) >> PAGE_SHIFT; 815 newpages = round_page(newsize) >> PAGE_SHIFT; 816 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 817 818 if (newpages > oldpages) { 819 /* Increase the used-memory counter if getting extra pages. */ 820 if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) { 821 return ENOSPC; 822 } 823 } else if (newsize < oldsize) { 824 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; 825 826 ubc_zerorange(uobj, newsize, zerolen, UBC_UNMAP_FLAG(vp)); 827 } 828 829 node->tn_spec.tn_reg.tn_aobj_pages = newpages; 830 node->tn_size = newsize; 831 uvm_vnp_setsize(vp, newsize); 832 833 /* 834 * Free "backing store". 835 */ 836 if (newpages < oldpages) { 837 KASSERT(uobj->vmobjlock == vp->v_interlock); 838 839 mutex_enter(uobj->vmobjlock); 840 uao_dropswap_range(uobj, newpages, oldpages); 841 mutex_exit(uobj->vmobjlock); 842 843 /* Decrease the used-memory counter. */ 844 tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); 845 } 846 if (newsize > oldsize) { 847 VN_KNOTE(vp, NOTE_EXTEND); 848 } 849 return 0; 850 } 851 852 /* 853 * tmpfs_chflags: change flags of the given vnode. 854 * 855 * => Caller should perform tmpfs_update(). 856 */ 857 int 858 tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l) 859 { 860 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 861 kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS; 862 int error, fs_decision = 0; 863 864 KASSERT(VOP_ISLOCKED(vp)); 865 866 /* Disallow this operation if the file system is mounted read-only. */ 867 if (vp->v_mount->mnt_flag & MNT_RDONLY) 868 return EROFS; 869 870 if (kauth_cred_geteuid(cred) != node->tn_uid) { 871 fs_decision = EACCES; 872 } 873 874 /* 875 * If the new flags have non-user flags that are different than 876 * those on the node, we need special permission to change them. 877 */ 878 if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) { 879 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 880 if (!fs_decision) { 881 fs_decision = EPERM; 882 } 883 } 884 885 /* 886 * Indicate that this node's flags have system attributes in them if 887 * that's the case. 888 */ 889 if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) { 890 action |= KAUTH_VNODE_HAS_SYSFLAGS; 891 } 892 893 error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision); 894 if (error) 895 return error; 896 897 /* 898 * Set the flags. If we're not setting non-user flags, be careful not 899 * to overwrite them. 900 * 901 * XXX: Can't we always assign here? if the system flags are different, 902 * the code above should catch attempts to change them without 903 * proper permissions, and if we're here it means it's okay to 904 * change them... 905 */ 906 if ((action & KAUTH_VNODE_WRITE_SYSFLAGS) == 0) { 907 /* Clear all user-settable flags and re-set them. */ 908 node->tn_flags &= SF_SETTABLE; 909 node->tn_flags |= (flags & UF_SETTABLE); 910 } else { 911 node->tn_flags = flags; 912 } 913 node->tn_status |= TMPFS_NODE_CHANGED; 914 VN_KNOTE(vp, NOTE_ATTRIB); 915 return 0; 916 } 917 918 /* 919 * tmpfs_chmod: change access mode on the given vnode. 920 * 921 * => Caller should perform tmpfs_update(). 922 */ 923 int 924 tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l) 925 { 926 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 927 int error; 928 929 KASSERT(VOP_ISLOCKED(vp)); 930 931 /* Disallow this operation if the file system is mounted read-only. */ 932 if (vp->v_mount->mnt_flag & MNT_RDONLY) 933 return EROFS; 934 935 /* Immutable or append-only files cannot be modified, either. */ 936 if (node->tn_flags & (IMMUTABLE | APPEND)) 937 return EPERM; 938 939 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp, 940 NULL, genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid, mode)); 941 if (error) { 942 return error; 943 } 944 node->tn_mode = (mode & ALLPERMS); 945 node->tn_status |= TMPFS_NODE_CHANGED; 946 VN_KNOTE(vp, NOTE_ATTRIB); 947 return 0; 948 } 949 950 /* 951 * tmpfs_chown: change ownership of the given vnode. 952 * 953 * => At least one of uid or gid must be different than VNOVAL. 954 * => Attribute is unchanged for VNOVAL case. 955 * => Caller should perform tmpfs_update(). 956 */ 957 int 958 tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l) 959 { 960 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 961 int error; 962 963 KASSERT(VOP_ISLOCKED(vp)); 964 965 /* Assign default values if they are unknown. */ 966 KASSERT(uid != VNOVAL || gid != VNOVAL); 967 if (uid == VNOVAL) { 968 uid = node->tn_uid; 969 } 970 if (gid == VNOVAL) { 971 gid = node->tn_gid; 972 } 973 974 /* Disallow this operation if the file system is mounted read-only. */ 975 if (vp->v_mount->mnt_flag & MNT_RDONLY) 976 return EROFS; 977 978 /* Immutable or append-only files cannot be modified, either. */ 979 if (node->tn_flags & (IMMUTABLE | APPEND)) 980 return EPERM; 981 982 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp, 983 NULL, genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid, 984 gid)); 985 if (error) { 986 return error; 987 } 988 node->tn_uid = uid; 989 node->tn_gid = gid; 990 node->tn_status |= TMPFS_NODE_CHANGED; 991 VN_KNOTE(vp, NOTE_ATTRIB); 992 return 0; 993 } 994 995 /* 996 * tmpfs_chsize: change size of the given vnode. 997 */ 998 int 999 tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l) 1000 { 1001 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1002 1003 KASSERT(VOP_ISLOCKED(vp)); 1004 1005 /* Decide whether this is a valid operation based on the file type. */ 1006 switch (vp->v_type) { 1007 case VDIR: 1008 return EISDIR; 1009 case VREG: 1010 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 1011 return EROFS; 1012 } 1013 break; 1014 case VBLK: 1015 case VCHR: 1016 case VFIFO: 1017 /* 1018 * Allow modifications of special files even if in the file 1019 * system is mounted read-only (we are not modifying the 1020 * files themselves, but the objects they represent). 1021 */ 1022 return 0; 1023 default: 1024 return EOPNOTSUPP; 1025 } 1026 1027 /* Immutable or append-only files cannot be modified, either. */ 1028 if (node->tn_flags & (IMMUTABLE | APPEND)) { 1029 return EPERM; 1030 } 1031 1032 /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */ 1033 return tmpfs_truncate(vp, size); 1034 } 1035 1036 /* 1037 * tmpfs_chtimes: change access and modification times for vnode. 1038 */ 1039 int 1040 tmpfs_chtimes(vnode_t *vp, const struct timespec *atime, 1041 const struct timespec *mtime, const struct timespec *btime, 1042 int vaflags, kauth_cred_t cred, lwp_t *l) 1043 { 1044 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1045 int error; 1046 1047 KASSERT(VOP_ISLOCKED(vp)); 1048 1049 /* Disallow this operation if the file system is mounted read-only. */ 1050 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1051 return EROFS; 1052 1053 /* Immutable or append-only files cannot be modified, either. */ 1054 if (node->tn_flags & (IMMUTABLE | APPEND)) 1055 return EPERM; 1056 1057 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, 1058 genfs_can_chtimes(vp, vaflags, node->tn_uid, cred)); 1059 if (error) 1060 return error; 1061 1062 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1063 node->tn_status |= TMPFS_NODE_ACCESSED; 1064 1065 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1066 node->tn_status |= TMPFS_NODE_MODIFIED; 1067 1068 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL) 1069 btime = NULL; 1070 1071 tmpfs_update(vp, atime, mtime, btime, 0); 1072 VN_KNOTE(vp, NOTE_ATTRIB); 1073 return 0; 1074 } 1075 1076 /* 1077 * tmpfs_update: update timestamps, et al. 1078 */ 1079 void 1080 tmpfs_update(vnode_t *vp, const struct timespec *acc, 1081 const struct timespec *mod, const struct timespec *birth, int flags) 1082 { 1083 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1084 struct timespec nowtm; 1085 1086 /* KASSERT(VOP_ISLOCKED(vp)); */ 1087 1088 if (flags & UPDATE_CLOSE) { 1089 /* XXX Need to do anything special? */ 1090 } 1091 if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) { 1092 return; 1093 } 1094 if (birth != NULL) { 1095 node->tn_birthtime = *birth; 1096 } 1097 vfs_timestamp(&nowtm); 1098 1099 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1100 node->tn_atime = acc ? *acc : nowtm; 1101 } 1102 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1103 node->tn_mtime = mod ? *mod : nowtm; 1104 } 1105 if (node->tn_status & TMPFS_NODE_CHANGED) { 1106 node->tn_ctime = nowtm; 1107 } 1108 1109 node->tn_status &= ~TMPFS_NODE_STATUSALL; 1110 } 1111 1112 int 1113 tmpfs_truncate(vnode_t *vp, off_t length) 1114 { 1115 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1116 int error; 1117 1118 if (length < 0) { 1119 error = EINVAL; 1120 goto out; 1121 } 1122 if (node->tn_size == length) { 1123 error = 0; 1124 goto out; 1125 } 1126 error = tmpfs_reg_resize(vp, length); 1127 if (error == 0) { 1128 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1129 } 1130 out: 1131 tmpfs_update(vp, NULL, NULL, NULL, 0); 1132 return error; 1133 } 1134