1*46f7109aSclaudio /* $OpenBSD: tmpfs_subr.c,v 1.27 2024/09/12 09:04:51 claudio Exp $ */ 27013b092Sespie /* $NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $ */ 37013b092Sespie 47013b092Sespie /* 57013b092Sespie * Copyright (c) 2005-2011 The NetBSD Foundation, Inc. 67013b092Sespie * Copyright (c) 2013 Pedro Martelletto 77013b092Sespie * All rights reserved. 87013b092Sespie * 97013b092Sespie * This code is derived from software contributed to The NetBSD Foundation 107013b092Sespie * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 117013b092Sespie * 2005 program, and by Mindaugas Rasiukevicius. 127013b092Sespie * 137013b092Sespie * Redistribution and use in source and binary forms, with or without 147013b092Sespie * modification, are permitted provided that the following conditions 157013b092Sespie * are met: 167013b092Sespie * 1. Redistributions of source code must retain the above copyright 177013b092Sespie * notice, this list of conditions and the following disclaimer. 187013b092Sespie * 2. Redistributions in binary form must reproduce the above copyright 197013b092Sespie * notice, this list of conditions and the following disclaimer in the 207013b092Sespie * documentation and/or other materials provided with the distribution. 217013b092Sespie * 227013b092Sespie * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 237013b092Sespie * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 247013b092Sespie * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 257013b092Sespie * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 267013b092Sespie * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 277013b092Sespie * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 287013b092Sespie * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 297013b092Sespie * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 307013b092Sespie * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 317013b092Sespie * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 327013b092Sespie * POSSIBILITY OF SUCH DAMAGE. 337013b092Sespie */ 347013b092Sespie 357013b092Sespie /* 367013b092Sespie * Efficient memory file system: interfaces for inode and directory entry 377013b092Sespie * construction, destruction and manipulation. 387013b092Sespie * 397013b092Sespie * Reference counting 407013b092Sespie * 417013b092Sespie * The link count of inode (tmpfs_node_t::tn_links) is used as a 427013b092Sespie * reference counter. However, it has slightly different semantics. 437013b092Sespie * 447013b092Sespie * For directories - link count represents directory entries, which 457013b092Sespie * refer to the directories. In other words, it represents the count 467013b092Sespie * of sub-directories. It also takes into account the virtual '.' 477013b092Sespie * entry (which has no real entry in the list). For files - link count 487013b092Sespie * represents the hard links. Since only empty directories can be 497013b092Sespie * removed - link count aligns the reference counting requirements 507013b092Sespie * enough. Note: to check whether directory is not empty, the inode 517013b092Sespie * size (tmpfs_node_t::tn_size) can be used. 527013b092Sespie * 537013b092Sespie * The inode itself, as an object, gathers its first reference when 547013b092Sespie * directory entry is attached via tmpfs_dir_attach(9). 
For instance, 557013b092Sespie * after regular tmpfs_create(), a file would have a link count of 1, 567013b092Sespie * while directory after tmpfs_mkdir() would have 2 (due to '.'). 577013b092Sespie * 587013b092Sespie * Reclamation 597013b092Sespie * 607013b092Sespie * It should be noted that tmpfs inodes rely on a combination of vnode 617013b092Sespie * reference counting and link counting. That is, an inode can only be 627013b092Sespie * destroyed if its associated vnode is inactive. The destruction is 637013b092Sespie * done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted 647013b092Sespie * that tmpfs_node_t::tn_links being 0 is a destruction criterion. 657013b092Sespie * 667013b092Sespie * If an inode has references within the file system (tn_links > 0) and 677013b092Sespie * its inactive vnode gets reclaimed/recycled - then the association is 687013b092Sespie * broken in tmpfs_reclaim(). In such case, an inode will always pass 697013b092Sespie * tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode. 707013b092Sespie * 717013b092Sespie * Lock order 727013b092Sespie * 737013b092Sespie * tmpfs_node_t::tn_nlock -> 747013b092Sespie * struct vnode::v_vlock -> 757013b092Sespie * struct vnode::v_interlock 767013b092Sespie */ 777013b092Sespie 787013b092Sespie #include <sys/param.h> 797013b092Sespie #include <sys/dirent.h> 807013b092Sespie #include <sys/event.h> 817013b092Sespie #include <sys/mount.h> 827013b092Sespie #include <sys/namei.h> 837013b092Sespie #include <sys/time.h> 847013b092Sespie #include <sys/proc.h> 857013b092Sespie #include <sys/stat.h> 867013b092Sespie #include <sys/systm.h> 877013b092Sespie #include <sys/vnode.h> 887013b092Sespie 894bb7a4f6Smpi #include <uvm/uvm_aobj.h> 907013b092Sespie 917013b092Sespie #include <tmpfs/tmpfs.h> 927013b092Sespie #include <tmpfs/tmpfs_vnops.h> 937013b092Sespie 9457331246Sespie 9557331246Sespie /* Local functions. 
*/ 9657331246Sespie void tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *); 9757331246Sespie int tmpfs_dir_getdotents(tmpfs_node_t *, struct dirent *, struct uio *); 9857331246Sespie 997013b092Sespie /* 1007013b092Sespie * tmpfs_alloc_node: allocate a new inode of a specified type and 1017013b092Sespie * insert it into the list of specified mount point. 1027013b092Sespie */ 1037013b092Sespie int 1047013b092Sespie tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid, 1057013b092Sespie mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node) 1067013b092Sespie { 1077013b092Sespie tmpfs_node_t *nnode; 1087013b092Sespie struct uvm_object *uobj; 1097013b092Sespie 1107013b092Sespie nnode = tmpfs_node_get(tmp); 1117013b092Sespie if (nnode == NULL) { 1127013b092Sespie return ENOSPC; 1137013b092Sespie } 1147013b092Sespie 1157013b092Sespie /* Initially, no references and no associations. */ 1167013b092Sespie nnode->tn_links = 0; 1177013b092Sespie nnode->tn_vnode = NULL; 1187013b092Sespie nnode->tn_dirent_hint = NULL; 1197013b092Sespie 12057331246Sespie rw_enter_write(&tmp->tm_acc_lock); 12157331246Sespie nnode->tn_id = ++tmp->tm_highest_inode; 12257331246Sespie if (nnode->tn_id == 0) { 12357331246Sespie --tmp->tm_highest_inode; 12457331246Sespie rw_exit_write(&tmp->tm_acc_lock); 12557331246Sespie tmpfs_node_put(tmp, nnode); 12657331246Sespie return ENOSPC; 12757331246Sespie } 12857331246Sespie rw_exit_write(&tmp->tm_acc_lock); 1297013b092Sespie 1307013b092Sespie /* Generic initialization. 
*/ 1317013b092Sespie nnode->tn_type = type; 1327013b092Sespie nnode->tn_size = 0; 1337013b092Sespie nnode->tn_flags = 0; 1347013b092Sespie nnode->tn_lockf = NULL; 13557331246Sespie nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random(); 1367013b092Sespie 1377013b092Sespie nanotime(&nnode->tn_atime); 1387013b092Sespie nnode->tn_birthtime = nnode->tn_atime; 1397013b092Sespie nnode->tn_ctime = nnode->tn_atime; 1407013b092Sespie nnode->tn_mtime = nnode->tn_atime; 1417013b092Sespie 1427013b092Sespie KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); 1437013b092Sespie 1447013b092Sespie nnode->tn_uid = uid; 1457013b092Sespie nnode->tn_gid = gid; 1467013b092Sespie nnode->tn_mode = mode; 1477013b092Sespie 1487013b092Sespie /* Type-specific initialization. */ 1497013b092Sespie switch (nnode->tn_type) { 1507013b092Sespie case VBLK: 1517013b092Sespie case VCHR: 1527013b092Sespie /* Character/block special device. */ 1537013b092Sespie KASSERT(rdev != VNOVAL); 1547013b092Sespie nnode->tn_spec.tn_dev.tn_rdev = rdev; 1557013b092Sespie break; 1567013b092Sespie case VDIR: 1577013b092Sespie /* Directory. */ 1587013b092Sespie TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); 1597013b092Sespie nnode->tn_spec.tn_dir.tn_parent = NULL; 16057331246Sespie nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START; 1617013b092Sespie nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 1627013b092Sespie 1637013b092Sespie /* Extra link count for the virtual '.' entry. */ 1647013b092Sespie nnode->tn_links++; 1657013b092Sespie break; 1667013b092Sespie case VFIFO: 1677013b092Sespie case VSOCK: 1687013b092Sespie break; 1697013b092Sespie case VLNK: 1707013b092Sespie /* Symbolic link. Target specifies the file name. 
*/ 1717013b092Sespie KASSERT(target && strlen(target) < MAXPATHLEN); 1727013b092Sespie 1737013b092Sespie nnode->tn_size = strlen(target); 1747013b092Sespie if (nnode->tn_size == 0) { 1757013b092Sespie nnode->tn_spec.tn_lnk.tn_link = NULL; 1767013b092Sespie break; 1777013b092Sespie } 1787013b092Sespie nnode->tn_spec.tn_lnk.tn_link = 1797013b092Sespie tmpfs_strname_alloc(tmp, nnode->tn_size); 1807013b092Sespie if (nnode->tn_spec.tn_lnk.tn_link == NULL) { 1817013b092Sespie tmpfs_node_put(tmp, nnode); 1827013b092Sespie return ENOSPC; 1837013b092Sespie } 1847013b092Sespie memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); 1857013b092Sespie break; 1867013b092Sespie case VREG: 1877013b092Sespie /* Regular file. Create an underlying UVM object. */ 1887013b092Sespie uobj = uao_create(0, UAO_FLAG_CANFAIL); 1897013b092Sespie if (uobj == NULL) { 1907013b092Sespie tmpfs_node_put(tmp, nnode); 1917013b092Sespie return ENOSPC; 1927013b092Sespie } 1937013b092Sespie nnode->tn_spec.tn_reg.tn_aobj = uobj; 1947013b092Sespie nnode->tn_spec.tn_reg.tn_aobj_pages = 0; 1957013b092Sespie nnode->tn_spec.tn_reg.tn_aobj_pgptr = (vaddr_t)NULL; 1967013b092Sespie nnode->tn_spec.tn_reg.tn_aobj_pgnum = (voff_t)-1; 1977013b092Sespie break; 1987013b092Sespie default: 1997013b092Sespie KASSERT(0); 2007013b092Sespie } 2017013b092Sespie 2027013b092Sespie rw_init(&nnode->tn_nlock, "tvlk"); 2037013b092Sespie 2047013b092Sespie rw_enter_write(&tmp->tm_lock); 2057013b092Sespie LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); 2067013b092Sespie rw_exit_write(&tmp->tm_lock); 2077013b092Sespie 2087013b092Sespie *node = nnode; 2097013b092Sespie return 0; 2107013b092Sespie } 2117013b092Sespie 2127013b092Sespie /* 2137013b092Sespie * tmpfs_free_node: remove the inode from a list in the mount point and 2147013b092Sespie * destroy the inode structures. 
2157013b092Sespie */ 2167013b092Sespie void 2177013b092Sespie tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node) 2187013b092Sespie { 2197013b092Sespie size_t objsz; 2207013b092Sespie 2217013b092Sespie rw_enter_write(&tmp->tm_lock); 2227013b092Sespie LIST_REMOVE(node, tn_entries); 2237013b092Sespie rw_exit_write(&tmp->tm_lock); 2247013b092Sespie 2257013b092Sespie switch (node->tn_type) { 2267013b092Sespie case VLNK: 2277013b092Sespie if (node->tn_size > 0) { 2287013b092Sespie KASSERT(node->tn_size <= SIZE_MAX); 2297013b092Sespie tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link, 2307013b092Sespie node->tn_size); 2317013b092Sespie } 2327013b092Sespie break; 2337013b092Sespie case VREG: 2347013b092Sespie /* 2357013b092Sespie * Calculate the size of inode data, decrease the used-memory 2367013b092Sespie * counter, and destroy the underlying UVM object (if any). 2377013b092Sespie */ 2387013b092Sespie objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages; 2397013b092Sespie if (objsz != 0) { 2407013b092Sespie tmpfs_mem_decr(tmp, objsz); 2417013b092Sespie } 2427013b092Sespie if (node->tn_spec.tn_reg.tn_aobj != NULL) { 2437013b092Sespie uao_detach(node->tn_spec.tn_reg.tn_aobj); 2447013b092Sespie node->tn_spec.tn_reg.tn_aobj = NULL; 2457013b092Sespie } 2467013b092Sespie break; 2477013b092Sespie case VDIR: 24857331246Sespie KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); 24957331246Sespie KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || 25057331246Sespie node == tmp->tm_root); 2517013b092Sespie break; 2527013b092Sespie default: 2537013b092Sespie break; 2547013b092Sespie } 2557013b092Sespie 25657331246Sespie rw_enter_write(&tmp->tm_acc_lock); 25757331246Sespie if (node->tn_id == tmp->tm_highest_inode) 25857331246Sespie --tmp->tm_highest_inode; 25957331246Sespie rw_exit_write(&tmp->tm_acc_lock); 26057331246Sespie 2617013b092Sespie /* mutex_destroy(&node->tn_nlock); */ 2627013b092Sespie tmpfs_node_put(tmp, node); 2637013b092Sespie } 2647013b092Sespie 
2657013b092Sespie /* 2667013b092Sespie * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode. 2677013b092Sespie * 2687013b092Sespie * => Must be called with tmpfs_node_t::tn_nlock held. 2697013b092Sespie * => Returns vnode (*vpp) locked. 2707013b092Sespie */ 2717013b092Sespie int 2727013b092Sespie tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, struct vnode **vpp) 2737013b092Sespie { 2747013b092Sespie struct vnode *vp, *nvp; 2757013b092Sespie /* kmutex_t *slock; */ 2767013b092Sespie int error; 2777013b092Sespie again: 2787013b092Sespie /* If there is already a vnode, try to reclaim it. */ 2797013b092Sespie if ((vp = node->tn_vnode) != NULL) { 2807013b092Sespie /* atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); */ 2817013b092Sespie node->tn_gen |= TMPFS_RECLAIMING_BIT; 2827013b092Sespie rw_exit_write(&node->tn_nlock); 28308107a0bSvisa error = vget(vp, LK_EXCLUSIVE); 2847013b092Sespie if (error == ENOENT) { 2857013b092Sespie rw_enter_write(&node->tn_nlock); 2867013b092Sespie goto again; 2877013b092Sespie } 2887013b092Sespie /* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */ 2897013b092Sespie node->tn_gen &= ~TMPFS_RECLAIMING_BIT; 2907013b092Sespie *vpp = vp; 2917013b092Sespie return error; 2927013b092Sespie } 2937013b092Sespie if (TMPFS_NODE_RECLAIMING(node)) { 2947013b092Sespie /* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */ 2957013b092Sespie node->tn_gen &= ~TMPFS_RECLAIMING_BIT; 2967013b092Sespie } 2977013b092Sespie 2987013b092Sespie /* 2997013b092Sespie * Get a new vnode and associate it with our inode. Share the 3007013b092Sespie * lock with underlying UVM object, if there is one (VREG case). 
3017013b092Sespie */ 3027013b092Sespie #if 0 3037013b092Sespie if (node->tn_type == VREG) { 3047013b092Sespie struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 3057013b092Sespie slock = uobj->vmobjlock; 3067013b092Sespie } else { 3077013b092Sespie slock = NULL; 3087013b092Sespie } 3097013b092Sespie #endif 3107013b092Sespie error = getnewvnode(VT_TMPFS, mp, &tmpfs_vops, &vp); 3117013b092Sespie if (error) { 3127013b092Sespie rw_exit_write(&node->tn_nlock); 3137013b092Sespie return error; 3147013b092Sespie } 3157013b092Sespie 316d78cb2ffSvisa rrw_init_flags(&node->tn_vlock, "tnode", RWL_DUPOK | RWL_IS_VNODE); 3177013b092Sespie vp->v_type = node->tn_type; 3187013b092Sespie 3197013b092Sespie /* Type-specific initialization. */ 3207013b092Sespie switch (node->tn_type) { 3217013b092Sespie case VBLK: 3227013b092Sespie case VCHR: 3237013b092Sespie vp->v_op = &tmpfs_specvops; 3247013b092Sespie if ((nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp))) { 3257013b092Sespie nvp->v_data = vp->v_data; 3267013b092Sespie vp->v_data = NULL; 3277013b092Sespie vp->v_op = &spec_vops; 3287013b092Sespie vrele(vp); 3297013b092Sespie vgone(vp); 3307013b092Sespie vp = nvp; 3317013b092Sespie node->tn_vnode = vp; 3327013b092Sespie } 3337013b092Sespie break; 3347013b092Sespie case VDIR: 3357013b092Sespie vp->v_flag |= node->tn_spec.tn_dir.tn_parent == node ? 
3367013b092Sespie VROOT : 0; 3377013b092Sespie break; 3387013b092Sespie #ifdef FIFO 3397013b092Sespie case VFIFO: 3407013b092Sespie vp->v_op = &tmpfs_fifovops; 3417013b092Sespie break; 3427013b092Sespie #endif 3437013b092Sespie case VLNK: 3447013b092Sespie case VREG: 3457013b092Sespie case VSOCK: 3467013b092Sespie break; 3477013b092Sespie default: 3487013b092Sespie KASSERT(0); 3497013b092Sespie } 3507013b092Sespie 3517013b092Sespie uvm_vnp_setsize(vp, node->tn_size); 3527013b092Sespie vp->v_data = node; 3537013b092Sespie node->tn_vnode = vp; 3546e880534Svisa vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3557013b092Sespie rw_exit_write(&node->tn_nlock); 3567013b092Sespie 3577013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 3587013b092Sespie *vpp = vp; 3597013b092Sespie return 0; 3607013b092Sespie } 3617013b092Sespie 3627013b092Sespie /* 3637013b092Sespie * tmpfs_alloc_file: allocate a new file of specified type and adds it 3647013b092Sespie * into the parent directory. 3657013b092Sespie * 3667013b092Sespie * => Credentials of the caller are used. 3677013b092Sespie */ 3687013b092Sespie int 3697013b092Sespie tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 3707013b092Sespie struct componentname *cnp, char *target) 3717013b092Sespie { 3727013b092Sespie tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); 3737013b092Sespie tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node; 3747013b092Sespie tmpfs_dirent_t *de; 3757013b092Sespie int error; 3767013b092Sespie 3777013b092Sespie KASSERT(VOP_ISLOCKED(dvp)); 3787013b092Sespie *vpp = NULL; 3797013b092Sespie 3807013b092Sespie /* Check for the maximum number of links limit. */ 3817013b092Sespie if (vap->va_type == VDIR) { 3827013b092Sespie /* Check for maximum links limit. 
*/ 3837013b092Sespie if (dnode->tn_links == LINK_MAX) { 3847013b092Sespie error = EMLINK; 3857013b092Sespie goto out; 3867013b092Sespie } 3877013b092Sespie KASSERT(dnode->tn_links < LINK_MAX); 3887013b092Sespie } 3897013b092Sespie 39057331246Sespie if (TMPFS_DIRSEQ_FULL(dnode)) { 39157331246Sespie error = ENOSPC; 39257331246Sespie goto out; 39357331246Sespie } 39457331246Sespie 395a2e5b9cfSdcoppa if (dnode->tn_links == 0) { 396a2e5b9cfSdcoppa error = ENOENT; 397a2e5b9cfSdcoppa goto out; 398a2e5b9cfSdcoppa } 399a2e5b9cfSdcoppa 4007013b092Sespie /* Allocate a node that represents the new file. */ 4017013b092Sespie error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, 4027013b092Sespie dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node); 4037013b092Sespie if (error) 4047013b092Sespie goto out; 4057013b092Sespie 4067013b092Sespie /* Allocate a directory entry that points to the new file. */ 4077013b092Sespie error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de); 4087013b092Sespie if (error) { 4097013b092Sespie tmpfs_free_node(tmp, node); 4107013b092Sespie goto out; 4117013b092Sespie } 4127013b092Sespie 4137013b092Sespie /* Get a vnode for the new file. */ 4147013b092Sespie rw_enter_write(&node->tn_nlock); 4157013b092Sespie error = tmpfs_vnode_get(dvp->v_mount, node, vpp); 4167013b092Sespie if (error) { 4177013b092Sespie tmpfs_free_dirent(tmp, de); 4187013b092Sespie tmpfs_free_node(tmp, node); 4197013b092Sespie goto out; 4207013b092Sespie } 4217013b092Sespie 4227013b092Sespie /* Associate inode and attach the entry into the directory. */ 42357331246Sespie tmpfs_dir_attach(dnode, de, node); 4247013b092Sespie 4257013b092Sespie out: 4267013b092Sespie if (error == 0 && (cnp->cn_flags & SAVESTART) == 0) 4277013b092Sespie pool_put(&namei_pool, cnp->cn_pnbuf); 4287013b092Sespie return error; 4297013b092Sespie } 4307013b092Sespie 4317013b092Sespie /* 4327013b092Sespie * tmpfs_alloc_dirent: allocates a new directory entry for the inode. 
4337013b092Sespie * The directory entry contains a path name component. 4347013b092Sespie */ 4357013b092Sespie int 4367013b092Sespie tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len, 4377013b092Sespie tmpfs_dirent_t **de) 4387013b092Sespie { 4397013b092Sespie tmpfs_dirent_t *nde; 4407013b092Sespie 4417013b092Sespie nde = tmpfs_dirent_get(tmp); 4427013b092Sespie if (nde == NULL) 4437013b092Sespie return ENOSPC; 4447013b092Sespie 4457013b092Sespie nde->td_name = tmpfs_strname_alloc(tmp, len); 4467013b092Sespie if (nde->td_name == NULL) { 4477013b092Sespie tmpfs_dirent_put(tmp, nde); 4487013b092Sespie return ENOSPC; 4497013b092Sespie } 4507013b092Sespie nde->td_namelen = len; 4517013b092Sespie memcpy(nde->td_name, name, len); 45257331246Sespie nde->td_seq = TMPFS_DIRSEQ_NONE; 4537013b092Sespie 4547013b092Sespie *de = nde; 4557013b092Sespie return 0; 4567013b092Sespie } 4577013b092Sespie 4587013b092Sespie /* 4597013b092Sespie * tmpfs_free_dirent: free a directory entry. 4607013b092Sespie */ 4617013b092Sespie void 4627013b092Sespie tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) 4637013b092Sespie { 46457331246Sespie KASSERT(de->td_node == NULL); 46557331246Sespie KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE); 4667013b092Sespie tmpfs_strname_free(tmp, de->td_name, de->td_namelen); 4677013b092Sespie tmpfs_dirent_put(tmp, de); 4687013b092Sespie } 4697013b092Sespie 4707013b092Sespie /* 4717013b092Sespie * tmpfs_dir_attach: associate directory entry with a specified inode, 4727013b092Sespie * and attach the entry into the directory, specified by vnode. 4737013b092Sespie * 4747013b092Sespie * => Increases link count on the associated node. 4757013b092Sespie * => Increases link count on directory node, if our node is VDIR. 4767013b092Sespie * It is caller's responsibility to check for the LINK_MAX limit. 4777013b092Sespie * => Triggers kqueue events here. 
4787013b092Sespie */ 4797013b092Sespie void 48057331246Sespie tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node) 4817013b092Sespie { 48257331246Sespie struct vnode *dvp = dnode->tn_vnode; 4837013b092Sespie int events = NOTE_WRITE; 4847013b092Sespie 48557331246Sespie KASSERT(dvp != NULL); 4867013b092Sespie KASSERT(VOP_ISLOCKED(dvp)); 4877013b092Sespie 48857331246Sespie /* Get a new sequence number. */ 48957331246Sespie KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE); 49057331246Sespie de->td_seq = tmpfs_dir_getseq(dnode, de); 49157331246Sespie 4927013b092Sespie /* Associate directory entry and the inode. */ 4937013b092Sespie de->td_node = node; 4947013b092Sespie KASSERT(node->tn_links < LINK_MAX); 4957013b092Sespie node->tn_links++; 4967013b092Sespie 4977013b092Sespie /* Save the hint (might overwrite). */ 4987013b092Sespie node->tn_dirent_hint = de; 4997013b092Sespie 5007013b092Sespie /* Insert the entry to the directory (parent of inode). */ 5017013b092Sespie TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 5027013b092Sespie dnode->tn_size += sizeof(tmpfs_dirent_t); 5037013b092Sespie tmpfs_update(dnode, TMPFS_NODE_STATUSALL); 5047013b092Sespie uvm_vnp_setsize(dvp, dnode->tn_size); 5057013b092Sespie 50657331246Sespie if (node->tn_type == VDIR) { 5077013b092Sespie /* Set parent. */ 5087013b092Sespie KASSERT(node->tn_spec.tn_dir.tn_parent == NULL); 5097013b092Sespie node->tn_spec.tn_dir.tn_parent = dnode; 5107013b092Sespie 5117013b092Sespie /* Increase the link count of parent. 
*/ 5127013b092Sespie KASSERT(dnode->tn_links < LINK_MAX); 5137013b092Sespie dnode->tn_links++; 5147013b092Sespie events |= NOTE_LINK; 5157013b092Sespie 5167013b092Sespie TMPFS_VALIDATE_DIR(node); 5177013b092Sespie } 5187013b092Sespie VN_KNOTE(dvp, events); 5197013b092Sespie } 5207013b092Sespie 5217013b092Sespie /* 5227013b092Sespie * tmpfs_dir_detach: disassociate directory entry and its inode, 5237013b092Sespie * and detach the entry from the directory, specified by vnode. 5247013b092Sespie * 5257013b092Sespie * => Decreases link count on the associated node. 5267013b092Sespie * => Decreases the link count on directory node, if our node is VDIR. 5277013b092Sespie * => Triggers kqueue events here. 5287013b092Sespie */ 5297013b092Sespie void 53057331246Sespie tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de) 5317013b092Sespie { 5327013b092Sespie tmpfs_node_t *node = de->td_node; 53357331246Sespie struct vnode *vp, *dvp = dnode->tn_vnode; 5347013b092Sespie int events = NOTE_WRITE; 5357013b092Sespie 53657331246Sespie KASSERT(dvp == NULL || VOP_ISLOCKED(dvp)); 5377013b092Sespie 5387013b092Sespie /* Deassociate the inode and entry. */ 5397013b092Sespie de->td_node = NULL; 5407013b092Sespie node->tn_dirent_hint = NULL; 5417013b092Sespie 5427013b092Sespie KASSERT(node->tn_links > 0); 5437013b092Sespie node->tn_links--; 54457331246Sespie if ((vp = node->tn_vnode) != NULL) { 54557331246Sespie KASSERT(VOP_ISLOCKED(vp)); 54657331246Sespie VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE); 5477013b092Sespie } 5487013b092Sespie 5497013b092Sespie /* If directory - decrease the link count of parent. 
*/ 5507013b092Sespie if (node->tn_type == VDIR) { 5517013b092Sespie KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); 5527013b092Sespie node->tn_spec.tn_dir.tn_parent = NULL; 5537013b092Sespie 5547013b092Sespie KASSERT(dnode->tn_links > 0); 5557013b092Sespie dnode->tn_links--; 5567013b092Sespie events |= NOTE_LINK; 5577013b092Sespie } 5587013b092Sespie 5597013b092Sespie /* Remove the entry from the directory. */ 5607013b092Sespie if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { 5617013b092Sespie dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; 5627013b092Sespie } 5637013b092Sespie TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); 5647013b092Sespie 5657013b092Sespie dnode->tn_size -= sizeof(tmpfs_dirent_t); 56657331246Sespie tmpfs_update(dnode, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 56757331246Sespie tmpfs_dir_putseq(dnode, de); 56857331246Sespie if (dvp) { 56957331246Sespie tmpfs_update(dnode, 0); 5707013b092Sespie uvm_vnp_setsize(dvp, dnode->tn_size); 5717013b092Sespie VN_KNOTE(dvp, events); 5727013b092Sespie } 57357331246Sespie } 5747013b092Sespie 5757013b092Sespie /* 5767013b092Sespie * tmpfs_dir_lookup: find a directory entry in the specified inode. 5777013b092Sespie * 5787013b092Sespie * Note that the . and .. components are not allowed as they do not 5797013b092Sespie * physically exist within directories. 5807013b092Sespie */ 5817013b092Sespie tmpfs_dirent_t * 5827013b092Sespie tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp) 5837013b092Sespie { 5847013b092Sespie const char *name = cnp->cn_nameptr; 5857013b092Sespie const uint16_t nlen = cnp->cn_namelen; 5867013b092Sespie tmpfs_dirent_t *de; 5877013b092Sespie 5887013b092Sespie KASSERT(VOP_ISLOCKED(node->tn_vnode)); 5897013b092Sespie KASSERT(nlen != 1 || !(name[0] == '.')); 5907013b092Sespie KASSERT(nlen != 2 || !(name[0] == '.' 
&& name[1] == '.')); 5917013b092Sespie TMPFS_VALIDATE_DIR(node); 5927013b092Sespie 5937013b092Sespie TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { 5947013b092Sespie if (de->td_namelen != nlen) 5957013b092Sespie continue; 5967013b092Sespie if (memcmp(de->td_name, name, nlen) != 0) 5977013b092Sespie continue; 5987013b092Sespie break; 5997013b092Sespie } 6007013b092Sespie tmpfs_update(node, TMPFS_NODE_ACCESSED); 6017013b092Sespie return de; 6027013b092Sespie } 6037013b092Sespie 6047013b092Sespie /* 6057013b092Sespie * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to 60657331246Sespie * avoid unnecessary tmpfs_dir_lookup(). 6077013b092Sespie * 6087013b092Sespie * => The vnode must be locked. 6097013b092Sespie */ 6107013b092Sespie tmpfs_dirent_t * 6117013b092Sespie tmpfs_dir_cached(tmpfs_node_t *node) 6127013b092Sespie { 6137013b092Sespie tmpfs_dirent_t *de = node->tn_dirent_hint; 6147013b092Sespie 6157013b092Sespie KASSERT(VOP_ISLOCKED(node->tn_vnode)); 6167013b092Sespie 6177013b092Sespie if (de == NULL) { 6187013b092Sespie return NULL; 6197013b092Sespie } 6207013b092Sespie KASSERT(de->td_node == node); 6217013b092Sespie 6227013b092Sespie /* 6237013b092Sespie * Directories always have a valid hint. For files, check if there 6247013b092Sespie * are any hard links. If there are - hint might be invalid. 6257013b092Sespie */ 6267013b092Sespie return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de; 6277013b092Sespie } 6287013b092Sespie 6297013b092Sespie /* 63057331246Sespie * tmpfs_dir_getseq: get a per-directory sequence number for the entry. 6317013b092Sespie */ 63257331246Sespie uint64_t 63357331246Sespie tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de) 6347013b092Sespie { 63557331246Sespie uint64_t seq = de->td_seq; 6367013b092Sespie 63757331246Sespie TMPFS_VALIDATE_DIR(dnode); 6387013b092Sespie 63957331246Sespie if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) { 64057331246Sespie /* Already set. 
*/ 64157331246Sespie KASSERT(seq >= TMPFS_DIRSEQ_START); 64257331246Sespie return seq; 6437013b092Sespie } 6447013b092Sespie 6457013b092Sespie /* 64657331246Sespie * The "." and ".." and the end-of-directory have reserved numbers. 64757331246Sespie * The other sequence numbers are allocated incrementally. 6487013b092Sespie */ 6497013b092Sespie 65057331246Sespie seq = dnode->tn_spec.tn_dir.tn_next_seq; 65157331246Sespie KASSERT(seq >= TMPFS_DIRSEQ_START); 65257331246Sespie KASSERT(seq < TMPFS_DIRSEQ_END); 65357331246Sespie dnode->tn_spec.tn_dir.tn_next_seq++; 65457331246Sespie return seq; 6557013b092Sespie } 65657331246Sespie 65757331246Sespie void 65857331246Sespie tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de) 65957331246Sespie { 66057331246Sespie uint64_t seq = de->td_seq; 66157331246Sespie 66257331246Sespie TMPFS_VALIDATE_DIR(dnode); 66357331246Sespie KASSERT(seq == TMPFS_DIRSEQ_NONE || seq >= TMPFS_DIRSEQ_START); 66457331246Sespie KASSERT(seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END); 66557331246Sespie 66657331246Sespie de->td_seq = TMPFS_DIRSEQ_NONE; 66757331246Sespie 66857331246Sespie /* Empty? We can reset. */ 66957331246Sespie if (dnode->tn_size == 0) { 67057331246Sespie dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START; 67157331246Sespie } else if (seq != TMPFS_DIRSEQ_NONE && 67257331246Sespie seq == dnode->tn_spec.tn_dir.tn_next_seq - 1) { 67357331246Sespie dnode->tn_spec.tn_dir.tn_next_seq--; 67457331246Sespie } 6757013b092Sespie } 6767013b092Sespie 6777013b092Sespie /* 67857331246Sespie * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number. 6797013b092Sespie */ 6807013b092Sespie tmpfs_dirent_t * 68157331246Sespie tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq) 68257331246Sespie { 68357331246Sespie tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp; 68457331246Sespie 68557331246Sespie TMPFS_VALIDATE_DIR(node); 68657331246Sespie 68757331246Sespie /* 68857331246Sespie * First, check the cache. 
If does not match - perform a lookup.
	 */
	if (de && de->td_seq == seq) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		return de;
	}
	/* Linear scan of the directory's entry list for a matching number. */
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		if (de->td_seq == seq)
			return de;
	}
	/* No entry in the list carries this sequence number. */
	return NULL;
}

/*
 * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
 * dot meta entries, that is, "." or "..".  Copy it to the UIO space.
 *
 * => uio->uio_offset selects the entry and must be TMPFS_DIRSEQ_DOT or
 *    TMPFS_DIRSEQ_DOTDOT; any other value trips the KASSERT.
 * => dp is caller-provided space in which the record is built.
 * => Returns EJUSTRETURN if the record does not fit in uio->uio_resid,
 *    otherwise the result of uiomove().
 * => On success uio->uio_offset is advanced to the sequence number of
 *    the entry that follows (the same value is stored in d_off).
 */
int
tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
{
	tmpfs_dirent_t *de;
	off_t next = 0;
	int error;

	switch (uio->uio_offset) {
	case TMPFS_DIRSEQ_DOT:
		dp->d_fileno = node->tn_id;
		strlcpy(dp->d_name, ".", sizeof(dp->d_name));
		next = TMPFS_DIRSEQ_DOTDOT;
		break;
	case TMPFS_DIRSEQ_DOTDOT:
		dp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
		strlcpy(dp->d_name, "..", sizeof(dp->d_name));
		/* ".." is followed by the first real entry, or by EOF. */
		de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
		next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
		break;
	default:
		KASSERT(false);
	}
	dp->d_type = DT_DIR;
	dp->d_namlen = strlen(dp->d_name);
	dp->d_reclen = DIRENT_SIZE(dp);
	dp->d_off = next;

	/* Not enough room left: leave the offset alone so we can resume. */
	if (dp->d_reclen > uio->uio_resid) {
		return EJUSTRETURN;
	}

	if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
		return error;
	}

	uio->uio_offset = next;
	return error;
}

/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as many directory entries as can fit in the uio space.
 * => The read starts at uio->uio_offset.
 * => Returns EINVAL if uio->uio_offset does not name an entry, or if an
 *    entry name contains a '/'; otherwise 0 or the error from uiomove().
 *    Running out of uio space (EJUSTRETURN) is reported as success.
 * => The access time of the node is updated on every path, including
 *    the error paths.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio)
{
	tmpfs_dirent_t *de, *next_de;
	struct dirent dent;
	int error = 0;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);
	memset(&dent, 0, sizeof(dent));

	/*
	 * The two checks are deliberately not chained with "else": after
	 * "." has been emitted the offset becomes TMPFS_DIRSEQ_DOTDOT, so
	 * ".." is emitted by the same call.
	 */
	if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	/* Done if we reached the end. */
	if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
		goto done;
	}

	/* Locate the directory entry given by the given sequence number. */
	de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
	if (de == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust UIO space.
	 */
	do {
		dent.d_fileno = de->td_node->tn_id;
		/* Translate the vnode type into the dirent d_type code. */
		switch (de->td_node->tn_type) {
		case VBLK:
			dent.d_type = DT_BLK;
			break;
		case VCHR:
			dent.d_type = DT_CHR;
			break;
		case VDIR:
			dent.d_type = DT_DIR;
			break;
		case VFIFO:
			dent.d_type = DT_FIFO;
			break;
		case VLNK:
			dent.d_type = DT_LNK;
			break;
		case VREG:
			dent.d_type = DT_REG;
			break;
		case VSOCK:
			dent.d_type = DT_SOCK;
			break;
		default:
			KASSERT(0);
		}
		dent.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dent.d_name));
		memcpy(dent.d_name, de->td_name, de->td_namelen);
		dent.d_name[de->td_namelen] = '\0';
		dent.d_reclen = DIRENT_SIZE(&dent);

		/* Refuse to hand out a name that contains a path separator. */
		if (memchr(dent.d_name, '/', dent.d_namlen) != NULL) {
			error = EINVAL;
			break;
		}

		/* d_off is the sequence number of the entry after this one. */
		next_de = TAILQ_NEXT(de, td_entries);
		if (next_de == NULL)
			dent.d_off = TMPFS_DIRSEQ_EOF;
		else
			dent.d_off = tmpfs_dir_getseq(node, next_de);

		if (dent.d_reclen > uio->uio_resid) {
			/* Exhausted UIO space. */
			error = EJUSTRETURN;
			break;
		}

		/* Copy out the directory entry and continue. */
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error) {
			break;
		}
		de = TAILQ_NEXT(de, td_entries);

	} while (uio->uio_resid > 0 && de);

	/*
	 * Cache the last entry or clear and mark EOF.  After a "break" above
	 * de still points at the entry that was not copied out, so the next
	 * read resumes there.
	 */
	uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
	node->tn_spec.tn_dir.tn_readdir_lastp = de;
done:
	tmpfs_update(node, TMPFS_NODE_ACCESSED);

	if (error == EJUSTRETURN) {
		/* Exhausted UIO space - just return. */
		error = 0;
	}
	KASSERT(error >= 0);
	return error;
}

/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
8667013b092Sespie */ 8677013b092Sespie 8687013b092Sespie int 8697013b092Sespie tmpfs_reg_resize(struct vnode *vp, off_t newsize) 8707013b092Sespie { 8717013b092Sespie tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); 8727013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 8737013b092Sespie struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; 8747013b092Sespie size_t newpages, oldpages, bytes; 8757013b092Sespie off_t oldsize; 8767013b092Sespie vaddr_t pgoff; 8777013b092Sespie int error; 8787013b092Sespie 8797013b092Sespie KASSERT(vp->v_type == VREG); 8807013b092Sespie KASSERT(newsize >= 0); 8817013b092Sespie 8827013b092Sespie oldsize = node->tn_size; 8837013b092Sespie oldpages = round_page(oldsize) >> PAGE_SHIFT; 8847013b092Sespie newpages = round_page(newsize) >> PAGE_SHIFT; 8857013b092Sespie KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); 8867013b092Sespie 8877013b092Sespie if (newpages > oldpages) { 8887013b092Sespie /* Increase the used-memory counter if getting extra pages. */ 8897013b092Sespie bytes = (newpages - oldpages) << PAGE_SHIFT; 8907013b092Sespie if (tmpfs_mem_incr(tmp, bytes) == 0) 8917013b092Sespie return ENOSPC; 892af1a0040Smvs rw_enter(uobj->vmobjlock, RW_WRITE); 893af1a0040Smvs error = uao_grow(uobj, newpages); 894af1a0040Smvs rw_exit(uobj->vmobjlock); 895af1a0040Smvs if (error) { 8967013b092Sespie tmpfs_mem_decr(tmp, bytes); 8977013b092Sespie return ENOSPC; 8987013b092Sespie } 8997013b092Sespie } 9007013b092Sespie 9017013b092Sespie node->tn_spec.tn_reg.tn_aobj_pages = newpages; 9027013b092Sespie node->tn_size = newsize; 9037013b092Sespie uvm_vnp_setsize(vp, newsize); 9047013b092Sespie uvm_vnp_uncache(vp); 9057013b092Sespie 9067013b092Sespie /* 9077013b092Sespie * Free "backing store". 
9087013b092Sespie */ 9097013b092Sespie if (newpages < oldpages) { 9107013b092Sespie if (tmpfs_uio_cached(node)) 9117013b092Sespie tmpfs_uio_uncache(node); 912af1a0040Smvs rw_enter(uobj->vmobjlock, RW_WRITE); 9137013b092Sespie if (uao_shrink(uobj, newpages)) 9147013b092Sespie panic("shrink failed"); 915af1a0040Smvs rw_exit(uobj->vmobjlock); 9167013b092Sespie /* Decrease the used-memory counter. */ 9177013b092Sespie tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); 9187013b092Sespie } 9197013b092Sespie if (newsize > oldsize) { 9207013b092Sespie if (tmpfs_uio_cached(node)) 9217013b092Sespie tmpfs_uio_uncache(node); 9227013b092Sespie pgoff = oldsize & PAGE_MASK; 9237013b092Sespie if (pgoff != 0) { 9247013b092Sespie /* 9257013b092Sespie * Growing from an offset which is not at a page 9267013b092Sespie * boundary; zero out unused bytes in current page. 9277013b092Sespie */ 9287013b092Sespie error = tmpfs_zeropg(node, trunc_page(oldsize), pgoff); 9297013b092Sespie if (error) 9307013b092Sespie panic("tmpfs_zeropg: error %d", error); 9317013b092Sespie } 9327013b092Sespie VN_KNOTE(vp, NOTE_EXTEND); 9337013b092Sespie } 9347013b092Sespie return 0; 9357013b092Sespie } 9367013b092Sespie 9377013b092Sespie /* 9387013b092Sespie * tmpfs_chflags: change flags of the given vnode. 9397013b092Sespie * 9407013b092Sespie */ 9417013b092Sespie int 9427013b092Sespie tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p) 9437013b092Sespie { 9447013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 9457013b092Sespie int error; 9467013b092Sespie 9477013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 9487013b092Sespie 9497013b092Sespie /* Disallow this operation if the file system is mounted read-only. 
*/ 9507013b092Sespie if (vp->v_mount->mnt_flag & MNT_RDONLY) 9517013b092Sespie return EROFS; 9527013b092Sespie 9537013b092Sespie if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred))) 9547013b092Sespie return error; 9557013b092Sespie 9567013b092Sespie if (cred->cr_uid == 0) { 9577013b092Sespie if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) && 9587013b092Sespie securelevel > 0) 9597013b092Sespie return EPERM; 9607013b092Sespie node->tn_flags = flags; 9617013b092Sespie } else { 9627013b092Sespie if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) || 9637013b092Sespie (flags & UF_SETTABLE) != flags) 9647013b092Sespie return EPERM; 9657013b092Sespie node->tn_flags &= SF_SETTABLE; 9667013b092Sespie node->tn_flags |= (flags & UF_SETTABLE); 9677013b092Sespie } 9687013b092Sespie 9697013b092Sespie tmpfs_update(node, TMPFS_NODE_CHANGED); 9707013b092Sespie VN_KNOTE(vp, NOTE_ATTRIB); 9717013b092Sespie return 0; 9727013b092Sespie } 9737013b092Sespie 9747013b092Sespie /* 9757013b092Sespie * tmpfs_chmod: change access mode on the given vnode. 9767013b092Sespie * 9777013b092Sespie */ 9787013b092Sespie int 9797013b092Sespie tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p) 9807013b092Sespie { 9817013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 9827013b092Sespie int error; 9837013b092Sespie 9847013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 9857013b092Sespie 9867013b092Sespie /* Disallow this operation if the file system is mounted read-only. */ 9877013b092Sespie if (vp->v_mount->mnt_flag & MNT_RDONLY) 9887013b092Sespie return EROFS; 9897013b092Sespie 9907013b092Sespie /* Immutable or append-only files cannot be modified, either. 
*/ 9917013b092Sespie if (node->tn_flags & (IMMUTABLE | APPEND)) 9927013b092Sespie return EPERM; 9937013b092Sespie 9947013b092Sespie if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred))) 9957013b092Sespie return error; 9967013b092Sespie if (cred->cr_uid != 0) { 9977013b092Sespie if (vp->v_type != VDIR && (mode & S_ISTXT)) 9987013b092Sespie return EFTYPE; 9997013b092Sespie if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) 10007013b092Sespie return EPERM; 10017013b092Sespie } 10027013b092Sespie 10037013b092Sespie node->tn_mode = (mode & ALLPERMS); 10047013b092Sespie tmpfs_update(node, TMPFS_NODE_CHANGED); 10057013b092Sespie if ((vp->v_flag & VTEXT) && (node->tn_mode & S_ISTXT) == 0) 10067013b092Sespie uvm_vnp_uncache(vp); 10077013b092Sespie VN_KNOTE(vp, NOTE_ATTRIB); 10087013b092Sespie return 0; 10097013b092Sespie } 10107013b092Sespie 10117013b092Sespie /* 10127013b092Sespie * tmpfs_chown: change ownership of the given vnode. 10137013b092Sespie * 10147013b092Sespie * => At least one of uid or gid must be different than VNOVAL. 10157013b092Sespie * => Attribute is unchanged for VNOVAL case. 10167013b092Sespie */ 10177013b092Sespie int 10187013b092Sespie tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct proc *p) 10197013b092Sespie { 10207013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 10217013b092Sespie int error; 10227013b092Sespie 10237013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 10247013b092Sespie 10257013b092Sespie /* Assign default values if they are unknown. */ 10267013b092Sespie KASSERT(uid != VNOVAL || gid != VNOVAL); 10277013b092Sespie if (uid == VNOVAL) { 10287013b092Sespie uid = node->tn_uid; 10297013b092Sespie } 10307013b092Sespie if (gid == VNOVAL) { 10317013b092Sespie gid = node->tn_gid; 10327013b092Sespie } 10337013b092Sespie 10347013b092Sespie /* Disallow this operation if the file system is mounted read-only. 
*/ 10357013b092Sespie if (vp->v_mount->mnt_flag & MNT_RDONLY) 10367013b092Sespie return EROFS; 10377013b092Sespie 10387013b092Sespie /* Immutable or append-only files cannot be modified, either. */ 10397013b092Sespie if (node->tn_flags & (IMMUTABLE | APPEND)) 10407013b092Sespie return EPERM; 10417013b092Sespie 10427013b092Sespie if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid || 10437013b092Sespie (gid != node->tn_gid && !groupmember(gid, cred))) && 10447013b092Sespie (error = suser_ucred(cred))) 10457013b092Sespie return error; 10467013b092Sespie 10477013b092Sespie node->tn_uid = uid; 10487013b092Sespie node->tn_gid = gid; 10497013b092Sespie tmpfs_update(node, TMPFS_NODE_CHANGED); 10507013b092Sespie VN_KNOTE(vp, NOTE_ATTRIB); 10517013b092Sespie return 0; 10527013b092Sespie } 10537013b092Sespie 10547013b092Sespie /* 10557013b092Sespie * tmpfs_chsize: change size of the given vnode. 10567013b092Sespie */ 10577013b092Sespie int 10587013b092Sespie tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, struct proc *p) 10597013b092Sespie { 10607013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 10617013b092Sespie 10627013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 10637013b092Sespie 10647013b092Sespie /* Decide whether this is a valid operation based on the file type. */ 10657013b092Sespie switch (vp->v_type) { 10667013b092Sespie case VDIR: 10677013b092Sespie return EISDIR; 10687013b092Sespie case VREG: 10697013b092Sespie if (vp->v_mount->mnt_flag & MNT_RDONLY) { 10707013b092Sespie return EROFS; 10717013b092Sespie } 10727013b092Sespie break; 10737013b092Sespie case VBLK: 10747013b092Sespie case VCHR: 10757013b092Sespie case VFIFO: 10767013b092Sespie /* 10777013b092Sespie * Allow modifications of special files even if in the file 10787013b092Sespie * system is mounted read-only (we are not modifying the 10797013b092Sespie * files themselves, but the objects they represent). 
10807013b092Sespie */ 10817013b092Sespie return 0; 10827013b092Sespie default: 10837013b092Sespie return EOPNOTSUPP; 10847013b092Sespie } 10857013b092Sespie 10867013b092Sespie /* Immutable or append-only files cannot be modified, either. */ 10877013b092Sespie if (node->tn_flags & (IMMUTABLE | APPEND)) { 10887013b092Sespie return EPERM; 10897013b092Sespie } 10907013b092Sespie 10917013b092Sespie /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */ 10927013b092Sespie return tmpfs_truncate(vp, size); 10937013b092Sespie } 10947013b092Sespie 10957013b092Sespie /* 10967013b092Sespie * tmpfs_chtimes: change access and modification times for vnode. 10977013b092Sespie */ 10987013b092Sespie int 10997013b092Sespie tmpfs_chtimes(struct vnode *vp, const struct timespec *atime, 11007013b092Sespie const struct timespec *mtime, int vaflags, struct ucred *cred, 11017013b092Sespie struct proc *p) 11027013b092Sespie { 11037013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 11047013b092Sespie int error; 11057013b092Sespie 11067013b092Sespie KASSERT(VOP_ISLOCKED(vp)); 11077013b092Sespie 11087013b092Sespie /* Disallow this operation if the file system is mounted read-only. */ 11097013b092Sespie if (vp->v_mount->mnt_flag & MNT_RDONLY) 11107013b092Sespie return EROFS; 11117013b092Sespie 11127013b092Sespie /* Immutable or append-only files cannot be modified, either. 
*/ 11137013b092Sespie if (node->tn_flags & (IMMUTABLE | APPEND)) 11147013b092Sespie return EPERM; 11157013b092Sespie 11167013b092Sespie if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)) && 11177013b092Sespie ((vaflags & VA_UTIMES_NULL) == 0 || 11187013b092Sespie (error = VOP_ACCESS(vp, VWRITE, cred, p)))) 11197013b092Sespie return error; 11207013b092Sespie 11214707cbe3Sguenther if (atime->tv_nsec != VNOVAL) 11227013b092Sespie node->tn_atime = *atime; 11237013b092Sespie 11244707cbe3Sguenther if (mtime->tv_nsec != VNOVAL) 11257013b092Sespie node->tn_mtime = *mtime; 11264707cbe3Sguenther 11274707cbe3Sguenther if (mtime->tv_nsec != VNOVAL || (vaflags & VA_UTIMES_CHANGE)) 11284707cbe3Sguenther tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_CHANGED); 11294707cbe3Sguenther 11307013b092Sespie VN_KNOTE(vp, NOTE_ATTRIB); 11314707cbe3Sguenther 11327013b092Sespie return 0; 11337013b092Sespie } 11347013b092Sespie 11357013b092Sespie /* 11367013b092Sespie * tmpfs_update: update timestamps, et al. 
11377013b092Sespie */ 11387013b092Sespie void 11397013b092Sespie tmpfs_update(tmpfs_node_t *node, int flags) 11407013b092Sespie { 11417013b092Sespie struct timespec nowtm; 11427013b092Sespie 11437013b092Sespie nanotime(&nowtm); 11447013b092Sespie 11457013b092Sespie if (flags & TMPFS_NODE_ACCESSED) { 11467013b092Sespie node->tn_atime = nowtm; 11477013b092Sespie } 11487013b092Sespie if (flags & TMPFS_NODE_MODIFIED) { 11497013b092Sespie node->tn_mtime = nowtm; 11507013b092Sespie } 11517013b092Sespie if (flags & TMPFS_NODE_CHANGED) { 11527013b092Sespie node->tn_ctime = nowtm; 11537013b092Sespie } 11547013b092Sespie } 11557013b092Sespie 11567013b092Sespie int 11577013b092Sespie tmpfs_truncate(struct vnode *vp, off_t length) 11587013b092Sespie { 11597013b092Sespie tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 11607013b092Sespie int error; 11617013b092Sespie 11627013b092Sespie if (length < 0) { 11637013b092Sespie error = EINVAL; 11647013b092Sespie goto out; 11657013b092Sespie } 11667013b092Sespie if (node->tn_size == length) { 11677013b092Sespie error = 0; 11687013b092Sespie goto out; 11697013b092Sespie } 11707013b092Sespie error = tmpfs_reg_resize(vp, length); 11717013b092Sespie if (error == 0) { 11727013b092Sespie tmpfs_update(node, TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED); 11737013b092Sespie } 11747013b092Sespie out: 11757013b092Sespie return error; 11767013b092Sespie } 11777013b092Sespie 11787013b092Sespie int 11797013b092Sespie tmpfs_uio_cached(tmpfs_node_t *node) 11807013b092Sespie { 11817013b092Sespie int pgnum_valid = (node->tn_pgnum != (voff_t)-1); 11827013b092Sespie int pgptr_valid = (node->tn_pgptr != (vaddr_t)NULL); 11837013b092Sespie KASSERT(pgnum_valid == pgptr_valid); 11847013b092Sespie return pgnum_valid && pgptr_valid; 11857013b092Sespie } 11867013b092Sespie 11877013b092Sespie vaddr_t 11887013b092Sespie tmpfs_uio_lookup(tmpfs_node_t *node, voff_t pgnum) 11897013b092Sespie { 11907013b092Sespie if (tmpfs_uio_cached(node) == 1 && node->tn_pgnum == pgnum) 
11917013b092Sespie return node->tn_pgptr; 11927013b092Sespie 11937013b092Sespie return (vaddr_t)NULL; 11947013b092Sespie } 11957013b092Sespie 11967013b092Sespie void 11977013b092Sespie tmpfs_uio_uncache(tmpfs_node_t *node) 11987013b092Sespie { 11997013b092Sespie KASSERT(node->tn_pgnum != (voff_t)-1); 12007013b092Sespie KASSERT(node->tn_pgptr != (vaddr_t)NULL); 12017013b092Sespie uvm_unmap(kernel_map, node->tn_pgptr, node->tn_pgptr + PAGE_SIZE); 12027013b092Sespie node->tn_pgnum = (voff_t)-1; 12037013b092Sespie node->tn_pgptr = (vaddr_t)NULL; 12047013b092Sespie } 12057013b092Sespie 12067013b092Sespie void 12077013b092Sespie tmpfs_uio_cache(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgptr) 12087013b092Sespie { 12097013b092Sespie KASSERT(node->tn_pgnum == (voff_t)-1); 12107013b092Sespie KASSERT(node->tn_pgptr == (vaddr_t)NULL); 12117013b092Sespie node->tn_pgnum = pgnum; 12127013b092Sespie node->tn_pgptr = pgptr; 12137013b092Sespie } 12147013b092Sespie 12157013b092Sespie /* 12167013b092Sespie * Be gentle to kernel_map, don't allow more than 4MB in a single transaction. 
12177013b092Sespie */ 12187013b092Sespie #define TMPFS_UIO_MAXBYTES ((1 << 22) - PAGE_SIZE) 12197013b092Sespie 12207013b092Sespie int 12217013b092Sespie tmpfs_uiomove(tmpfs_node_t *node, struct uio *uio, vsize_t len) 12227013b092Sespie { 12237013b092Sespie vaddr_t va, pgoff; 12247013b092Sespie int error, adv; 12257013b092Sespie voff_t pgnum; 12267013b092Sespie vsize_t sz; 12277013b092Sespie 12287013b092Sespie pgnum = trunc_page(uio->uio_offset); 12297013b092Sespie pgoff = uio->uio_offset & PAGE_MASK; 12307013b092Sespie 12317013b092Sespie if (pgoff + len < PAGE_SIZE) { 12327013b092Sespie va = tmpfs_uio_lookup(node, pgnum); 12337013b092Sespie if (va != (vaddr_t)NULL) 1234653068caSstefan return uiomove((void *)va + pgoff, len, uio); 12357013b092Sespie } 12367013b092Sespie 12377013b092Sespie if (len >= TMPFS_UIO_MAXBYTES) { 12387013b092Sespie sz = TMPFS_UIO_MAXBYTES; 123915cd8707Sguenther adv = MADV_NORMAL; 12407013b092Sespie } else { 12417013b092Sespie sz = len; 124215cd8707Sguenther adv = MADV_SEQUENTIAL; 12437013b092Sespie } 12447013b092Sespie 12457013b092Sespie if (tmpfs_uio_cached(node)) 12467013b092Sespie tmpfs_uio_uncache(node); 12477013b092Sespie 12487013b092Sespie uao_reference(node->tn_uobj); 12497013b092Sespie 12507013b092Sespie error = uvm_map(kernel_map, &va, round_page(pgoff + sz), node->tn_uobj, 12511e8cdc2eSderaadt trunc_page(uio->uio_offset), 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1252e087cc70Sguenther PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, adv, 0)); 12537013b092Sespie if (error) { 12547013b092Sespie uao_detach(node->tn_uobj); /* Drop reference. 
*/ 12557013b092Sespie return error; 12567013b092Sespie } 12577013b092Sespie 1258653068caSstefan error = uiomove((void *)va + pgoff, sz, uio); 12597013b092Sespie if (error == 0 && pgoff + sz < PAGE_SIZE) 12607013b092Sespie tmpfs_uio_cache(node, pgnum, va); 12617013b092Sespie else 12627013b092Sespie uvm_unmap(kernel_map, va, va + round_page(pgoff + sz)); 12637013b092Sespie 12647013b092Sespie return error; 12657013b092Sespie } 12667013b092Sespie 12677013b092Sespie int 12687013b092Sespie tmpfs_zeropg(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgoff) 12697013b092Sespie { 12707013b092Sespie vaddr_t va; 12717013b092Sespie int error; 12727013b092Sespie 12737013b092Sespie KASSERT(tmpfs_uio_cached(node) == 0); 12747013b092Sespie 12757013b092Sespie uao_reference(node->tn_uobj); 12767013b092Sespie 12777013b092Sespie error = uvm_map(kernel_map, &va, PAGE_SIZE, node->tn_uobj, pgnum, 0, 12781e8cdc2eSderaadt UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 127915cd8707Sguenther MAP_INHERIT_NONE, MADV_NORMAL, 0)); 12807013b092Sespie if (error) { 12817013b092Sespie uao_detach(node->tn_uobj); /* Drop reference. */ 12827013b092Sespie return error; 12837013b092Sespie } 12847013b092Sespie 12857013b092Sespie bzero((void *)va + pgoff, PAGE_SIZE - pgoff); 12867013b092Sespie uvm_unmap(kernel_map, va, va + PAGE_SIZE); 12877013b092Sespie 12887013b092Sespie return 0; 12897013b092Sespie } 1290