1ad3638eeSXin LI /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2d1fa59e9SXin LI 3e08d5567SXin LI /*- 4b61a5730SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 5d63027b6SPedro F. Giffuni * 6d1fa59e9SXin LI * Copyright (c) 2005 The NetBSD Foundation, Inc. 7d1fa59e9SXin LI * All rights reserved. 8d1fa59e9SXin LI * 9d1fa59e9SXin LI * This code is derived from software contributed to The NetBSD Foundation 10d1fa59e9SXin LI * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11d1fa59e9SXin LI * 2005 program. 12d1fa59e9SXin LI * 13d1fa59e9SXin LI * Redistribution and use in source and binary forms, with or without 14d1fa59e9SXin LI * modification, are permitted provided that the following conditions 15d1fa59e9SXin LI * are met: 16d1fa59e9SXin LI * 1. Redistributions of source code must retain the above copyright 17d1fa59e9SXin LI * notice, this list of conditions and the following disclaimer. 18d1fa59e9SXin LI * 2. Redistributions in binary form must reproduce the above copyright 19d1fa59e9SXin LI * notice, this list of conditions and the following disclaimer in the 20d1fa59e9SXin LI * documentation and/or other materials provided with the distribution. 21d1fa59e9SXin LI * 22d1fa59e9SXin LI * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23d1fa59e9SXin LI * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24d1fa59e9SXin LI * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25d1fa59e9SXin LI * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26d1fa59e9SXin LI * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27d1fa59e9SXin LI * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28d1fa59e9SXin LI * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29d1fa59e9SXin LI * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30d1fa59e9SXin LI * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31d1fa59e9SXin LI * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32d1fa59e9SXin LI * POSSIBILITY OF SUCH DAMAGE. 33d1fa59e9SXin LI */ 34d1fa59e9SXin LI 35d1fa59e9SXin LI /* 36d1fa59e9SXin LI * Efficient memory file system supporting functions. 37d1fa59e9SXin LI */ 38fdafd315SWarner Losh 39d1fa59e9SXin LI #include <sys/param.h> 406d2e2df7SMark Johnston #include <sys/systm.h> 41135beaf6SGleb Smirnoff #include <sys/dirent.h> 424fd5efe7SGleb Kurtsou #include <sys/fnv_hash.h> 4389f6b863SAttilio Rao #include <sys/lock.h> 44135beaf6SGleb Smirnoff #include <sys/limits.h> 45135beaf6SGleb Smirnoff #include <sys/mount.h> 46d1fa59e9SXin LI #include <sys/namei.h> 47d1fa59e9SXin LI #include <sys/priv.h> 48d1fa59e9SXin LI #include <sys/proc.h> 49d1b06863SMark Murray #include <sys/random.h> 504601f5f5SKonstantin Belousov #include <sys/refcount.h> 5189f6b863SAttilio Rao #include <sys/rwlock.h> 52081e36e7SKonstantin Belousov #include <sys/smr.h> 53d1fa59e9SXin LI #include <sys/stat.h> 54db94ad12SGleb Kurtsou #include <sys/sysctl.h> 5528bc23abSKonstantin Belousov #include <sys/user.h> 56d1fa59e9SXin LI #include <sys/vnode.h> 57d1fa59e9SXin LI #include <sys/vmmeter.h> 58d1fa59e9SXin LI 59d1fa59e9SXin LI #include <vm/vm.h> 601c771f92SKonstantin Belousov #include <vm/vm_param.h> 61d1fa59e9SXin LI #include <vm/vm_object.h> 62d1fa59e9SXin LI #include <vm/vm_page.h> 632971897dSAlan Cox #include <vm/vm_pageout.h> 64d1fa59e9SXin LI #include <vm/vm_pager.h> 
65d1fa59e9SXin LI #include <vm/vm_extern.h> 66135beaf6SGleb Smirnoff #include <vm/swap_pager.h> 676bb132baSBrooks Davis #include <vm/uma.h> 68d1fa59e9SXin LI 69d1fa59e9SXin LI #include <fs/tmpfs/tmpfs.h> 70d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_fifoops.h> 71d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_vnops.h> 72d1fa59e9SXin LI 737029da5cSPawel Biernacki SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 747029da5cSPawel Biernacki "tmpfs file system"); 75db94ad12SGleb Kurtsou 76da7aa277SGleb Kurtsou static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 7763659234SMike Karels static long tmpfs_pages_avail_init; 7863659234SMike Karels static int tmpfs_mem_percent = TMPFS_MEM_PERCENT; 7963659234SMike Karels static void tmpfs_set_reserve_from_percent(void); 80da7aa277SGleb Kurtsou 817c58c37eSMateusz Guzik MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure"); 82a51c8071SKonstantin Belousov static uma_zone_t tmpfs_node_pool; 83172ffe70SMateusz Guzik VFS_SMR_DECLARE; 84a51c8071SKonstantin Belousov 8528bc23abSKonstantin Belousov int tmpfs_pager_type = -1; 8628bc23abSKonstantin Belousov 8728bc23abSKonstantin Belousov static vm_object_t 8828bc23abSKonstantin Belousov tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 8928bc23abSKonstantin Belousov vm_ooffset_t offset, struct ucred *cred) 9028bc23abSKonstantin Belousov { 9128bc23abSKonstantin Belousov vm_object_t object; 9228bc23abSKonstantin Belousov 9328bc23abSKonstantin Belousov MPASS(handle == NULL); 9428bc23abSKonstantin Belousov MPASS(offset == 0); 9528bc23abSKonstantin Belousov object = vm_object_allocate_dyn(tmpfs_pager_type, size, 9628bc23abSKonstantin Belousov OBJ_COLORED | OBJ_SWAP); 9728bc23abSKonstantin Belousov if (!swap_pager_init_object(object, NULL, NULL, size, 0)) { 9828bc23abSKonstantin Belousov vm_object_deallocate(object); 9928bc23abSKonstantin Belousov object = NULL; 10028bc23abSKonstantin Belousov } 10128bc23abSKonstantin Belousov return (object); 
10228bc23abSKonstantin Belousov } 10328bc23abSKonstantin Belousov 104eec2e4efSMateusz Guzik /* 105eec2e4efSMateusz Guzik * Make sure tmpfs vnodes with writable mappings can be found on the lazy list. 106eec2e4efSMateusz Guzik * 107eec2e4efSMateusz Guzik * This allows for periodic mtime updates while only scanning vnodes which are 108eec2e4efSMateusz Guzik * plausibly dirty, see tmpfs_update_mtime_lazy. 109eec2e4efSMateusz Guzik */ 110eec2e4efSMateusz Guzik static void 111eec2e4efSMateusz Guzik tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, 112eec2e4efSMateusz Guzik vm_offset_t new) 113eec2e4efSMateusz Guzik { 114eec2e4efSMateusz Guzik struct vnode *vp; 115eec2e4efSMateusz Guzik 116eec2e4efSMateusz Guzik VM_OBJECT_ASSERT_WLOCKED(object); 117eec2e4efSMateusz Guzik 118d9dc64f1SKonstantin Belousov vp = VM_TO_TMPFS_VP(object); 119eec2e4efSMateusz Guzik 120eec2e4efSMateusz Guzik /* 121eec2e4efSMateusz Guzik * Forced unmount? 122eec2e4efSMateusz Guzik */ 12346811949SKonstantin Belousov if (vp == NULL || vp->v_object == NULL) { 124eec2e4efSMateusz Guzik KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 1250f01fb01SKonstantin Belousov ("object %p with OBJ_TMPFS_VREF but without vnode", 1260f01fb01SKonstantin Belousov object)); 127eec2e4efSMateusz Guzik VM_OBJECT_WUNLOCK(object); 128eec2e4efSMateusz Guzik return; 129eec2e4efSMateusz Guzik } 130eec2e4efSMateusz Guzik 131eec2e4efSMateusz Guzik if (old == 0) { 132eec2e4efSMateusz Guzik VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp, 133eec2e4efSMateusz Guzik ("object without writable mappings has a reference")); 134eec2e4efSMateusz Guzik VNPASS(vp->v_usecount > 0, vp); 135eec2e4efSMateusz Guzik } else { 136eec2e4efSMateusz Guzik VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp, 1370f01fb01SKonstantin Belousov ("object with writable mappings does not " 1380f01fb01SKonstantin Belousov "have a reference")); 139eec2e4efSMateusz Guzik } 140eec2e4efSMateusz Guzik 141eec2e4efSMateusz Guzik if (old == new) { 
142eec2e4efSMateusz Guzik VM_OBJECT_WUNLOCK(object); 143eec2e4efSMateusz Guzik return; 144eec2e4efSMateusz Guzik } 145eec2e4efSMateusz Guzik 146eec2e4efSMateusz Guzik if (new == 0) { 147eec2e4efSMateusz Guzik vm_object_clear_flag(object, OBJ_TMPFS_VREF); 148eec2e4efSMateusz Guzik VM_OBJECT_WUNLOCK(object); 149eec2e4efSMateusz Guzik vrele(vp); 150eec2e4efSMateusz Guzik } else { 151eec2e4efSMateusz Guzik if ((object->flags & OBJ_TMPFS_VREF) == 0) { 152eec2e4efSMateusz Guzik vref(vp); 153eec2e4efSMateusz Guzik vlazy(vp); 154eec2e4efSMateusz Guzik vm_object_set_flag(object, OBJ_TMPFS_VREF); 155eec2e4efSMateusz Guzik } 156eec2e4efSMateusz Guzik VM_OBJECT_WUNLOCK(object); 157eec2e4efSMateusz Guzik } 158eec2e4efSMateusz Guzik } 159eec2e4efSMateusz Guzik 160eec2e4efSMateusz Guzik static void 161eec2e4efSMateusz Guzik tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start, 162eec2e4efSMateusz Guzik vm_offset_t end) 163eec2e4efSMateusz Guzik { 164eec2e4efSMateusz Guzik vm_offset_t new, old; 165eec2e4efSMateusz Guzik 166eec2e4efSMateusz Guzik VM_OBJECT_WLOCK(object); 167eec2e4efSMateusz Guzik KASSERT((object->flags & OBJ_ANON) == 0, 168eec2e4efSMateusz Guzik ("%s: object %p with OBJ_ANON", __func__, object)); 169eec2e4efSMateusz Guzik old = object->un_pager.swp.writemappings; 170eec2e4efSMateusz Guzik object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; 171eec2e4efSMateusz Guzik new = object->un_pager.swp.writemappings; 172eec2e4efSMateusz Guzik tmpfs_pager_writecount_recalc(object, old, new); 173eec2e4efSMateusz Guzik VM_OBJECT_ASSERT_UNLOCKED(object); 174eec2e4efSMateusz Guzik } 175eec2e4efSMateusz Guzik 176eec2e4efSMateusz Guzik static void 177eec2e4efSMateusz Guzik tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start, 178eec2e4efSMateusz Guzik vm_offset_t end) 179eec2e4efSMateusz Guzik { 180eec2e4efSMateusz Guzik vm_offset_t new, old; 181eec2e4efSMateusz Guzik 182eec2e4efSMateusz Guzik VM_OBJECT_WLOCK(object); 183eec2e4efSMateusz 
Guzik KASSERT((object->flags & OBJ_ANON) == 0, 184eec2e4efSMateusz Guzik ("%s: object %p with OBJ_ANON", __func__, object)); 185eec2e4efSMateusz Guzik old = object->un_pager.swp.writemappings; 1866ada4e8aSKonstantin Belousov KASSERT(old >= (vm_ooffset_t)end - start, 1876ada4e8aSKonstantin Belousov ("tmpfs obj %p writecount %jx dec %jx", object, (uintmax_t)old, 1886ada4e8aSKonstantin Belousov (uintmax_t)((vm_ooffset_t)end - start))); 189eec2e4efSMateusz Guzik object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; 190eec2e4efSMateusz Guzik new = object->un_pager.swp.writemappings; 191eec2e4efSMateusz Guzik tmpfs_pager_writecount_recalc(object, old, new); 192eec2e4efSMateusz Guzik VM_OBJECT_ASSERT_UNLOCKED(object); 193eec2e4efSMateusz Guzik } 194eec2e4efSMateusz Guzik 19528bc23abSKonstantin Belousov static void 19628bc23abSKonstantin Belousov tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) 19728bc23abSKonstantin Belousov { 19828bc23abSKonstantin Belousov struct vnode *vp; 19928bc23abSKonstantin Belousov 20028bc23abSKonstantin Belousov /* 20128bc23abSKonstantin Belousov * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type 202d9dc64f1SKonstantin Belousov * type. In this case there is no v_writecount to adjust. 
20328bc23abSKonstantin Belousov */ 20428bc23abSKonstantin Belousov if (vp_heldp != NULL) 20528bc23abSKonstantin Belousov VM_OBJECT_RLOCK(object); 20628bc23abSKonstantin Belousov else 20728bc23abSKonstantin Belousov VM_OBJECT_ASSERT_LOCKED(object); 20828bc23abSKonstantin Belousov if ((object->flags & OBJ_TMPFS) != 0) { 209d9dc64f1SKonstantin Belousov vp = VM_TO_TMPFS_VP(object); 21028bc23abSKonstantin Belousov if (vp != NULL) { 21128bc23abSKonstantin Belousov *vpp = vp; 21228bc23abSKonstantin Belousov if (vp_heldp != NULL) { 21328bc23abSKonstantin Belousov vhold(vp); 21428bc23abSKonstantin Belousov *vp_heldp = true; 21528bc23abSKonstantin Belousov } 21628bc23abSKonstantin Belousov } 21728bc23abSKonstantin Belousov } 21828bc23abSKonstantin Belousov if (vp_heldp != NULL) 21928bc23abSKonstantin Belousov VM_OBJECT_RUNLOCK(object); 22028bc23abSKonstantin Belousov } 22128bc23abSKonstantin Belousov 22237aea264SKonstantin Belousov static void 22337aea264SKonstantin Belousov tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size) 22437aea264SKonstantin Belousov { 22537aea264SKonstantin Belousov struct tmpfs_node *node; 22637aea264SKonstantin Belousov struct tmpfs_mount *tm; 22737aea264SKonstantin Belousov vm_size_t c; 22837aea264SKonstantin Belousov 22937aea264SKonstantin Belousov swap_pager_freespace(obj, start, size, &c); 23037aea264SKonstantin Belousov if ((obj->flags & OBJ_TMPFS) == 0 || c == 0) 23137aea264SKonstantin Belousov return; 23237aea264SKonstantin Belousov 23337aea264SKonstantin Belousov node = obj->un_pager.swp.swp_priv; 23437aea264SKonstantin Belousov MPASS(node->tn_type == VREG); 23537aea264SKonstantin Belousov tm = node->tn_reg.tn_tmp; 23637aea264SKonstantin Belousov 23737aea264SKonstantin Belousov KASSERT(tm->tm_pages_used >= c, 23837aea264SKonstantin Belousov ("tmpfs tm %p pages %jd free %jd", tm, 23937aea264SKonstantin Belousov (uintmax_t)tm->tm_pages_used, (uintmax_t)c)); 24037aea264SKonstantin Belousov 
atomic_add_long(&tm->tm_pages_used, -c); 24137aea264SKonstantin Belousov KASSERT(node->tn_reg.tn_pages >= c, 24237aea264SKonstantin Belousov ("tmpfs node %p pages %jd free %jd", node, 24337aea264SKonstantin Belousov (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c)); 24437aea264SKonstantin Belousov node->tn_reg.tn_pages -= c; 24537aea264SKonstantin Belousov } 24637aea264SKonstantin Belousov 24737aea264SKonstantin Belousov static void 24837aea264SKonstantin Belousov tmpfs_page_inserted(vm_object_t obj, vm_page_t m) 24937aea264SKonstantin Belousov { 25037aea264SKonstantin Belousov struct tmpfs_node *node; 25137aea264SKonstantin Belousov struct tmpfs_mount *tm; 25237aea264SKonstantin Belousov 25337aea264SKonstantin Belousov if ((obj->flags & OBJ_TMPFS) == 0) 25437aea264SKonstantin Belousov return; 25537aea264SKonstantin Belousov 25637aea264SKonstantin Belousov node = obj->un_pager.swp.swp_priv; 25737aea264SKonstantin Belousov MPASS(node->tn_type == VREG); 25837aea264SKonstantin Belousov tm = node->tn_reg.tn_tmp; 25937aea264SKonstantin Belousov 26037aea264SKonstantin Belousov if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { 26137aea264SKonstantin Belousov atomic_add_long(&tm->tm_pages_used, 1); 26237aea264SKonstantin Belousov node->tn_reg.tn_pages += 1; 26337aea264SKonstantin Belousov } 26437aea264SKonstantin Belousov } 26537aea264SKonstantin Belousov 26637aea264SKonstantin Belousov static void 26737aea264SKonstantin Belousov tmpfs_page_removed(vm_object_t obj, vm_page_t m) 26837aea264SKonstantin Belousov { 26937aea264SKonstantin Belousov struct tmpfs_node *node; 27037aea264SKonstantin Belousov struct tmpfs_mount *tm; 27137aea264SKonstantin Belousov 27237aea264SKonstantin Belousov if ((obj->flags & OBJ_TMPFS) == 0) 27337aea264SKonstantin Belousov return; 27437aea264SKonstantin Belousov 27537aea264SKonstantin Belousov node = obj->un_pager.swp.swp_priv; 27637aea264SKonstantin Belousov MPASS(node->tn_type == VREG); 27737aea264SKonstantin Belousov tm = 
node->tn_reg.tn_tmp; 27837aea264SKonstantin Belousov 27937aea264SKonstantin Belousov if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) { 28037aea264SKonstantin Belousov KASSERT(tm->tm_pages_used >= 1, 28137aea264SKonstantin Belousov ("tmpfs tm %p pages %jd free 1", tm, 28237aea264SKonstantin Belousov (uintmax_t)tm->tm_pages_used)); 28337aea264SKonstantin Belousov atomic_add_long(&tm->tm_pages_used, -1); 28437aea264SKonstantin Belousov KASSERT(node->tn_reg.tn_pages >= 1, 28537aea264SKonstantin Belousov ("tmpfs node %p pages %jd free 1", node, 28637aea264SKonstantin Belousov (uintmax_t)node->tn_reg.tn_pages)); 28737aea264SKonstantin Belousov node->tn_reg.tn_pages -= 1; 28837aea264SKonstantin Belousov } 28937aea264SKonstantin Belousov } 29037aea264SKonstantin Belousov 29137aea264SKonstantin Belousov static boolean_t 29237aea264SKonstantin Belousov tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex) 29337aea264SKonstantin Belousov { 29437aea264SKonstantin Belousov struct tmpfs_mount *tm; 29537aea264SKonstantin Belousov 29637aea264SKonstantin Belousov tm = VM_TO_TMPFS_MP(obj); 29737aea264SKonstantin Belousov if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) || 29837aea264SKonstantin Belousov tm->tm_pages_max == 0) 29937aea264SKonstantin Belousov return (true); 300ed19c098SMike Karels if (tm->tm_pages_max == ULONG_MAX) 301ed19c098SMike Karels return (tmpfs_mem_avail() >= 1); 30237aea264SKonstantin Belousov return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used)); 30337aea264SKonstantin Belousov } 30437aea264SKonstantin Belousov 30528bc23abSKonstantin Belousov struct pagerops tmpfs_pager_ops = { 30628bc23abSKonstantin Belousov .pgo_kvme_type = KVME_TYPE_VNODE, 30728bc23abSKonstantin Belousov .pgo_alloc = tmpfs_pager_alloc, 30828bc23abSKonstantin Belousov .pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, 309eec2e4efSMateusz Guzik .pgo_update_writecount = tmpfs_pager_update_writecount, 310eec2e4efSMateusz Guzik .pgo_release_writecount = 
tmpfs_pager_release_writecount, 31128bc23abSKonstantin Belousov .pgo_mightbedirty = vm_object_mightbedirty_, 31228bc23abSKonstantin Belousov .pgo_getvp = tmpfs_pager_getvp, 31337aea264SKonstantin Belousov .pgo_freespace = tmpfs_pager_freespace, 31437aea264SKonstantin Belousov .pgo_page_inserted = tmpfs_page_inserted, 31537aea264SKonstantin Belousov .pgo_page_removed = tmpfs_page_removed, 31637aea264SKonstantin Belousov .pgo_can_alloc_page = tmpfs_can_alloc_page, 31728bc23abSKonstantin Belousov }; 31828bc23abSKonstantin Belousov 319a51c8071SKonstantin Belousov static int 320a51c8071SKonstantin Belousov tmpfs_node_ctor(void *mem, int size, void *arg, int flags) 321a51c8071SKonstantin Belousov { 322a51c8071SKonstantin Belousov struct tmpfs_node *node; 323a51c8071SKonstantin Belousov 324a51c8071SKonstantin Belousov node = mem; 325a51c8071SKonstantin Belousov node->tn_gen++; 326a51c8071SKonstantin Belousov node->tn_size = 0; 327a51c8071SKonstantin Belousov node->tn_status = 0; 328016b7c7eSKonstantin Belousov node->tn_accessed = false; 329a51c8071SKonstantin Belousov node->tn_flags = 0; 330a51c8071SKonstantin Belousov node->tn_links = 0; 331a51c8071SKonstantin Belousov node->tn_vnode = NULL; 332a51c8071SKonstantin Belousov node->tn_vpstate = 0; 333a51c8071SKonstantin Belousov return (0); 334a51c8071SKonstantin Belousov } 335a51c8071SKonstantin Belousov 336a51c8071SKonstantin Belousov static void 337a51c8071SKonstantin Belousov tmpfs_node_dtor(void *mem, int size, void *arg) 338a51c8071SKonstantin Belousov { 339a51c8071SKonstantin Belousov struct tmpfs_node *node; 340a51c8071SKonstantin Belousov 341a51c8071SKonstantin Belousov node = mem; 342a51c8071SKonstantin Belousov node->tn_type = VNON; 343a51c8071SKonstantin Belousov } 344a51c8071SKonstantin Belousov 345a51c8071SKonstantin Belousov static int 346a51c8071SKonstantin Belousov tmpfs_node_init(void *mem, int size, int flags) 347a51c8071SKonstantin Belousov { 348a51c8071SKonstantin Belousov struct tmpfs_node *node; 
349a51c8071SKonstantin Belousov 350a51c8071SKonstantin Belousov node = mem; 351a51c8071SKonstantin Belousov node->tn_id = 0; 3526bd3f23aSRyan Libby mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF | MTX_NEW); 353a51c8071SKonstantin Belousov node->tn_gen = arc4random(); 354a51c8071SKonstantin Belousov return (0); 355a51c8071SKonstantin Belousov } 356a51c8071SKonstantin Belousov 357a51c8071SKonstantin Belousov static void 358a51c8071SKonstantin Belousov tmpfs_node_fini(void *mem, int size) 359a51c8071SKonstantin Belousov { 360a51c8071SKonstantin Belousov struct tmpfs_node *node; 361a51c8071SKonstantin Belousov 362a51c8071SKonstantin Belousov node = mem; 363a51c8071SKonstantin Belousov mtx_destroy(&node->tn_interlock); 364a51c8071SKonstantin Belousov } 365a51c8071SKonstantin Belousov 36628bc23abSKonstantin Belousov int 367a51c8071SKonstantin Belousov tmpfs_subr_init(void) 368a51c8071SKonstantin Belousov { 36928bc23abSKonstantin Belousov tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops, 37028bc23abSKonstantin Belousov OBJT_SWAP); 37128bc23abSKonstantin Belousov if (tmpfs_pager_type == -1) 37228bc23abSKonstantin Belousov return (EINVAL); 373a51c8071SKonstantin Belousov tmpfs_node_pool = uma_zcreate("TMPFS node", 374a51c8071SKonstantin Belousov sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, 375a51c8071SKonstantin Belousov tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); 376172ffe70SMateusz Guzik VFS_SMR_ZONE_SET(tmpfs_node_pool); 37763659234SMike Karels 37863659234SMike Karels tmpfs_pages_avail_init = tmpfs_mem_avail(); 37963659234SMike Karels tmpfs_set_reserve_from_percent(); 38028bc23abSKonstantin Belousov return (0); 381a51c8071SKonstantin Belousov } 382a51c8071SKonstantin Belousov 383a51c8071SKonstantin Belousov void 384a51c8071SKonstantin Belousov tmpfs_subr_uninit(void) 385a51c8071SKonstantin Belousov { 38628bc23abSKonstantin Belousov if (tmpfs_pager_type != -1) 38728bc23abSKonstantin Belousov 
vm_pager_free_dyn_type(tmpfs_pager_type); 38828bc23abSKonstantin Belousov tmpfs_pager_type = -1; 389a51c8071SKonstantin Belousov uma_zdestroy(tmpfs_node_pool); 390a51c8071SKonstantin Belousov } 391a51c8071SKonstantin Belousov 392da7aa277SGleb Kurtsou static int 393da7aa277SGleb Kurtsou sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 394da7aa277SGleb Kurtsou { 395da7aa277SGleb Kurtsou int error; 396da7aa277SGleb Kurtsou long pages, bytes; 397da7aa277SGleb Kurtsou 398da7aa277SGleb Kurtsou pages = *(long *)arg1; 399da7aa277SGleb Kurtsou bytes = pages * PAGE_SIZE; 400da7aa277SGleb Kurtsou 401da7aa277SGleb Kurtsou error = sysctl_handle_long(oidp, &bytes, 0, req); 402da7aa277SGleb Kurtsou if (error || !req->newptr) 403da7aa277SGleb Kurtsou return (error); 404da7aa277SGleb Kurtsou 405da7aa277SGleb Kurtsou pages = bytes / PAGE_SIZE; 406da7aa277SGleb Kurtsou if (pages < TMPFS_PAGES_MINRESERVED) 407da7aa277SGleb Kurtsou return (EINVAL); 408da7aa277SGleb Kurtsou 409da7aa277SGleb Kurtsou *(long *)arg1 = pages; 410da7aa277SGleb Kurtsou return (0); 411da7aa277SGleb Kurtsou } 412da7aa277SGleb Kurtsou 4132a829749SMateusz Guzik SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, 4142a829749SMateusz Guzik CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_pages_reserved, 0, 4152a829749SMateusz Guzik sysctl_mem_reserved, "L", 416f8439900SGleb Kurtsou "Amount of available memory and swap below which tmpfs growth stops"); 417da7aa277SGleb Kurtsou 41863659234SMike Karels static int 41963659234SMike Karels sysctl_mem_percent(SYSCTL_HANDLER_ARGS) 42063659234SMike Karels { 42163659234SMike Karels int error, percent; 42263659234SMike Karels 42363659234SMike Karels percent = *(int *)arg1; 42463659234SMike Karels error = sysctl_handle_int(oidp, &percent, 0, req); 42563659234SMike Karels if (error || !req->newptr) 42663659234SMike Karels return (error); 42763659234SMike Karels 42863659234SMike Karels if ((unsigned) percent > 100) 42963659234SMike Karels return (EINVAL); 43063659234SMike Karels 
4313cded059SJessica Clarke *(int *)arg1 = percent; 43263659234SMike Karels tmpfs_set_reserve_from_percent(); 43363659234SMike Karels return (0); 43463659234SMike Karels } 43563659234SMike Karels 43663659234SMike Karels static void 43763659234SMike Karels tmpfs_set_reserve_from_percent(void) 43863659234SMike Karels { 43963659234SMike Karels size_t reserved; 44063659234SMike Karels 44163659234SMike Karels reserved = tmpfs_pages_avail_init * (100 - tmpfs_mem_percent) / 100; 44263659234SMike Karels tmpfs_pages_reserved = max(reserved, TMPFS_PAGES_MINRESERVED); 44363659234SMike Karels } 44463659234SMike Karels 44563659234SMike Karels SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_percent, 44663659234SMike Karels CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_mem_percent, 0, 44763659234SMike Karels sysctl_mem_percent, "I", 44863659234SMike Karels "Percent of available memory that can be used if no size limit"); 44963659234SMike Karels 4504fd5efe7SGleb Kurtsou static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 4514fd5efe7SGleb Kurtsou struct tmpfs_dirent *b); 4524fd5efe7SGleb Kurtsou RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 4534fd5efe7SGleb Kurtsou 454da7aa277SGleb Kurtsou size_t 455da7aa277SGleb Kurtsou tmpfs_mem_avail(void) 456da7aa277SGleb Kurtsou { 457f9cc8410SEric van Gyzen size_t avail; 458f9cc8410SEric van Gyzen long reserved; 459da7aa277SGleb Kurtsou 460f9cc8410SEric van Gyzen avail = swap_pager_avail + vm_free_count(); 461f9cc8410SEric van Gyzen reserved = atomic_load_long(&tmpfs_pages_reserved); 462f9cc8410SEric van Gyzen if (__predict_false(avail < reserved)) 463f9cc8410SEric van Gyzen return (0); 464f9cc8410SEric van Gyzen return (avail - reserved); 465da7aa277SGleb Kurtsou } 466da7aa277SGleb Kurtsou 467da7aa277SGleb Kurtsou size_t 468da7aa277SGleb Kurtsou tmpfs_pages_used(struct tmpfs_mount *tmp) 469da7aa277SGleb Kurtsou { 470da7aa277SGleb Kurtsou const size_t node_size = sizeof(struct tmpfs_node) + 
471da7aa277SGleb Kurtsou sizeof(struct tmpfs_dirent); 472da7aa277SGleb Kurtsou size_t meta_pages; 473da7aa277SGleb Kurtsou 474da7aa277SGleb Kurtsou meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 475da7aa277SGleb Kurtsou PAGE_SIZE); 476da7aa277SGleb Kurtsou return (meta_pages + tmp->tm_pages_used); 477da7aa277SGleb Kurtsou } 478da7aa277SGleb Kurtsou 47956242a4cSFedor Uporov bool 480da7aa277SGleb Kurtsou tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 481da7aa277SGleb Kurtsou { 482da7aa277SGleb Kurtsou if (tmpfs_mem_avail() < req_pages) 4837f055843SKonstantin Belousov return (false); 484da7aa277SGleb Kurtsou 485ed2159c9SMateusz Guzik if (tmp->tm_pages_max != ULONG_MAX && 486da7aa277SGleb Kurtsou tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 4877f055843SKonstantin Belousov return (false); 488da7aa277SGleb Kurtsou 4897f055843SKonstantin Belousov return (true); 490da7aa277SGleb Kurtsou } 491da7aa277SGleb Kurtsou 492399be910SKa Ho Ng static int 493399be910SKa Ho Ng tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base, 494399be910SKa Ho Ng int end, boolean_t ignerr) 495399be910SKa Ho Ng { 496399be910SKa Ho Ng vm_page_t m; 497399be910SKa Ho Ng int rv, error; 498399be910SKa Ho Ng 499399be910SKa Ho Ng VM_OBJECT_ASSERT_WLOCKED(object); 500399be910SKa Ho Ng KASSERT(base >= 0, ("%s: base %d", __func__, base)); 501399be910SKa Ho Ng KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base, 502399be910SKa Ho Ng end)); 503399be910SKa Ho Ng error = 0; 504399be910SKa Ho Ng 505399be910SKa Ho Ng retry: 506399be910SKa Ho Ng m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); 507399be910SKa Ho Ng if (m != NULL) { 508399be910SKa Ho Ng MPASS(vm_page_all_valid(m)); 509399be910SKa Ho Ng } else if (vm_pager_has_page(object, idx, NULL, NULL)) { 510399be910SKa Ho Ng m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL | 511399be910SKa Ho Ng VM_ALLOC_WAITFAIL); 512399be910SKa Ho Ng if (m == NULL) 513399be910SKa Ho Ng goto 
retry; 514399be910SKa Ho Ng vm_object_pip_add(object, 1); 515399be910SKa Ho Ng VM_OBJECT_WUNLOCK(object); 516399be910SKa Ho Ng rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); 517399be910SKa Ho Ng VM_OBJECT_WLOCK(object); 518399be910SKa Ho Ng vm_object_pip_wakeup(object); 519399be910SKa Ho Ng if (rv == VM_PAGER_OK) { 520399be910SKa Ho Ng /* 521399be910SKa Ho Ng * Since the page was not resident, and therefore not 522399be910SKa Ho Ng * recently accessed, immediately enqueue it for 523399be910SKa Ho Ng * asynchronous laundering. The current operation is 524399be910SKa Ho Ng * not regarded as an access. 525399be910SKa Ho Ng */ 526399be910SKa Ho Ng vm_page_launder(m); 527399be910SKa Ho Ng } else { 528399be910SKa Ho Ng vm_page_free(m); 529399be910SKa Ho Ng m = NULL; 530399be910SKa Ho Ng if (!ignerr) 531399be910SKa Ho Ng error = EIO; 532399be910SKa Ho Ng } 533399be910SKa Ho Ng } 534399be910SKa Ho Ng if (m != NULL) { 535399be910SKa Ho Ng pmap_zero_page_area(m, base, end - base); 536399be910SKa Ho Ng vm_page_set_dirty(m); 537399be910SKa Ho Ng vm_page_xunbusy(m); 538399be910SKa Ho Ng } 539399be910SKa Ho Ng 540399be910SKa Ho Ng return (error); 541399be910SKa Ho Ng } 542399be910SKa Ho Ng 54364c25043SKonstantin Belousov void 54464c25043SKonstantin Belousov tmpfs_ref_node(struct tmpfs_node *node) 54564c25043SKonstantin Belousov { 5464601f5f5SKonstantin Belousov #ifdef INVARIANTS 5474601f5f5SKonstantin Belousov u_int old; 54864c25043SKonstantin Belousov 5494601f5f5SKonstantin Belousov old = 5504601f5f5SKonstantin Belousov #endif 5514601f5f5SKonstantin Belousov refcount_acquire(&node->tn_refcount); 5524601f5f5SKonstantin Belousov #ifdef INVARIANTS 5534601f5f5SKonstantin Belousov KASSERT(old > 0, ("node %p zero refcount", node)); 5544601f5f5SKonstantin Belousov #endif 55564c25043SKonstantin Belousov } 55664c25043SKonstantin Belousov 557d1fa59e9SXin LI /* 558d1fa59e9SXin LI * Allocates a new node of type 'type' inside the 'tmp' mount point, with 559d1fa59e9SXin LI * its owner 
 * set to 'uid', its group to 'gid' and its mode set to 'mode', using
 * the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are retrieved from the available list if it has
 * items or, if it is empty, from the node pool as long as there is enough
 * space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype) type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    const char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;
	char *symlink;
	char symlink_smr;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	/* A symlink target is supplied iff we create a VLNK node, and a
	 * valid rdev is supplied iff we create a device node. */
	MPASS((type == VLNK) ^ (target == NULL));
	MPASS((type == VBLK || type == VCHR) ^ (rdev == VNOVAL));

	/* Enforce the mount's inode and memory limits up front. */
	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
		return (ENOSPC);
	if (!tmpfs_pages_check_avail(tmp, 1))
		return (ENOSPC);

	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
		/*
		 * When a new tmpfs node is created for fully
		 * constructed mount point, there must be a parent
		 * node, which vnode is locked exclusively.  As
		 * consequence, if the unmount is executing in
		 * parallel, vflush() cannot reclaim the parent vnode.
		 * Due to this, the check for MNTK_UNMOUNT flag is not
		 * racy: if we did not see MNTK_UNMOUNT flag, then tmp
		 * cannot be destroyed until node construction is
		 * finished and the parent vnode unlocked.
		 *
		 * Tmpfs does not need to instantiate new nodes during
		 * unmount.
		 */
		return (EBUSY);
	}
	/*
	 * NOTE(review): MNT_RDONLY is an mnt_flag bit, yet it is tested
	 * against mnt_kern_flag here — confirm this is intentional (the
	 * same bit value in mnt_kern_flag has a different meaning).
	 */
	if ((mp->mnt_kern_flag & MNT_RDONLY) != 0)
		return (EROFS);

	nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK);

	/* Generic initialization. */
	nnode->tn_type = type;
	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
	nnode->tn_refcount = 1;
	LIST_INIT(&nnode->tn_extattrs);

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		RB_INIT(&nnode->tn_dir.tn_dirhead);
		LIST_INIT(&nnode->tn_dir.tn_dupindex);
		MPASS(parent != nnode);
		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
		/* The root directory is its own parent. */
		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
		nnode->tn_dir.tn_readdir_lastn = 0;
		nnode->tn_dir.tn_readdir_lastp = NULL;
		nnode->tn_dir.tn_wht_size = 0;
		/* Account for "." and the parent's new ".." link. */
		nnode->tn_links++;
		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
		nnode->tn_dir.tn_parent->tn_links++;
		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		MPASS(strlen(target) < MAXPATHLEN);
		nnode->tn_size = strlen(target);

		/* Prefer an SMR-managed buffer (enables lockless lookup)
		 * unless name caching is disabled for this mount. */
		symlink = NULL;
		if (!tmp->tm_nonc) {
			symlink = cache_symlink_alloc(nnode->tn_size + 1,
			    M_WAITOK);
			symlink_smr = true;
		}
		if (symlink == NULL) {
			symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME,
			    M_WAITOK);
			symlink_smr = false;
		}
		memcpy(symlink, target, nnode->tn_size + 1);

		/*
		 * Allow safe symlink resolving for lockless lookup.
		 * tmpfs_fplookup_symlink references this comment.
		 *
		 * 1. nnode is not yet visible to the world
		 * 2. both tn_link_target and tn_link_smr get populated
		 * 3. release fence publishes their content
		 * 4. tn_link_target content is immutable until node
		 *    destruction, where the pointer gets set to NULL
		 * 5. tn_link_smr is never changed once set
		 *
		 * As a result it is sufficient to issue load consume
		 * on the node pointer to also get the above content
		 * in a stable manner.  Worst case tn_link_smr flag
		 * may be set to true despite being stale, while the
		 * target buffer is already cleared out.
		 */
		atomic_store_ptr(&nnode->tn_link_target, symlink);
		atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr);
		atomic_thread_fence_rel();
		break;

	case VREG:
		/* Back the regular file with a swap-capable VM object. */
		nnode->tn_reg.tn_aobj =
		    vm_pager_allocate(tmpfs_pager_type, NULL, 0,
			VM_PROT_DEFAULT, 0,
			NULL /* XXXKIB - tmpfs needs swap reservation */);
		nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode;
		vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS);
		nnode->tn_reg.tn_tmp = tmp;
		nnode->tn_reg.tn_pages = 0;
		break;

	default:
		panic("tmpfs_alloc_node: type %p %d", nnode,
		    (int)nnode->tn_type);
	}

	/* Publish the fully constructed node on the mount's node list. */
	TMPFS_LOCK(tmp);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
	nnode->tn_attached = true;
	tmp->tm_nodes_inuse++;
	tmp->tm_refcount++;
	TMPFS_UNLOCK(tmp);

	*node = nnode;
	return (0);
}

/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node references a directory, no entries are allowed.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	/* Fast path: drop a reference that is known not to be the last
	 * one without taking any locks. */
	if (refcount_release_if_not_last(&node->tn_refcount))
		return;

	TMPFS_LOCK(tmp);
	TMPFS_NODE_LOCK(node);
	/* On true both locks were consumed by tmpfs_free_node_locked(). */
	if (!tmpfs_free_node_locked(tmp, node, false)) {
		TMPFS_NODE_UNLOCK(node);
		TMPFS_UNLOCK(tmp);
	}
}

/*
 * Drop a reference on 'node' with the mount and node locks held.  If
 * requested by 'detach', or if the last reference went away, unlink the
 * node from the mount's node list; on last reference also release the
 * node's type-specific resources and free it.
 *
 * Returns true when the node was freed — in that case both the node lock
 * and the mount lock have been released on behalf of the caller (the
 * latter inside tmpfs_free_tmp()).  Returns false otherwise, with both
 * locks still held.
 */
bool
tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    bool detach)
{
	struct tmpfs_extattr *ea;
	vm_object_t uobj;
	char *symlink;
	bool last;

	TMPFS_MP_ASSERT_LOCKED(tmp);
	TMPFS_NODE_ASSERT_LOCKED(node);

	last = refcount_release(&node->tn_refcount);
	if (node->tn_attached && (detach || last)) {
		MPASS(tmp->tm_nodes_inuse > 0);
		tmp->tm_nodes_inuse--;
		LIST_REMOVE(node, tn_entries);
		node->tn_attached = false;
	}
	if (!last)
		return (false);

	TMPFS_NODE_UNLOCK(node);

#ifdef INVARIANTS
	MPASS(node->tn_vnode == NULL);
	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);

	/*
	 * Make sure this is a node type we can deal with. Everything
	 * is explicitly enumerated without the 'default' clause so
	 * the compiler can throw an error in case a new type is
	 * added.
	 */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
	case VDIR:
	case VFIFO:
	case VSOCK:
	case VLNK:
	case VREG:
		break;
	case VNON:
	case VBAD:
	case VMARKER:
		panic("%s: bad type %d for node %p", __func__,
		    (int)node->tn_type, node);
	}
#endif

	/* Release all extended attributes attached to the node. */
	while ((ea = LIST_FIRST(&node->tn_extattrs)) != NULL) {
		LIST_REMOVE(ea, ea_extattrs);
		tmpfs_extattr_free(ea);
	}

	switch (node->tn_type) {
	case VREG:
		uobj = node->tn_reg.tn_aobj;
		node->tn_reg.tn_aobj = NULL;
		if (uobj != NULL) {
			VM_OBJECT_WLOCK(uobj);
			KASSERT((uobj->flags & OBJ_TMPFS) != 0,
			    ("tmpfs node %p uobj %p not tmpfs", node, uobj));
			vm_object_clear_flag(uobj, OBJ_TMPFS);
			KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages,
			    ("tmpfs tmp %p node %p pages %jd free %jd", tmp,
			    node, (uintmax_t)tmp->tm_pages_used,
			    (uintmax_t)node->tn_reg.tn_pages));
			/* Return the node's page accounting to the mount. */
			atomic_add_long(&tmp->tm_pages_used,
			    -node->tn_reg.tn_pages);
			VM_OBJECT_WUNLOCK(uobj);
		}
		tmpfs_free_tmp(tmp);

		/*
		 * vm_object_deallocate() must not be called while
		 * owning tm_allnode_lock, because deallocate might
		 * sleep.  Call it after tmpfs_free_tmp() does the
		 * unlock.
		 */
		if (uobj != NULL)
			vm_object_deallocate(uobj);

		break;
	case VLNK:
		tmpfs_free_tmp(tmp);

		/* Clear the published pointer before freeing the buffer;
		 * lockless lookup tolerates a stale tn_link_smr (see the
		 * publication comment in tmpfs_alloc_node()). */
		symlink = node->tn_link_target;
		atomic_store_ptr(&node->tn_link_target, NULL);
		if (atomic_load_char(&node->tn_link_smr)) {
			cache_symlink_free(symlink, node->tn_size + 1);
		} else {
			free(symlink, M_TMPFSNAME);
		}
		break;
	default:
		tmpfs_free_tmp(tmp);
		break;
	}

	uma_zfree_smr(tmpfs_node_pool, node);
	return (true);
}

/*
 * Hash a directory entry name into the cookie space.  The result always
 * lies in [TMPFS_DIRCOOKIE_MIN, TMPFS_DIRCOOKIE_MASK].
 */
static __inline uint32_t
tmpfs_dirent_hash(const char *name, u_int len)
{
	uint32_t hash;

	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
	/* Debug mode: shrink the hash space to force cookie collisions. */
	hash &= 0xf;
#endif
	if (hash < TMPFS_DIRCOOKIE_MIN)
		hash += TMPFS_DIRCOOKIE_MIN;

	return (hash);
}

/* Return the readdir cookie for 'de', or EOF for a NULL entry. */
static __inline off_t
tmpfs_dirent_cookie(struct tmpfs_dirent *de)
{
	if (de == NULL)
		return (TMPFS_DIRCOOKIE_EOF);

	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);

	return (de->td_cookie);
}

/* True if 'de' lives on a duplicate-hash list rather than in the tree. */
static __inline boolean_t
tmpfs_dirent_dup(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
}

/* True if 'de' is the tree-resident head of a duplicate-hash list. */
static __inline boolean_t
tmpfs_dirent_duphead(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
}

/*
 * Initialize the name, name length, and hash/cookie of 'de'.  The
 * ud.td_name buffer must already be large enough for 'namelen' bytes.
 */
void
tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
{
	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
	memcpy(de->ud.td_name, name, namelen);
	de->td_namelen = namelen;
}

/*
 * Allocates a new directory entry for the node node with a name of name.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, u_int len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK);
	nde->td_node = node;
	if (name != NULL) {
		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
		tmpfs_dirent_init(nde, name, len);
	} else
		nde->td_namelen = 0;
	/* node may be NULL for a whiteout-style entry. */
	if (node != NULL)
		node->tn_links++;

	*de = nde;

	return (0);
}

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.  This only happens if 'node_exists' is true;
 * otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *node;

	node = de->td_node;
	if (node != NULL) {
		MPASS(node->tn_links > 0);
		node->tn_links--;
	}
	/* A duphead shares its name storage with the duplicate entries;
	 * only free the name for ordinary entries that own one. */
	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
		free(de->ud.td_name, M_TMPFSNAME);
	free(de, M_TMPFSDIR);
}

/*
 * Disassociate the VM object 'obj' from the regular-file vnode 'vp' being
 * reclaimed, dropping the use reference the object held on the vnode for
 * write mappings (OBJ_TMPFS_VREF), if any.
 */
void
tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
{
	bool want_vrele;

	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
	if (vp->v_type != VREG || obj == NULL)
		return;

	VM_OBJECT_WLOCK(obj);
	VI_LOCK(vp);
	vp->v_object = NULL;

	/*
	 * May be going through forced unmount.
	 */
	want_vrele = false;
	if ((obj->flags & OBJ_TMPFS_VREF) != 0) {
		vm_object_clear_flag(obj, OBJ_TMPFS_VREF);
		want_vrele = true;
	}

	if (vp->v_writecount < 0)
		vp->v_writecount = 0;
	VI_UNLOCK(vp);
	VM_OBJECT_WUNLOCK(obj);
	/* Drop the reference outside the interlock/object locks. */
	if (want_vrele) {
		vrele(vp);
	}
}

/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
    struct vnode **vpp)
{
	struct vnode *vp;
	enum vgetstate vs;
	struct tmpfs_mount *tm;
	vm_object_t object;
	int error;

	error = 0;
	tm = VFS_TO_TMPFS(mp);
	TMPFS_NODE_LOCK(node);
	/* Hold the node across blocking operations; released in 'out'. */
	tmpfs_ref_node(node);
loop:
	TMPFS_NODE_ASSERT_LOCKED(node);
	if ((vp = node->tn_vnode) != NULL) {
		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		/* A directory whose parent link is gone is unlinked;
		 * refuse to hand out a vnode for it. */
		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
		    (VN_IS_DOOMED(vp) &&
		    (lkflag & LK_NOWAIT) != 0)) {
			TMPFS_NODE_UNLOCK(node);
			error = ENOENT;
			vp = NULL;
			goto out;
		}
		if (VN_IS_DOOMED(vp)) {
			/* Wait for the reclaim in progress to finish
			 * (tmpfs_free_vp() wakes us up), then retry. */
			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
				    0, "tmpfsE", 0);
			}
			goto loop;
		}
		vs = vget_prep(vp);
		TMPFS_NODE_UNLOCK(node);
		error = vget_finish(vp, lkflag, vs);
		if (error == ENOENT) {
			TMPFS_NODE_LOCK(node);
			goto loop;
		}
		if (error != 0) {
			vp = NULL;
			goto out;
		}

		/*
		 * Make sure the vnode is still there after
		 * getting the interlock to avoid racing a free.
		 */
		if (node->tn_vnode != vp) {
			vput(vp);
			TMPFS_NODE_LOCK(node);
			goto loop;
		}

		goto out;
	}

	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		vp = NULL;
		goto out;
	}

	/*
	 * otherwise lock the vp list while we call getnewvnode
	 * since that can block.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		/* Another thread is instantiating the vnode; wait for it. */
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = msleep((caddr_t) &node->tn_vpstate,
		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
		if (error != 0)
			goto out;
		goto loop;
	} else
		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;

	TMPFS_NODE_UNLOCK(node);

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
	if (error != 0)
		goto unlock;
	MPASS(vp != NULL);

	/* lkflag is ignored, the lock is exclusive */
	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VFIFO:
		vp->v_op = &tmpfs_fifoop_entries;
		break;
	case VREG:
		object = node->tn_reg.tn_aobj;
		VM_OBJECT_WLOCK(object);
		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
		    __func__, object));
		VI_LOCK(vp);
		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
		vp->v_object = object;
		vn_irflag_set_locked(vp, (tm->tm_pgread ? VIRF_PGREAD : 0) |
		    VIRF_TEXT_REF);
		VI_UNLOCK(vp);
		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
		    ("leaked OBJ_TMPFS_VREF"));
		/* Pre-existing write mappings hold a use reference on the
		 * vnode; record it with OBJ_TMPFS_VREF. */
		if (object->un_pager.swp.writemappings > 0) {
			vrefact(vp);
			vlazy(vp);
			vm_object_set_flag(object, OBJ_TMPFS_VREF);
		}
		VM_OBJECT_WUNLOCK(object);
		break;
	case VDIR:
		MPASS(node->tn_dir.tn_parent != NULL);
		if (node->tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	error = insmntque1(vp, mp);
	if (error != 0) {
		/* Need to clear v_object for insmntque failure. */
		tmpfs_destroy_vobject(vp, vp->v_object);
		vp->v_object = NULL;
		vp->v_data = NULL;
		vp->v_op = &dead_vnodeops;
		vgone(vp);
		vput(vp);
		vp = NULL;
	} else {
		vn_set_state(vp, VSTATE_CONSTRUCTED);
	}

unlock:
	TMPFS_NODE_LOCK(node);

	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	/* Wake up anyone who queued behind us in the ALLOCATING state. */
	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup((caddr_t) &node->tn_vpstate);
	} else
		TMPFS_NODE_UNLOCK(node);

out:
	if (error == 0) {
		*vpp = vp;

#ifdef INVARIANTS
		MPASS(*vpp != NULL);
		ASSERT_VOP_LOCKED(*vpp, __func__);
		TMPFS_NODE_LOCK(node);
		MPASS(*vpp == node->tn_vnode);
		TMPFS_NODE_UNLOCK(node);
#endif
	}
	/* Drop the hold taken at entry. */
	tmpfs_free_node(tm, node);

	return (error);
}

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_ASSERT_LOCKED(node);
	node->tn_vnode = NULL;
	/* Unblock tmpfs_alloc_vp() callers waiting out our reclaim. */
	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
		wakeup(&node->tn_vnode);
	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
	vp->v_data = NULL;
}

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, const char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
		if (dnode->tn_links == TMPFS_LINK_MAX) {
			return (EMLINK);
		}

		parent = dnode;
		MPASS(parent != NULL);
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
	    target, vap->va_rdev, &node);
	if (error != 0)
		return (error);

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);
	return (0);
}

/*
 * Position the cursor 'dc' at the first entry of directory 'dnode' and
 * return it (descending into a duplicate-hash list if needed), or NULL
 * for an empty directory.
 */
struct tmpfs_dirent *
tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
	dc->tdc_tree = de;
	if (de != NULL && tmpfs_dirent_duphead(de))
		de = LIST_FIRST(&de->ud.td_duphead);
	dc->tdc_current = de;

	return (dc->tdc_current);
}

/*
 * Advance the cursor 'dc' to the next entry of directory 'dnode' and
 * return it, or NULL at the end.  Duplicate-hash lists are walked before
 * moving to the next tree node.
 */
struct tmpfs_dirent *
tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	MPASS(dc->tdc_tree != NULL);
	if (tmpfs_dirent_dup(dc->tdc_current)) {
		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
		if (dc->tdc_current != NULL)
			return (dc->tdc_current);
	}
	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}

	return (dc->tdc_current);
}

/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
static struct tmpfs_dirent *
tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
{
	struct tmpfs_dirent *de, dekey;

	dekey.td_hash = hash;
	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
	return (de);
}

/* Lookup directory entry by cookie, initialize directory cursor accordingly. */
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
    struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
	struct tmpfs_dirent *de, dekey;

	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);

	/* Fast path: the cached position from the previous readdir. */
	if (cookie == node->tn_dir.tn_readdir_lastn &&
	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
		/* Protect against possible race, tn_readdir_last[pn]
		 * may be updated with only shared vnode lock held. */
		if (cookie == tmpfs_dirent_cookie(de))
			goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
		    uh.td_dup.index_entries) {
			MPASS(tmpfs_dirent_dup(de));
			if (de->td_cookie == cookie)
				goto out;
			/* dupindex list is sorted. */
			if (de->td_cookie < cookie) {
				de = NULL;
				goto out;
			}
		}
		MPASS(de == NULL);
		goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
		de = NULL;
	} else {
		dekey.td_hash = cookie;
		/* Recover if direntry for cookie was removed */
		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
	}
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}
	return (dc->tdc_current);

out:
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_dup(de))
		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
		    de->td_hash);
Kurtsou return (dc->tdc_current); 13714fd5efe7SGleb Kurtsou } 13724fd5efe7SGleb Kurtsou 13734fd5efe7SGleb Kurtsou /* 13744fd5efe7SGleb Kurtsou * Looks for a directory entry in the directory represented by node. 13754fd5efe7SGleb Kurtsou * 'cnp' describes the name of the entry to look for. Note that the . 13764fd5efe7SGleb Kurtsou * and .. components are not allowed as they do not physically exist 13774fd5efe7SGleb Kurtsou * within directories. 13784fd5efe7SGleb Kurtsou * 13794fd5efe7SGleb Kurtsou * Returns a pointer to the entry when found, otherwise NULL. 13804fd5efe7SGleb Kurtsou */ 13814fd5efe7SGleb Kurtsou struct tmpfs_dirent * 13824fd5efe7SGleb Kurtsou tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 13834fd5efe7SGleb Kurtsou struct componentname *cnp) 13844fd5efe7SGleb Kurtsou { 13854fd5efe7SGleb Kurtsou struct tmpfs_dir_duphead *duphead; 13864fd5efe7SGleb Kurtsou struct tmpfs_dirent *de; 13874fd5efe7SGleb Kurtsou uint32_t hash; 13884fd5efe7SGleb Kurtsou 13894fd5efe7SGleb Kurtsou MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 13904fd5efe7SGleb Kurtsou MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' 
&& 13914fd5efe7SGleb Kurtsou cnp->cn_nameptr[1] == '.'))); 13924fd5efe7SGleb Kurtsou TMPFS_VALIDATE_DIR(node); 13934fd5efe7SGleb Kurtsou 13944fd5efe7SGleb Kurtsou hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 13954fd5efe7SGleb Kurtsou de = tmpfs_dir_xlookup_hash(node, hash); 13964fd5efe7SGleb Kurtsou if (de != NULL && tmpfs_dirent_duphead(de)) { 13974fd5efe7SGleb Kurtsou duphead = &de->ud.td_duphead; 13984fd5efe7SGleb Kurtsou LIST_FOREACH(de, duphead, uh.td_dup.entries) { 13994fd5efe7SGleb Kurtsou if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 14004fd5efe7SGleb Kurtsou cnp->cn_namelen)) 14014fd5efe7SGleb Kurtsou break; 14024fd5efe7SGleb Kurtsou } 14034fd5efe7SGleb Kurtsou } else if (de != NULL) { 14044fd5efe7SGleb Kurtsou if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 14054fd5efe7SGleb Kurtsou cnp->cn_namelen)) 14064fd5efe7SGleb Kurtsou de = NULL; 14074fd5efe7SGleb Kurtsou } 14084fd5efe7SGleb Kurtsou if (de != NULL && f != NULL && de->td_node != f) 14094fd5efe7SGleb Kurtsou de = NULL; 14104fd5efe7SGleb Kurtsou 14114fd5efe7SGleb Kurtsou return (de); 14124fd5efe7SGleb Kurtsou } 14134fd5efe7SGleb Kurtsou 14144fd5efe7SGleb Kurtsou /* 14154fd5efe7SGleb Kurtsou * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 14164fd5efe7SGleb Kurtsou * list, allocate new cookie value. 
14174fd5efe7SGleb Kurtsou */ 14184fd5efe7SGleb Kurtsou static void 14194fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 14204fd5efe7SGleb Kurtsou struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 14214fd5efe7SGleb Kurtsou { 14224fd5efe7SGleb Kurtsou struct tmpfs_dir_duphead *dupindex; 14234fd5efe7SGleb Kurtsou struct tmpfs_dirent *de, *pde; 14244fd5efe7SGleb Kurtsou 14254fd5efe7SGleb Kurtsou dupindex = &dnode->tn_dir.tn_dupindex; 14264fd5efe7SGleb Kurtsou de = LIST_FIRST(dupindex); 14274fd5efe7SGleb Kurtsou if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 14284fd5efe7SGleb Kurtsou if (de == NULL) 14294fd5efe7SGleb Kurtsou nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 14304fd5efe7SGleb Kurtsou else 14314fd5efe7SGleb Kurtsou nde->td_cookie = de->td_cookie + 1; 14324fd5efe7SGleb Kurtsou MPASS(tmpfs_dirent_dup(nde)); 14334fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 14344fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 14354fd5efe7SGleb Kurtsou return; 14364fd5efe7SGleb Kurtsou } 14374fd5efe7SGleb Kurtsou 14384fd5efe7SGleb Kurtsou /* 14394fd5efe7SGleb Kurtsou * Cookie numbers are near exhaustion. Scan dupindex list for unused 14404fd5efe7SGleb Kurtsou * numbers. dupindex list is sorted in descending order. Keep it so 14414fd5efe7SGleb Kurtsou * after inserting nde. 14424fd5efe7SGleb Kurtsou */ 14434fd5efe7SGleb Kurtsou while (1) { 14444fd5efe7SGleb Kurtsou pde = de; 14454fd5efe7SGleb Kurtsou de = LIST_NEXT(de, uh.td_dup.index_entries); 14464fd5efe7SGleb Kurtsou if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 14474fd5efe7SGleb Kurtsou /* 14484fd5efe7SGleb Kurtsou * Last element of the index doesn't have minimal cookie 14494fd5efe7SGleb Kurtsou * value, use it. 
14504fd5efe7SGleb Kurtsou */ 14514fd5efe7SGleb Kurtsou nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 14524fd5efe7SGleb Kurtsou LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 14534fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 14544fd5efe7SGleb Kurtsou return; 14554fd5efe7SGleb Kurtsou } else if (de == NULL) { 14564fd5efe7SGleb Kurtsou /* 14574fd5efe7SGleb Kurtsou * We are so lucky have 2^30 hash duplicates in single 14584fd5efe7SGleb Kurtsou * directory :) Return largest possible cookie value. 14594fd5efe7SGleb Kurtsou * It should be fine except possible issues with 14604fd5efe7SGleb Kurtsou * VOP_READDIR restart. 14614fd5efe7SGleb Kurtsou */ 14624fd5efe7SGleb Kurtsou nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 14634fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(dupindex, nde, 14644fd5efe7SGleb Kurtsou uh.td_dup.index_entries); 14654fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 14664fd5efe7SGleb Kurtsou return; 14674fd5efe7SGleb Kurtsou } 14684fd5efe7SGleb Kurtsou if (de->td_cookie + 1 == pde->td_cookie || 14694fd5efe7SGleb Kurtsou de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 14704fd5efe7SGleb Kurtsou continue; /* No hole or invalid cookie. */ 14714fd5efe7SGleb Kurtsou nde->td_cookie = de->td_cookie + 1; 14724fd5efe7SGleb Kurtsou MPASS(tmpfs_dirent_dup(nde)); 14734fd5efe7SGleb Kurtsou MPASS(pde->td_cookie > nde->td_cookie); 14744fd5efe7SGleb Kurtsou MPASS(nde->td_cookie > de->td_cookie); 14754fd5efe7SGleb Kurtsou LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 14764fd5efe7SGleb Kurtsou LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 14774fd5efe7SGleb Kurtsou return; 147874b8d63dSPedro F. Giffuni } 14794fd5efe7SGleb Kurtsou } 14804fd5efe7SGleb Kurtsou 1481d1fa59e9SXin LI /* 1482d1fa59e9SXin LI * Attaches the directory entry de to the directory represented by vp. 
1483d1fa59e9SXin LI * Note that this does not change the link count of the node pointed by 1484d1fa59e9SXin LI * the directory entry, as this is done by tmpfs_alloc_dirent. 1485d1fa59e9SXin LI */ 1486d1fa59e9SXin LI void 1487d1fa59e9SXin LI tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 1488d1fa59e9SXin LI { 1489d1fa59e9SXin LI struct tmpfs_node *dnode; 14904fd5efe7SGleb Kurtsou struct tmpfs_dirent *xde, *nde; 1491d1fa59e9SXin LI 1492fb755714SXin LI ASSERT_VOP_ELOCKED(vp, __func__); 14934fd5efe7SGleb Kurtsou MPASS(de->td_namelen > 0); 14944fd5efe7SGleb Kurtsou MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 14954fd5efe7SGleb Kurtsou MPASS(de->td_cookie == de->td_hash); 14964fd5efe7SGleb Kurtsou 1497d1fa59e9SXin LI dnode = VP_TO_TMPFS_DIR(vp); 14984fd5efe7SGleb Kurtsou dnode->tn_dir.tn_readdir_lastn = 0; 14994fd5efe7SGleb Kurtsou dnode->tn_dir.tn_readdir_lastp = NULL; 15004fd5efe7SGleb Kurtsou 15014fd5efe7SGleb Kurtsou MPASS(!tmpfs_dirent_dup(de)); 15024fd5efe7SGleb Kurtsou xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 15034fd5efe7SGleb Kurtsou if (xde != NULL && tmpfs_dirent_duphead(xde)) 15044fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 15054fd5efe7SGleb Kurtsou else if (xde != NULL) { 15064fd5efe7SGleb Kurtsou /* 15074fd5efe7SGleb Kurtsou * Allocate new duphead. Swap xde with duphead to avoid 15084fd5efe7SGleb Kurtsou * adding/removing elements with the same hash. 15094fd5efe7SGleb Kurtsou */ 15104fd5efe7SGleb Kurtsou MPASS(!tmpfs_dirent_dup(xde)); 15114fd5efe7SGleb Kurtsou tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 15124fd5efe7SGleb Kurtsou &nde); 15134fd5efe7SGleb Kurtsou /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. 
*/ 15144fd5efe7SGleb Kurtsou memcpy(nde, xde, sizeof(*xde)); 15154fd5efe7SGleb Kurtsou xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 15164fd5efe7SGleb Kurtsou LIST_INIT(&xde->ud.td_duphead); 15174fd5efe7SGleb Kurtsou xde->td_namelen = 0; 15184fd5efe7SGleb Kurtsou xde->td_node = NULL; 15194fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 15204fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 15214fd5efe7SGleb Kurtsou } 1522d1fa59e9SXin LI dnode->tn_size += sizeof(struct tmpfs_dirent); 1523016b7c7eSKonstantin Belousov dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1524016b7c7eSKonstantin Belousov dnode->tn_accessed = true; 1525e0a60ae1SKonstantin Belousov tmpfs_update(vp); 1526d1fa59e9SXin LI } 1527d1fa59e9SXin LI 1528d1fa59e9SXin LI /* 1529d1fa59e9SXin LI * Detaches the directory entry de from the directory represented by vp. 1530d1fa59e9SXin LI * Note that this does not change the link count of the node pointed by 1531d1fa59e9SXin LI * the directory entry, as this is done by tmpfs_free_dirent. 1532d1fa59e9SXin LI */ 1533d1fa59e9SXin LI void 1534d1fa59e9SXin LI tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1535d1fa59e9SXin LI { 15364fd5efe7SGleb Kurtsou struct tmpfs_mount *tmp; 15374fd5efe7SGleb Kurtsou struct tmpfs_dir *head; 1538d1fa59e9SXin LI struct tmpfs_node *dnode; 15394fd5efe7SGleb Kurtsou struct tmpfs_dirent *xde; 1540d1fa59e9SXin LI 1541fb755714SXin LI ASSERT_VOP_ELOCKED(vp, __func__); 1542d1fa59e9SXin LI 15434fd5efe7SGleb Kurtsou dnode = VP_TO_TMPFS_DIR(vp); 15444fd5efe7SGleb Kurtsou head = &dnode->tn_dir.tn_dirhead; 1545d1fa59e9SXin LI dnode->tn_dir.tn_readdir_lastn = 0; 1546d1fa59e9SXin LI dnode->tn_dir.tn_readdir_lastp = NULL; 1547d1fa59e9SXin LI 15484fd5efe7SGleb Kurtsou if (tmpfs_dirent_dup(de)) { 15494fd5efe7SGleb Kurtsou /* Remove duphead if de was last entry. 
*/ 15504fd5efe7SGleb Kurtsou if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 15514fd5efe7SGleb Kurtsou xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 15524fd5efe7SGleb Kurtsou MPASS(tmpfs_dirent_duphead(xde)); 15534fd5efe7SGleb Kurtsou } else 15544fd5efe7SGleb Kurtsou xde = NULL; 15554fd5efe7SGleb Kurtsou LIST_REMOVE(de, uh.td_dup.entries); 15564fd5efe7SGleb Kurtsou LIST_REMOVE(de, uh.td_dup.index_entries); 15574fd5efe7SGleb Kurtsou if (xde != NULL) { 15584fd5efe7SGleb Kurtsou if (LIST_EMPTY(&xde->ud.td_duphead)) { 15594fd5efe7SGleb Kurtsou RB_REMOVE(tmpfs_dir, head, xde); 15604fd5efe7SGleb Kurtsou tmp = VFS_TO_TMPFS(vp->v_mount); 15614fd5efe7SGleb Kurtsou MPASS(xde->td_node == NULL); 15624fd5efe7SGleb Kurtsou tmpfs_free_dirent(tmp, xde); 15634fd5efe7SGleb Kurtsou } 15644fd5efe7SGleb Kurtsou } 156585512850SKonstantin Belousov de->td_cookie = de->td_hash; 15664fd5efe7SGleb Kurtsou } else 15674fd5efe7SGleb Kurtsou RB_REMOVE(tmpfs_dir, head, de); 15684fd5efe7SGleb Kurtsou 1569d1fa59e9SXin LI dnode->tn_size -= sizeof(struct tmpfs_dirent); 1570016b7c7eSKonstantin Belousov dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1571016b7c7eSKonstantin Belousov dnode->tn_accessed = true; 1572e0a60ae1SKonstantin Belousov tmpfs_update(vp); 1573d1fa59e9SXin LI } 1574d1fa59e9SXin LI 15754fd5efe7SGleb Kurtsou void 15764fd5efe7SGleb Kurtsou tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1577d1fa59e9SXin LI { 15784fd5efe7SGleb Kurtsou struct tmpfs_dirent *de, *dde, *nde; 1579d1fa59e9SXin LI 15804fd5efe7SGleb Kurtsou RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 15814fd5efe7SGleb Kurtsou RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 15824fd5efe7SGleb Kurtsou /* Node may already be destroyed. 
*/ 15834fd5efe7SGleb Kurtsou de->td_node = NULL; 15844fd5efe7SGleb Kurtsou if (tmpfs_dirent_duphead(de)) { 15854fd5efe7SGleb Kurtsou while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 15864fd5efe7SGleb Kurtsou LIST_REMOVE(dde, uh.td_dup.entries); 15874fd5efe7SGleb Kurtsou dde->td_node = NULL; 15884fd5efe7SGleb Kurtsou tmpfs_free_dirent(tmp, dde); 1589d1fa59e9SXin LI } 1590d1fa59e9SXin LI } 15914fd5efe7SGleb Kurtsou tmpfs_free_dirent(tmp, de); 15924fd5efe7SGleb Kurtsou } 1593d1fa59e9SXin LI } 1594d1fa59e9SXin LI 1595d1fa59e9SXin LI /* 1596d1fa59e9SXin LI * Helper function for tmpfs_readdir. Creates a '.' entry for the given 1597d1fa59e9SXin LI * directory and returns it in the uio space. The function returns 0 1598d1fa59e9SXin LI * on success, -1 if there was not enough space in the uio structure to 1599d1fa59e9SXin LI * hold the directory entry or an appropriate error code if another 1600d1fa59e9SXin LI * error happens. 1601d1fa59e9SXin LI */ 16024fd5efe7SGleb Kurtsou static int 1603e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1604e1cdc30fSKonstantin Belousov struct uio *uio) 1605d1fa59e9SXin LI { 1606d1fa59e9SXin LI int error; 1607d1fa59e9SXin LI struct dirent dent; 1608d1fa59e9SXin LI 1609d1fa59e9SXin LI TMPFS_VALIDATE_DIR(node); 1610d1fa59e9SXin LI MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1611d1fa59e9SXin LI 1612d1fa59e9SXin LI dent.d_fileno = node->tn_id; 161390f580b9SMark Johnston dent.d_off = TMPFS_DIRCOOKIE_DOTDOT; 1614d1fa59e9SXin LI dent.d_type = DT_DIR; 1615d1fa59e9SXin LI dent.d_namlen = 1; 1616d1fa59e9SXin LI dent.d_name[0] = '.'; 1617d1fa59e9SXin LI dent.d_reclen = GENERIC_DIRSIZ(&dent); 16186d2e2df7SMark Johnston dirent_terminate(&dent); 1619d1fa59e9SXin LI 1620d1fa59e9SXin LI if (dent.d_reclen > uio->uio_resid) 16214fd5efe7SGleb Kurtsou error = EJUSTRETURN; 16224fd5efe7SGleb Kurtsou else 1623d1fa59e9SXin LI error = uiomove(&dent, dent.d_reclen, uio); 1624d1fa59e9SXin LI 
1625016b7c7eSKonstantin Belousov tmpfs_set_accessed(tm, node); 1626d1fa59e9SXin LI 16275dc11286SKonstantin Belousov return (error); 1628d1fa59e9SXin LI } 1629d1fa59e9SXin LI 1630d1fa59e9SXin LI /* 1631d1fa59e9SXin LI * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1632d1fa59e9SXin LI * directory and returns it in the uio space. The function returns 0 1633d1fa59e9SXin LI * on success, -1 if there was not enough space in the uio structure to 1634d1fa59e9SXin LI * hold the directory entry or an appropriate error code if another 1635d1fa59e9SXin LI * error happens. 1636d1fa59e9SXin LI */ 16374fd5efe7SGleb Kurtsou static int 1638e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 163990f580b9SMark Johnston struct uio *uio, off_t next) 1640d1fa59e9SXin LI { 1641c5dac63cSKonstantin Belousov struct tmpfs_node *parent; 1642d1fa59e9SXin LI struct dirent dent; 1643c5dac63cSKonstantin Belousov int error; 1644d1fa59e9SXin LI 1645d1fa59e9SXin LI TMPFS_VALIDATE_DIR(node); 1646d1fa59e9SXin LI MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1647d1fa59e9SXin LI 164882cf92d4SXin LI /* 164982cf92d4SXin LI * Return ENOENT if the current node is already removed. 
165082cf92d4SXin LI */ 165182cf92d4SXin LI TMPFS_ASSERT_LOCKED(node); 1652c5dac63cSKonstantin Belousov parent = node->tn_dir.tn_parent; 1653c5dac63cSKonstantin Belousov if (parent == NULL) 165482cf92d4SXin LI return (ENOENT); 165582cf92d4SXin LI 1656c5dac63cSKonstantin Belousov dent.d_fileno = parent->tn_id; 165790f580b9SMark Johnston dent.d_off = next; 1658d1fa59e9SXin LI dent.d_type = DT_DIR; 1659d1fa59e9SXin LI dent.d_namlen = 2; 1660d1fa59e9SXin LI dent.d_name[0] = '.'; 1661d1fa59e9SXin LI dent.d_name[1] = '.'; 1662d1fa59e9SXin LI dent.d_reclen = GENERIC_DIRSIZ(&dent); 16636d2e2df7SMark Johnston dirent_terminate(&dent); 1664d1fa59e9SXin LI 1665d1fa59e9SXin LI if (dent.d_reclen > uio->uio_resid) 16664fd5efe7SGleb Kurtsou error = EJUSTRETURN; 1667d1fa59e9SXin LI else 16684fd5efe7SGleb Kurtsou error = uiomove(&dent, dent.d_reclen, uio); 1669d1fa59e9SXin LI 1670016b7c7eSKonstantin Belousov tmpfs_set_accessed(tm, node); 1671d1fa59e9SXin LI 16725dc11286SKonstantin Belousov return (error); 1673d1fa59e9SXin LI } 1674d1fa59e9SXin LI 1675d1fa59e9SXin LI /* 1676d1fa59e9SXin LI * Helper function for tmpfs_readdir. Returns as much directory entries 1677d1fa59e9SXin LI * as can fit in the uio space. The read starts at uio->uio_offset. 1678d1fa59e9SXin LI * The function returns 0 on success, -1 if there was not enough space 1679d1fa59e9SXin LI * in the uio structure to hold the directory entry or an appropriate 1680d1fa59e9SXin LI * error code if another error happens. 
1681d1fa59e9SXin LI */ 1682d1fa59e9SXin LI int 1683e1cdc30fSKonstantin Belousov tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node, 1684b214fcceSAlan Somers struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies) 1685d1fa59e9SXin LI { 16864fd5efe7SGleb Kurtsou struct tmpfs_dir_cursor dc; 168790f580b9SMark Johnston struct tmpfs_dirent *de, *nde; 16884fd5efe7SGleb Kurtsou off_t off; 16894fd5efe7SGleb Kurtsou int error; 1690d1fa59e9SXin LI 1691d1fa59e9SXin LI TMPFS_VALIDATE_DIR(node); 1692d1fa59e9SXin LI 16934fd5efe7SGleb Kurtsou off = 0; 169462dca316SBryan Drewery 169562dca316SBryan Drewery /* 169662dca316SBryan Drewery * Lookup the node from the current offset. The starting offset of 169762dca316SBryan Drewery * 0 will lookup both '.' and '..', and then the first real entry, 169862dca316SBryan Drewery * or EOF if there are none. Then find all entries for the dir that 169962dca316SBryan Drewery * fit into the buffer. Once no more entries are found (de == NULL), 170062dca316SBryan Drewery * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 170162dca316SBryan Drewery * call to return 0. 
170262dca316SBryan Drewery */ 17034fd5efe7SGleb Kurtsou switch (uio->uio_offset) { 17044fd5efe7SGleb Kurtsou case TMPFS_DIRCOOKIE_DOT: 1705e1cdc30fSKonstantin Belousov error = tmpfs_dir_getdotdent(tm, node, uio); 17064fd5efe7SGleb Kurtsou if (error != 0) 17074fd5efe7SGleb Kurtsou return (error); 170890f580b9SMark Johnston uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT; 1709ac09d109SBryan Drewery if (cookies != NULL) 171090f580b9SMark Johnston cookies[(*ncookies)++] = off; 1711504bde01SBryan Drewery /* FALLTHROUGH */ 17124fd5efe7SGleb Kurtsou case TMPFS_DIRCOOKIE_DOTDOT: 171390f580b9SMark Johnston de = tmpfs_dir_first(node, &dc); 171490f580b9SMark Johnston off = tmpfs_dirent_cookie(de); 171590f580b9SMark Johnston error = tmpfs_dir_getdotdotdent(tm, node, uio, off); 17164fd5efe7SGleb Kurtsou if (error != 0) 17174fd5efe7SGleb Kurtsou return (error); 171890f580b9SMark Johnston uio->uio_offset = off; 1719ac09d109SBryan Drewery if (cookies != NULL) 172090f580b9SMark Johnston cookies[(*ncookies)++] = off; 172162dca316SBryan Drewery /* EOF. */ 17224fd5efe7SGleb Kurtsou if (de == NULL) 17234fd5efe7SGleb Kurtsou return (0); 17244fd5efe7SGleb Kurtsou break; 17254fd5efe7SGleb Kurtsou case TMPFS_DIRCOOKIE_EOF: 17264fd5efe7SGleb Kurtsou return (0); 17274fd5efe7SGleb Kurtsou default: 17284fd5efe7SGleb Kurtsou de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 17294fd5efe7SGleb Kurtsou if (de == NULL) 17304fd5efe7SGleb Kurtsou return (EINVAL); 1731ac09d109SBryan Drewery if (cookies != NULL) 17324fd5efe7SGleb Kurtsou off = tmpfs_dirent_cookie(de); 1733d1fa59e9SXin LI } 1734d1fa59e9SXin LI 173590f580b9SMark Johnston /* 173690f580b9SMark Johnston * Read as much entries as possible; i.e., until we reach the end of the 173790f580b9SMark Johnston * directory or we exhaust uio space. 
173890f580b9SMark Johnston */ 1739d1fa59e9SXin LI do { 1740d1fa59e9SXin LI struct dirent d; 1741d1fa59e9SXin LI 174290f580b9SMark Johnston /* 174390f580b9SMark Johnston * Create a dirent structure representing the current tmpfs_node 174490f580b9SMark Johnston * and fill it. 174590f580b9SMark Johnston */ 174699d57a6bSEd Schouten if (de->td_node == NULL) { 174799d57a6bSEd Schouten d.d_fileno = 1; 174899d57a6bSEd Schouten d.d_type = DT_WHT; 174999d57a6bSEd Schouten } else { 1750d1fa59e9SXin LI d.d_fileno = de->td_node->tn_id; 1751d1fa59e9SXin LI switch (de->td_node->tn_type) { 1752d1fa59e9SXin LI case VBLK: 1753d1fa59e9SXin LI d.d_type = DT_BLK; 1754d1fa59e9SXin LI break; 1755d1fa59e9SXin LI 1756d1fa59e9SXin LI case VCHR: 1757d1fa59e9SXin LI d.d_type = DT_CHR; 1758d1fa59e9SXin LI break; 1759d1fa59e9SXin LI 1760d1fa59e9SXin LI case VDIR: 1761d1fa59e9SXin LI d.d_type = DT_DIR; 1762d1fa59e9SXin LI break; 1763d1fa59e9SXin LI 1764d1fa59e9SXin LI case VFIFO: 1765d1fa59e9SXin LI d.d_type = DT_FIFO; 1766d1fa59e9SXin LI break; 1767d1fa59e9SXin LI 1768d1fa59e9SXin LI case VLNK: 1769d1fa59e9SXin LI d.d_type = DT_LNK; 1770d1fa59e9SXin LI break; 1771d1fa59e9SXin LI 1772d1fa59e9SXin LI case VREG: 1773d1fa59e9SXin LI d.d_type = DT_REG; 1774d1fa59e9SXin LI break; 1775d1fa59e9SXin LI 1776d1fa59e9SXin LI case VSOCK: 1777d1fa59e9SXin LI d.d_type = DT_SOCK; 1778d1fa59e9SXin LI break; 1779d1fa59e9SXin LI 1780d1fa59e9SXin LI default: 17811fa8f5f0SXin LI panic("tmpfs_dir_getdents: type %p %d", 17821fa8f5f0SXin LI de->td_node, (int)de->td_node->tn_type); 1783d1fa59e9SXin LI } 178499d57a6bSEd Schouten } 1785d1fa59e9SXin LI d.d_namlen = de->td_namelen; 1786d1fa59e9SXin LI MPASS(de->td_namelen < sizeof(d.d_name)); 17874fd5efe7SGleb Kurtsou (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1788d1fa59e9SXin LI d.d_reclen = GENERIC_DIRSIZ(&d); 1789d1fa59e9SXin LI 179090f580b9SMark Johnston /* 179190f580b9SMark Johnston * Stop reading if the directory entry we are treating is bigger 
179290f580b9SMark Johnston * than the amount of data that can be returned. 179390f580b9SMark Johnston */ 1794d1fa59e9SXin LI if (d.d_reclen > uio->uio_resid) { 17954fd5efe7SGleb Kurtsou error = EJUSTRETURN; 1796d1fa59e9SXin LI break; 1797d1fa59e9SXin LI } 1798d1fa59e9SXin LI 179990f580b9SMark Johnston nde = tmpfs_dir_next(node, &dc); 180090f580b9SMark Johnston d.d_off = tmpfs_dirent_cookie(nde); 180190f580b9SMark Johnston dirent_terminate(&d); 180290f580b9SMark Johnston 180390f580b9SMark Johnston /* 180490f580b9SMark Johnston * Copy the new dirent structure into the output buffer and 180590f580b9SMark Johnston * advance pointers. 180690f580b9SMark Johnston */ 1807d1fa59e9SXin LI error = uiomove(&d, d.d_reclen, uio); 18089fb9c623SKonstantin Belousov if (error == 0) { 180990f580b9SMark Johnston de = nde; 1810ac09d109SBryan Drewery if (cookies != NULL) { 18114fd5efe7SGleb Kurtsou off = tmpfs_dirent_cookie(de); 1812ac09d109SBryan Drewery MPASS(*ncookies < maxcookies); 18134fd5efe7SGleb Kurtsou cookies[(*ncookies)++] = off; 18144fd5efe7SGleb Kurtsou } 18159fb9c623SKonstantin Belousov } 1816d1fa59e9SXin LI } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1817d1fa59e9SXin LI 1818ac09d109SBryan Drewery /* Skip setting off when using cookies as it is already done above. */ 1819ac09d109SBryan Drewery if (cookies == NULL) 18204fd5efe7SGleb Kurtsou off = tmpfs_dirent_cookie(de); 1821d1fa59e9SXin LI 1822ac09d109SBryan Drewery /* Update the offset and cache. 
*/ 18234fd5efe7SGleb Kurtsou uio->uio_offset = off; 18244fd5efe7SGleb Kurtsou node->tn_dir.tn_readdir_lastn = off; 18254fd5efe7SGleb Kurtsou node->tn_dir.tn_readdir_lastp = de; 18264fd5efe7SGleb Kurtsou 1827016b7c7eSKonstantin Belousov tmpfs_set_accessed(tm, node); 1828016b7c7eSKonstantin Belousov return (error); 1829d1fa59e9SXin LI } 1830d1fa59e9SXin LI 183199d57a6bSEd Schouten int 183299d57a6bSEd Schouten tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 183399d57a6bSEd Schouten { 183499d57a6bSEd Schouten struct tmpfs_dirent *de; 1835*8fa5e0f2SJason A. Harmening struct tmpfs_node *dnode; 183699d57a6bSEd Schouten int error; 183799d57a6bSEd Schouten 183899d57a6bSEd Schouten error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 183999d57a6bSEd Schouten cnp->cn_nameptr, cnp->cn_namelen, &de); 184099d57a6bSEd Schouten if (error != 0) 184199d57a6bSEd Schouten return (error); 1842*8fa5e0f2SJason A. Harmening dnode = VP_TO_TMPFS_DIR(dvp); 184399d57a6bSEd Schouten tmpfs_dir_attach(dvp, de); 1844*8fa5e0f2SJason A. Harmening dnode->tn_dir.tn_wht_size += sizeof(*de); 184599d57a6bSEd Schouten return (0); 184699d57a6bSEd Schouten } 184799d57a6bSEd Schouten 184899d57a6bSEd Schouten void 184999d57a6bSEd Schouten tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 185099d57a6bSEd Schouten { 185199d57a6bSEd Schouten struct tmpfs_dirent *de; 1852*8fa5e0f2SJason A. Harmening struct tmpfs_node *dnode; 185399d57a6bSEd Schouten 1854*8fa5e0f2SJason A. Harmening dnode = VP_TO_TMPFS_DIR(dvp); 1855*8fa5e0f2SJason A. Harmening de = tmpfs_dir_lookup(dnode, NULL, cnp); 185699d57a6bSEd Schouten MPASS(de != NULL && de->td_node == NULL); 1857*8fa5e0f2SJason A. Harmening MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de)); 1858*8fa5e0f2SJason A. 
Harmening dnode->tn_dir.tn_wht_size -= sizeof(*de); 185999d57a6bSEd Schouten tmpfs_dir_detach(dvp, de); 18604fd5efe7SGleb Kurtsou tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 186199d57a6bSEd Schouten } 186299d57a6bSEd Schouten 1863d1fa59e9SXin LI /* 1864*8fa5e0f2SJason A. Harmening * Frees any dirents still associated with the directory represented 1865*8fa5e0f2SJason A. Harmening * by dvp in preparation for the removal of the directory. This is 1866*8fa5e0f2SJason A. Harmening * required when removing a directory which contains only whiteout 1867*8fa5e0f2SJason A. Harmening * entries. 1868*8fa5e0f2SJason A. Harmening */ 1869*8fa5e0f2SJason A. Harmening void 1870*8fa5e0f2SJason A. Harmening tmpfs_dir_clear_whiteouts(struct vnode *dvp) 1871*8fa5e0f2SJason A. Harmening { 1872*8fa5e0f2SJason A. Harmening struct tmpfs_dir_cursor dc; 1873*8fa5e0f2SJason A. Harmening struct tmpfs_dirent *de; 1874*8fa5e0f2SJason A. Harmening struct tmpfs_node *dnode; 1875*8fa5e0f2SJason A. Harmening 1876*8fa5e0f2SJason A. Harmening dnode = VP_TO_TMPFS_DIR(dvp); 1877*8fa5e0f2SJason A. Harmening 1878*8fa5e0f2SJason A. Harmening while ((de = tmpfs_dir_first(dnode, &dc)) != NULL) { 1879*8fa5e0f2SJason A. Harmening KASSERT(de->td_node == NULL, ("%s: non-whiteout dirent %p", 1880*8fa5e0f2SJason A. Harmening __func__, de)); 1881*8fa5e0f2SJason A. Harmening dnode->tn_dir.tn_wht_size -= sizeof(*de); 1882*8fa5e0f2SJason A. Harmening tmpfs_dir_detach(dvp, de); 1883*8fa5e0f2SJason A. Harmening tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1884*8fa5e0f2SJason A. Harmening } 1885*8fa5e0f2SJason A. Harmening MPASS(dnode->tn_size == 0); 1886*8fa5e0f2SJason A. Harmening MPASS(dnode->tn_dir.tn_wht_size == 0); 1887*8fa5e0f2SJason A. Harmening } 1888*8fa5e0f2SJason A. Harmening 1889*8fa5e0f2SJason A. Harmening /* 18904673c751SAlan Cox * Resizes the aobj associated with the regular file pointed to by 'vp' to the 18914673c751SAlan Cox * size 'newsize'. 
'vp' must point to a vnode that represents a regular file. 18924673c751SAlan Cox * 'newsize' must be positive. 1893d1fa59e9SXin LI * 1894d1fa59e9SXin LI * Returns zero on success or an appropriate error code on failure. 1895d1fa59e9SXin LI */ 1896d1fa59e9SXin LI int 18970b05cac3SAlan Cox tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1898d1fa59e9SXin LI { 1899d1fa59e9SXin LI struct tmpfs_node *node; 1900b10d1d5dSAlan Cox vm_object_t uobj; 19012971897dSAlan Cox vm_pindex_t idx, newpages, oldpages; 1902d1fa59e9SXin LI off_t oldsize; 1903399be910SKa Ho Ng int base, error; 1904d1fa59e9SXin LI 1905d1fa59e9SXin LI MPASS(vp->v_type == VREG); 1906d1fa59e9SXin LI MPASS(newsize >= 0); 1907d1fa59e9SXin LI 1908d1fa59e9SXin LI node = VP_TO_TMPFS_NODE(vp); 1909b10d1d5dSAlan Cox uobj = node->tn_reg.tn_aobj; 1910d1fa59e9SXin LI 19114673c751SAlan Cox /* 19124673c751SAlan Cox * Convert the old and new sizes to the number of pages needed to 1913d1fa59e9SXin LI * store them. It may happen that we do not need to do anything 1914d1fa59e9SXin LI * because the last allocated page can accommodate the change on 19154673c751SAlan Cox * its own. 19164673c751SAlan Cox */ 1917d1fa59e9SXin LI oldsize = node->tn_size; 1918b10d1d5dSAlan Cox oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1919b10d1d5dSAlan Cox MPASS(oldpages == uobj->size); 1920b10d1d5dSAlan Cox newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1921e3e10c39SMateusz Guzik 1922e3e10c39SMateusz Guzik if (__predict_true(newpages == oldpages && newsize >= oldsize)) { 1923e3e10c39SMateusz Guzik node->tn_size = newsize; 1924e3e10c39SMateusz Guzik return (0); 1925e3e10c39SMateusz Guzik } 1926e3e10c39SMateusz Guzik 192789f6b863SAttilio Rao VM_OBJECT_WLOCK(uobj); 1928d1fa59e9SXin LI if (newsize < oldsize) { 1929d1fa59e9SXin LI /* 19302971897dSAlan Cox * Zero the truncated part of the last page. 
19312971897dSAlan Cox */ 19322971897dSAlan Cox base = newsize & PAGE_MASK; 19332971897dSAlan Cox if (base != 0) { 19342971897dSAlan Cox idx = OFF_TO_IDX(newsize); 1935399be910SKa Ho Ng error = tmpfs_partial_page_invalidate(uobj, idx, base, 1936399be910SKa Ho Ng PAGE_SIZE, ignerr); 1937399be910SKa Ho Ng if (error != 0) { 1938d6e13f3bSJeff Roberson VM_OBJECT_WUNLOCK(uobj); 1939399be910SKa Ho Ng return (error); 19402971897dSAlan Cox } 19412971897dSAlan Cox } 19422971897dSAlan Cox 19432971897dSAlan Cox /* 19444673c751SAlan Cox * Release any swap space and free any whole pages. 1945d1fa59e9SXin LI */ 194684242cf6SMark Johnston if (newpages < oldpages) 19476bbee8e2SAlan Cox vm_object_page_remove(uobj, newpages, 0, 0); 1948d1fa59e9SXin LI } 1949b10d1d5dSAlan Cox uobj->size = newpages; 195089f6b863SAttilio Rao VM_OBJECT_WUNLOCK(uobj); 19512971897dSAlan Cox 19522971897dSAlan Cox node->tn_size = newsize; 19534673c751SAlan Cox return (0); 1954d1fa59e9SXin LI } 1955d1fa59e9SXin LI 19568d7cd10bSKa Ho Ng /* 19578d7cd10bSKa Ho Ng * Punch hole in the aobj associated with the regular file pointed to by 'vp'. 19588d7cd10bSKa Ho Ng * Requests completely beyond the end-of-file are converted to no-op. 19598d7cd10bSKa Ho Ng * 19608d7cd10bSKa Ho Ng * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on 19618d7cd10bSKa Ho Ng * failure. 
 */
int
tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length)
{
	struct tmpfs_node *node;
	vm_object_t object;
	vm_pindex_t pistart, pi, piend;
	int startofs, endofs, end;
	off_t off, len;
	int error;

	KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows",
	    __func__));
	node = VP_TO_TMPFS_NODE(vp);
	KASSERT(node->tn_type == VREG, ("%s: node is not regular file",
	    __func__));
	object = node->tn_reg.tn_aobj;
	off = *offset;
	/* Clamp the request so it never extends past end-of-file. */
	len = omin(node->tn_size - off, *length);
	startofs = off & PAGE_MASK;
	endofs = (off + len) & PAGE_MASK;
	pistart = OFF_TO_IDX(off);
	piend = OFF_TO_IDX(off + len);
	/* 'pi' is the first page index fully contained in the hole. */
	pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK);
	error = 0;

	/* Handle the case when offset is on or beyond file size. */
	if (len <= 0) {
		*length = 0;
		return (0);
	}

	VM_OBJECT_WLOCK(object);

	/*
	 * If there is a partial page at the beginning of the hole-punching
	 * request, fill the partial page with zeroes.
	 */
	if (startofs != 0) {
		end = pistart != piend ? PAGE_SIZE : endofs;
		error = tmpfs_partial_page_invalidate(object, pistart, startofs,
		    end, FALSE);
		if (error != 0)
			goto out;
		off += end - startofs;
		len -= end - startofs;
	}

	/*
	 * Toss away the full pages in the affected area.
	 */
	if (pi < piend) {
		vm_object_page_remove(object, pi, piend, 0);
		off += IDX_TO_OFF(piend - pi);
		len -= IDX_TO_OFF(piend - pi);
	}

	/*
	 * If there is a partial page at the end of the hole-punching request,
	 * fill the partial page with zeroes.
	 */
	if (endofs != 0 && pistart != piend) {
		error = tmpfs_partial_page_invalidate(object, piend, 0, endofs,
		    FALSE);
		if (error != 0)
			goto out;
		off += endofs;
		len -= endofs;
	}

out:
	VM_OBJECT_WUNLOCK(object);
	/*
	 * Report back how far we got: '*offset' is advanced past the
	 * processed region and '*length' holds the amount left undone
	 * (zero on full success).
	 */
	*offset = off;
	*length = len;
	return (error);
}

/*
 * If the object backing a regular-file vnode was dirtied through a shared
 * mapping (tracked via the object generation counters), mark the node
 * modified/changed so the timestamps get updated.
 */
void
tmpfs_check_mtime(struct vnode *vp)
{
	struct tmpfs_node *node;
	struct vm_object *obj;

	ASSERT_VOP_ELOCKED(vp, "check_mtime");
	if (vp->v_type != VREG)
		return;
	obj = vp->v_object;
	KASSERT(obj->type == tmpfs_pager_type &&
	    (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) ==
	    (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj"));
	/* unlocked read */
	if (obj->generation != obj->cleangeneration) {
		VM_OBJECT_WLOCK(obj);
		/* Re-check under the object lock before consuming. */
		if (obj->generation != obj->cleangeneration) {
			obj->cleangeneration = obj->generation;
			node = VP_TO_TMPFS_NODE(vp);
			node->tn_status |= TMPFS_NODE_MODIFIED |
			    TMPFS_NODE_CHANGED;
		}
		VM_OBJECT_WUNLOCK(obj);
	}
}

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chflags");

	node = VP_TO_TMPFS_NODE(vp);

	/* Reject any flag bits tmpfs does not implement. */
	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
	    UF_SPARSE | UF_SYSTEM)) != 0)
		return (EOPNOTSUPP);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/*
	 * Callers may only modify the file flags on objects they
	 * have VADMIN rights for.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);
	/*
	 * Unprivileged processes are not permitted to unset system
	 * flags, or modify flags if any system flags are set.
	 */
	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
		/* Privileged: still honor the securelevel on system flags. */
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
			error = securelevel_gt(cred, 0);
			if (error)
				return (error);
		}
	} else {
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
		    ((flags ^ node->tn_flags) & SF_SETTABLE))
			return (EPERM);
	}
	node->tn_flags = flags;
	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chflags2");

	return (0);
}

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;
	mode_t newmode;

	ASSERT_VOP_ELOCKED(vp, "chmod");
	ASSERT_VOP_IN_SEQC(vp);

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	/*
	 * To modify the permissions on a file, must possess VADMIN
	 * for that file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of.
	 */
	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
			return (EFTYPE);
	}
	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID);
		if (error)
			return (error);
	}

	/*
	 * Replace only the permission bits; the atomic store pairs with
	 * lockless readers of tn_mode.
	 */
	newmode = node->tn_mode & ~ALLPERMS;
	newmode |= mode & ALLPERMS;
	atomic_store_short(&node->tn_mode, newmode);

	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chmod2");

	return (0);
}

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;
	uid_t ouid;
	gid_t ogid;
	mode_t newmode;

	ASSERT_VOP_ELOCKED(vp, "chown");
	ASSERT_VOP_IN_SEQC(vp);

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	MPASS(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	MPASS(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);

	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if ((uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
		return (error);

	ogid = node->tn_gid;
	ouid = node->tn_uid;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;

	/*
	 * Clear setuid/setgid on an ownership change unless the caller
	 * holds the privilege to retain them.
	 */
	if ((node->tn_mode & (S_ISUID | S_ISGID)) != 0 &&
	    (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
			atomic_store_short(&node->tn_mode, newmode);
		}
	}

	ASSERT_VOP_ELOCKED(vp, "chown2");

	return (0);
}

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chsize");

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return (EISDIR);

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/*
		 * Allow modifications of special files even if in the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return (0);

	default:
		/* Anything else is unsupported. */
		return (EOPNOTSUPP);
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	error = vn_rlimit_trunc(size, td);
	if (error != 0)
		return (error);

	error = tmpfs_truncate(vp, size);
	/*
	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as will update tn_status; no need to do that here.
	 */

	ASSERT_VOP_ELOCKED(vp, "chsize2");

	return (error);
}

/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chtimes");

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	error = vn_utimes_perm(vp, vap, cred, td);
	if (error != 0)
		return (error);

	/* Only the timestamps explicitly supplied in 'vap' are applied. */
	if (vap->va_atime.tv_sec != VNOVAL)
		node->tn_accessed = true;
	if (vap->va_mtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;
	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;
	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_birthtime = vap->va_birthtime;
	ASSERT_VOP_ELOCKED(vp, "chtimes2");

	return (0);
}

/*
 * Set the given status bits on 'node', taking the node lock only when an
 * update is actually needed.  No-op on read-only mounts.
 */
void
tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
{

	if ((node->tn_status & status) == status || tm->tm_ronly)
		return;
	TMPFS_NODE_LOCK(node);
	node->tn_status |= status;
	TMPFS_NODE_UNLOCK(node);
}

/*
 * Lockless variant for the access-time flag: a single atomic byte store,
 * skipped when already set or on read-only mounts.
 */
void
tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node)
{
	if (node->tn_accessed || tm->tm_ronly)
		return;
	atomic_store_8(&node->tn_accessed, true);
}

/* Sync timestamps */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod)
{
	struct tmpfs_node *node;
	struct timespec now;

	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
	node = VP_TO_TMPFS_NODE(vp);

	/* Nothing pending: avoid taking the node lock. */
	if (!node->tn_accessed &&
	    (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0)
		return;

	vfs_timestamp(&now);
	TMPFS_NODE_LOCK(node);
	if (node->tn_accessed) {
		/* NULL 'acc' means "use the current time". */
		if (acc == NULL)
			acc = &now;
		node->tn_atime = *acc;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		/* NULL 'mod' means "use the current time". */
		if (mod == NULL)
			mod = &now;
		node->tn_mtime = *mod;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED)
		node->tn_ctime = now;
	node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
	node->tn_accessed = false;
	TMPFS_NODE_UNLOCK(node);

	/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
	random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
}

/*
 * Truncate (or extend) the file backing 'vp' to 'length' bytes, marking
 * the node changed/modified on success and scheduling a timestamp update.
 */
int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	struct tmpfs_node *node;
	int error;

	if (length < 0)
		return (EINVAL);
	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
		return (EFBIG);

	node = VP_TO_TMPFS_NODE(vp);
	error = node->tn_size == length ? 0 : tmpfs_reg_resize(vp, length,
	    FALSE);
	if (error == 0)
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	tmpfs_update(vp);

	return (error);
}

/* Order directory entries by their name hash for the RB-tree below. */
static __inline int
tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
	if (a->td_hash > b->td_hash)
		return (1);
	else if (a->td_hash < b->td_hash)
		return (-1);
	return (0);
}

RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);