xref: /freebsd-src/sys/fs/tmpfs/tmpfs_subr.c (revision 8fa5e0f21fd14bb3f5d977ae9130dae3e197f2ba)
1ad3638eeSXin LI /*	$NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $	*/
2d1fa59e9SXin LI 
3e08d5567SXin LI /*-
4b61a5730SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
5d63027b6SPedro F. Giffuni  *
6d1fa59e9SXin LI  * Copyright (c) 2005 The NetBSD Foundation, Inc.
7d1fa59e9SXin LI  * All rights reserved.
8d1fa59e9SXin LI  *
9d1fa59e9SXin LI  * This code is derived from software contributed to The NetBSD Foundation
10d1fa59e9SXin LI  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11d1fa59e9SXin LI  * 2005 program.
12d1fa59e9SXin LI  *
13d1fa59e9SXin LI  * Redistribution and use in source and binary forms, with or without
14d1fa59e9SXin LI  * modification, are permitted provided that the following conditions
15d1fa59e9SXin LI  * are met:
16d1fa59e9SXin LI  * 1. Redistributions of source code must retain the above copyright
17d1fa59e9SXin LI  *    notice, this list of conditions and the following disclaimer.
18d1fa59e9SXin LI  * 2. Redistributions in binary form must reproduce the above copyright
19d1fa59e9SXin LI  *    notice, this list of conditions and the following disclaimer in the
20d1fa59e9SXin LI  *    documentation and/or other materials provided with the distribution.
21d1fa59e9SXin LI  *
22d1fa59e9SXin LI  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23d1fa59e9SXin LI  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24d1fa59e9SXin LI  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25d1fa59e9SXin LI  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26d1fa59e9SXin LI  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27d1fa59e9SXin LI  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28d1fa59e9SXin LI  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29d1fa59e9SXin LI  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30d1fa59e9SXin LI  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31d1fa59e9SXin LI  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32d1fa59e9SXin LI  * POSSIBILITY OF SUCH DAMAGE.
33d1fa59e9SXin LI  */
34d1fa59e9SXin LI 
35d1fa59e9SXin LI /*
36d1fa59e9SXin LI  * Efficient memory file system supporting functions.
37d1fa59e9SXin LI  */
38fdafd315SWarner Losh 
39d1fa59e9SXin LI #include <sys/param.h>
406d2e2df7SMark Johnston #include <sys/systm.h>
41135beaf6SGleb Smirnoff #include <sys/dirent.h>
424fd5efe7SGleb Kurtsou #include <sys/fnv_hash.h>
4389f6b863SAttilio Rao #include <sys/lock.h>
44135beaf6SGleb Smirnoff #include <sys/limits.h>
45135beaf6SGleb Smirnoff #include <sys/mount.h>
46d1fa59e9SXin LI #include <sys/namei.h>
47d1fa59e9SXin LI #include <sys/priv.h>
48d1fa59e9SXin LI #include <sys/proc.h>
49d1b06863SMark Murray #include <sys/random.h>
504601f5f5SKonstantin Belousov #include <sys/refcount.h>
5189f6b863SAttilio Rao #include <sys/rwlock.h>
52081e36e7SKonstantin Belousov #include <sys/smr.h>
53d1fa59e9SXin LI #include <sys/stat.h>
54db94ad12SGleb Kurtsou #include <sys/sysctl.h>
5528bc23abSKonstantin Belousov #include <sys/user.h>
56d1fa59e9SXin LI #include <sys/vnode.h>
57d1fa59e9SXin LI #include <sys/vmmeter.h>
58d1fa59e9SXin LI 
59d1fa59e9SXin LI #include <vm/vm.h>
601c771f92SKonstantin Belousov #include <vm/vm_param.h>
61d1fa59e9SXin LI #include <vm/vm_object.h>
62d1fa59e9SXin LI #include <vm/vm_page.h>
632971897dSAlan Cox #include <vm/vm_pageout.h>
64d1fa59e9SXin LI #include <vm/vm_pager.h>
65d1fa59e9SXin LI #include <vm/vm_extern.h>
66135beaf6SGleb Smirnoff #include <vm/swap_pager.h>
676bb132baSBrooks Davis #include <vm/uma.h>
68d1fa59e9SXin LI 
69d1fa59e9SXin LI #include <fs/tmpfs/tmpfs.h>
70d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_fifoops.h>
71d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_vnops.h>
72d1fa59e9SXin LI 
737029da5cSPawel Biernacki SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
747029da5cSPawel Biernacki     "tmpfs file system");
75db94ad12SGleb Kurtsou 
76da7aa277SGleb Kurtsou static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
7763659234SMike Karels static long tmpfs_pages_avail_init;
7863659234SMike Karels static int tmpfs_mem_percent = TMPFS_MEM_PERCENT;
7963659234SMike Karels static void tmpfs_set_reserve_from_percent(void);
80da7aa277SGleb Kurtsou 
817c58c37eSMateusz Guzik MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure");
82a51c8071SKonstantin Belousov static uma_zone_t tmpfs_node_pool;
83172ffe70SMateusz Guzik VFS_SMR_DECLARE;
84a51c8071SKonstantin Belousov 
8528bc23abSKonstantin Belousov int tmpfs_pager_type = -1;
8628bc23abSKonstantin Belousov 
8728bc23abSKonstantin Belousov static vm_object_t
8828bc23abSKonstantin Belousov tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
8928bc23abSKonstantin Belousov     vm_ooffset_t offset, struct ucred *cred)
9028bc23abSKonstantin Belousov {
9128bc23abSKonstantin Belousov 	vm_object_t object;
9228bc23abSKonstantin Belousov 
9328bc23abSKonstantin Belousov 	MPASS(handle == NULL);
9428bc23abSKonstantin Belousov 	MPASS(offset == 0);
9528bc23abSKonstantin Belousov 	object = vm_object_allocate_dyn(tmpfs_pager_type, size,
9628bc23abSKonstantin Belousov 	    OBJ_COLORED | OBJ_SWAP);
9728bc23abSKonstantin Belousov 	if (!swap_pager_init_object(object, NULL, NULL, size, 0)) {
9828bc23abSKonstantin Belousov 		vm_object_deallocate(object);
9928bc23abSKonstantin Belousov 		object = NULL;
10028bc23abSKonstantin Belousov 	}
10128bc23abSKonstantin Belousov 	return (object);
10228bc23abSKonstantin Belousov }
10328bc23abSKonstantin Belousov 
104eec2e4efSMateusz Guzik /*
105eec2e4efSMateusz Guzik  * Make sure tmpfs vnodes with writable mappings can be found on the lazy list.
106eec2e4efSMateusz Guzik  *
107eec2e4efSMateusz Guzik  * This allows for periodic mtime updates while only scanning vnodes which are
108eec2e4efSMateusz Guzik  * plausibly dirty, see tmpfs_update_mtime_lazy.
109eec2e4efSMateusz Guzik  */
110eec2e4efSMateusz Guzik static void
111eec2e4efSMateusz Guzik tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
112eec2e4efSMateusz Guzik     vm_offset_t new)
113eec2e4efSMateusz Guzik {
114eec2e4efSMateusz Guzik 	struct vnode *vp;
115eec2e4efSMateusz Guzik 
116eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_WLOCKED(object);
117eec2e4efSMateusz Guzik 
118d9dc64f1SKonstantin Belousov 	vp = VM_TO_TMPFS_VP(object);
119eec2e4efSMateusz Guzik 
120eec2e4efSMateusz Guzik 	/*
121eec2e4efSMateusz Guzik 	 * Forced unmount?
122eec2e4efSMateusz Guzik 	 */
12346811949SKonstantin Belousov 	if (vp == NULL || vp->v_object == NULL) {
124eec2e4efSMateusz Guzik 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
1250f01fb01SKonstantin Belousov 		    ("object %p with OBJ_TMPFS_VREF but without vnode",
1260f01fb01SKonstantin Belousov 		    object));
127eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
128eec2e4efSMateusz Guzik 		return;
129eec2e4efSMateusz Guzik 	}
130eec2e4efSMateusz Guzik 
131eec2e4efSMateusz Guzik 	if (old == 0) {
132eec2e4efSMateusz Guzik 		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
133eec2e4efSMateusz Guzik 		    ("object without writable mappings has a reference"));
134eec2e4efSMateusz Guzik 		VNPASS(vp->v_usecount > 0, vp);
135eec2e4efSMateusz Guzik 	} else {
136eec2e4efSMateusz Guzik 		VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp,
1370f01fb01SKonstantin Belousov 		    ("object with writable mappings does not "
1380f01fb01SKonstantin Belousov 		    "have a reference"));
139eec2e4efSMateusz Guzik 	}
140eec2e4efSMateusz Guzik 
141eec2e4efSMateusz Guzik 	if (old == new) {
142eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
143eec2e4efSMateusz Guzik 		return;
144eec2e4efSMateusz Guzik 	}
145eec2e4efSMateusz Guzik 
146eec2e4efSMateusz Guzik 	if (new == 0) {
147eec2e4efSMateusz Guzik 		vm_object_clear_flag(object, OBJ_TMPFS_VREF);
148eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
149eec2e4efSMateusz Guzik 		vrele(vp);
150eec2e4efSMateusz Guzik 	} else {
151eec2e4efSMateusz Guzik 		if ((object->flags & OBJ_TMPFS_VREF) == 0) {
152eec2e4efSMateusz Guzik 			vref(vp);
153eec2e4efSMateusz Guzik 			vlazy(vp);
154eec2e4efSMateusz Guzik 			vm_object_set_flag(object, OBJ_TMPFS_VREF);
155eec2e4efSMateusz Guzik 		}
156eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
157eec2e4efSMateusz Guzik 	}
158eec2e4efSMateusz Guzik }
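/*
 * In short: the first writable mapping of the object (writemappings going
 * from zero to non-zero) takes a vnode reference and puts the vnode on the
 * lazy list; dropping the last writable mapping (non-zero to zero) releases
 * that reference again.  OBJ_TMPFS_VREF records whether such a reference is
 * currently held.  The function is entered with the object write-locked and
 * always leaves it unlocked.
 */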
159eec2e4efSMateusz Guzik 
160eec2e4efSMateusz Guzik static void
161eec2e4efSMateusz Guzik tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start,
162eec2e4efSMateusz Guzik     vm_offset_t end)
163eec2e4efSMateusz Guzik {
164eec2e4efSMateusz Guzik 	vm_offset_t new, old;
165eec2e4efSMateusz Guzik 
166eec2e4efSMateusz Guzik 	VM_OBJECT_WLOCK(object);
167eec2e4efSMateusz Guzik 	KASSERT((object->flags & OBJ_ANON) == 0,
168eec2e4efSMateusz Guzik 	    ("%s: object %p with OBJ_ANON", __func__, object));
169eec2e4efSMateusz Guzik 	old = object->un_pager.swp.writemappings;
170eec2e4efSMateusz Guzik 	object->un_pager.swp.writemappings += (vm_ooffset_t)end - start;
171eec2e4efSMateusz Guzik 	new = object->un_pager.swp.writemappings;
172eec2e4efSMateusz Guzik 	tmpfs_pager_writecount_recalc(object, old, new);
173eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_UNLOCKED(object);
174eec2e4efSMateusz Guzik }
175eec2e4efSMateusz Guzik 
176eec2e4efSMateusz Guzik static void
177eec2e4efSMateusz Guzik tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
178eec2e4efSMateusz Guzik     vm_offset_t end)
179eec2e4efSMateusz Guzik {
180eec2e4efSMateusz Guzik 	vm_offset_t new, old;
181eec2e4efSMateusz Guzik 
182eec2e4efSMateusz Guzik 	VM_OBJECT_WLOCK(object);
183eec2e4efSMateusz Guzik 	KASSERT((object->flags & OBJ_ANON) == 0,
184eec2e4efSMateusz Guzik 	    ("%s: object %p with OBJ_ANON", __func__, object));
185eec2e4efSMateusz Guzik 	old = object->un_pager.swp.writemappings;
1866ada4e8aSKonstantin Belousov 	KASSERT(old >= (vm_ooffset_t)end - start,
1876ada4e8aSKonstantin Belousov 	    ("tmpfs obj %p writecount %jx dec %jx", object, (uintmax_t)old,
1886ada4e8aSKonstantin Belousov 	    (uintmax_t)((vm_ooffset_t)end - start)));
189eec2e4efSMateusz Guzik 	object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
190eec2e4efSMateusz Guzik 	new = object->un_pager.swp.writemappings;
191eec2e4efSMateusz Guzik 	tmpfs_pager_writecount_recalc(object, old, new);
192eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_UNLOCKED(object);
193eec2e4efSMateusz Guzik }
194eec2e4efSMateusz Guzik 
19528bc23abSKonstantin Belousov static void
19628bc23abSKonstantin Belousov tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp)
19728bc23abSKonstantin Belousov {
19828bc23abSKonstantin Belousov 	struct vnode *vp;
19928bc23abSKonstantin Belousov 
20028bc23abSKonstantin Belousov 	/*
20128bc23abSKonstantin Belousov 	 * A tmpfs VREG node whose vnode has been reclaimed keeps an object of
202d9dc64f1SKonstantin Belousov 	 * tmpfs_pager_type.  In that case there is no v_writecount to adjust.
20328bc23abSKonstantin Belousov 	 */
20428bc23abSKonstantin Belousov 	if (vp_heldp != NULL)
20528bc23abSKonstantin Belousov 		VM_OBJECT_RLOCK(object);
20628bc23abSKonstantin Belousov 	else
20728bc23abSKonstantin Belousov 		VM_OBJECT_ASSERT_LOCKED(object);
20828bc23abSKonstantin Belousov 	if ((object->flags & OBJ_TMPFS) != 0) {
209d9dc64f1SKonstantin Belousov 		vp = VM_TO_TMPFS_VP(object);
21028bc23abSKonstantin Belousov 		if (vp != NULL) {
21128bc23abSKonstantin Belousov 			*vpp = vp;
21228bc23abSKonstantin Belousov 			if (vp_heldp != NULL) {
21328bc23abSKonstantin Belousov 				vhold(vp);
21428bc23abSKonstantin Belousov 				*vp_heldp = true;
21528bc23abSKonstantin Belousov 			}
21628bc23abSKonstantin Belousov 		}
21728bc23abSKonstantin Belousov 	}
21828bc23abSKonstantin Belousov 	if (vp_heldp != NULL)
21928bc23abSKonstantin Belousov 		VM_OBJECT_RUNLOCK(object);
22028bc23abSKonstantin Belousov }
22128bc23abSKonstantin Belousov 
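/*
 * The three pager hooks below keep the tmpfs page accounting (tm_pages_used
 * on the mount, tn_reg.tn_pages on the node) in sync with the pages a
 * regular file consumes, counting each page once whether it is resident or
 * swapped out: page insertion and removal only touch the counters when the
 * swap pager holds no copy of the page, while freeing swap space drops the
 * charge for the swapped-out copies.
 */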
22237aea264SKonstantin Belousov static void
22337aea264SKonstantin Belousov tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
22437aea264SKonstantin Belousov {
22537aea264SKonstantin Belousov 	struct tmpfs_node *node;
22637aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
22737aea264SKonstantin Belousov 	vm_size_t c;
22837aea264SKonstantin Belousov 
22937aea264SKonstantin Belousov 	swap_pager_freespace(obj, start, size, &c);
23037aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0 || c == 0)
23137aea264SKonstantin Belousov 		return;
23237aea264SKonstantin Belousov 
23337aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
23437aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
23537aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
23637aea264SKonstantin Belousov 
23737aea264SKonstantin Belousov 	KASSERT(tm->tm_pages_used >= c,
23837aea264SKonstantin Belousov 	    ("tmpfs tm %p pages %jd free %jd", tm,
23937aea264SKonstantin Belousov 	    (uintmax_t)tm->tm_pages_used, (uintmax_t)c));
24037aea264SKonstantin Belousov 	atomic_add_long(&tm->tm_pages_used, -c);
24137aea264SKonstantin Belousov 	KASSERT(node->tn_reg.tn_pages >= c,
24237aea264SKonstantin Belousov 	    ("tmpfs node %p pages %jd free %jd", node,
24337aea264SKonstantin Belousov 	    (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c));
24437aea264SKonstantin Belousov 	node->tn_reg.tn_pages -= c;
24537aea264SKonstantin Belousov }
24637aea264SKonstantin Belousov 
24737aea264SKonstantin Belousov static void
24837aea264SKonstantin Belousov tmpfs_page_inserted(vm_object_t obj, vm_page_t m)
24937aea264SKonstantin Belousov {
25037aea264SKonstantin Belousov 	struct tmpfs_node *node;
25137aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
25237aea264SKonstantin Belousov 
25337aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0)
25437aea264SKonstantin Belousov 		return;
25537aea264SKonstantin Belousov 
25637aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
25737aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
25837aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
25937aea264SKonstantin Belousov 
26037aea264SKonstantin Belousov 	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
26137aea264SKonstantin Belousov 		atomic_add_long(&tm->tm_pages_used, 1);
26237aea264SKonstantin Belousov 		node->tn_reg.tn_pages += 1;
26337aea264SKonstantin Belousov 	}
26437aea264SKonstantin Belousov }
26537aea264SKonstantin Belousov 
26637aea264SKonstantin Belousov static void
26737aea264SKonstantin Belousov tmpfs_page_removed(vm_object_t obj, vm_page_t m)
26837aea264SKonstantin Belousov {
26937aea264SKonstantin Belousov 	struct tmpfs_node *node;
27037aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
27137aea264SKonstantin Belousov 
27237aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0)
27337aea264SKonstantin Belousov 		return;
27437aea264SKonstantin Belousov 
27537aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
27637aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
27737aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
27837aea264SKonstantin Belousov 
27937aea264SKonstantin Belousov 	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
28037aea264SKonstantin Belousov 		KASSERT(tm->tm_pages_used >= 1,
28137aea264SKonstantin Belousov 		    ("tmpfs tm %p pages %jd free 1", tm,
28237aea264SKonstantin Belousov 		    (uintmax_t)tm->tm_pages_used));
28337aea264SKonstantin Belousov 		atomic_add_long(&tm->tm_pages_used, -1);
28437aea264SKonstantin Belousov 		KASSERT(node->tn_reg.tn_pages >= 1,
28537aea264SKonstantin Belousov 		    ("tmpfs node %p pages %jd free 1", node,
28637aea264SKonstantin Belousov 		    (uintmax_t)node->tn_reg.tn_pages));
28737aea264SKonstantin Belousov 		node->tn_reg.tn_pages -= 1;
28837aea264SKonstantin Belousov 	}
28937aea264SKonstantin Belousov }
29037aea264SKonstantin Belousov 
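/*
 * Decide whether one more page may be allocated for a tmpfs object.
 * Allocation is always allowed when the object does not (or no longer does)
 * belong to a tmpfs mount, when the page is already backed by swap, or when
 * tm_pages_max is zero.  A mount with tm_pages_max == ULONG_MAX has no
 * explicit size limit but must still leave the global reserve intact
 * (tmpfs_mem_avail() >= 1); otherwise the explicit per-mount page limit is
 * enforced.
 */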
29137aea264SKonstantin Belousov static boolean_t
29237aea264SKonstantin Belousov tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex)
29337aea264SKonstantin Belousov {
29437aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
29537aea264SKonstantin Belousov 
29637aea264SKonstantin Belousov 	tm = VM_TO_TMPFS_MP(obj);
29737aea264SKonstantin Belousov 	if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) ||
29837aea264SKonstantin Belousov 	    tm->tm_pages_max == 0)
29937aea264SKonstantin Belousov 		return (true);
300ed19c098SMike Karels 	if (tm->tm_pages_max == ULONG_MAX)
301ed19c098SMike Karels 		return (tmpfs_mem_avail() >= 1);
30237aea264SKonstantin Belousov 	return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used));
30337aea264SKonstantin Belousov }
30437aea264SKonstantin Belousov 
30528bc23abSKonstantin Belousov struct pagerops tmpfs_pager_ops = {
30628bc23abSKonstantin Belousov 	.pgo_kvme_type = KVME_TYPE_VNODE,
30728bc23abSKonstantin Belousov 	.pgo_alloc = tmpfs_pager_alloc,
30828bc23abSKonstantin Belousov 	.pgo_set_writeable_dirty = vm_object_set_writeable_dirty_,
309eec2e4efSMateusz Guzik 	.pgo_update_writecount = tmpfs_pager_update_writecount,
310eec2e4efSMateusz Guzik 	.pgo_release_writecount = tmpfs_pager_release_writecount,
31128bc23abSKonstantin Belousov 	.pgo_mightbedirty = vm_object_mightbedirty_,
31228bc23abSKonstantin Belousov 	.pgo_getvp = tmpfs_pager_getvp,
31337aea264SKonstantin Belousov 	.pgo_freespace = tmpfs_pager_freespace,
31437aea264SKonstantin Belousov 	.pgo_page_inserted = tmpfs_page_inserted,
31537aea264SKonstantin Belousov 	.pgo_page_removed = tmpfs_page_removed,
31637aea264SKonstantin Belousov 	.pgo_can_alloc_page = tmpfs_can_alloc_page,
31728bc23abSKonstantin Belousov };
31828bc23abSKonstantin Belousov 
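/*
 * tmpfs_node_pool is an SMR-enabled UMA zone (see VFS_SMR_ZONE_SET() in
 * tmpfs_subr_init()), so memory of freed nodes is not reused while lockless
 * (VFS SMR) readers may still be inspecting it.  The ctor/dtor below run on
 * every allocation and free and reset only the per-use fields (the ctor
 * bumps tn_gen so a recycled node gets a fresh generation), while init/fini
 * run once per backing item and create/destroy the node interlock.
 */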
319a51c8071SKonstantin Belousov static int
320a51c8071SKonstantin Belousov tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
321a51c8071SKonstantin Belousov {
322a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
323a51c8071SKonstantin Belousov 
324a51c8071SKonstantin Belousov 	node = mem;
325a51c8071SKonstantin Belousov 	node->tn_gen++;
326a51c8071SKonstantin Belousov 	node->tn_size = 0;
327a51c8071SKonstantin Belousov 	node->tn_status = 0;
328016b7c7eSKonstantin Belousov 	node->tn_accessed = false;
329a51c8071SKonstantin Belousov 	node->tn_flags = 0;
330a51c8071SKonstantin Belousov 	node->tn_links = 0;
331a51c8071SKonstantin Belousov 	node->tn_vnode = NULL;
332a51c8071SKonstantin Belousov 	node->tn_vpstate = 0;
333a51c8071SKonstantin Belousov 	return (0);
334a51c8071SKonstantin Belousov }
335a51c8071SKonstantin Belousov 
336a51c8071SKonstantin Belousov static void
337a51c8071SKonstantin Belousov tmpfs_node_dtor(void *mem, int size, void *arg)
338a51c8071SKonstantin Belousov {
339a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
340a51c8071SKonstantin Belousov 
341a51c8071SKonstantin Belousov 	node = mem;
342a51c8071SKonstantin Belousov 	node->tn_type = VNON;
343a51c8071SKonstantin Belousov }
344a51c8071SKonstantin Belousov 
345a51c8071SKonstantin Belousov static int
346a51c8071SKonstantin Belousov tmpfs_node_init(void *mem, int size, int flags)
347a51c8071SKonstantin Belousov {
348a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
349a51c8071SKonstantin Belousov 
350a51c8071SKonstantin Belousov 	node = mem;
351a51c8071SKonstantin Belousov 	node->tn_id = 0;
3526bd3f23aSRyan Libby 	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF | MTX_NEW);
353a51c8071SKonstantin Belousov 	node->tn_gen = arc4random();
354a51c8071SKonstantin Belousov 	return (0);
355a51c8071SKonstantin Belousov }
356a51c8071SKonstantin Belousov 
357a51c8071SKonstantin Belousov static void
358a51c8071SKonstantin Belousov tmpfs_node_fini(void *mem, int size)
359a51c8071SKonstantin Belousov {
360a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
361a51c8071SKonstantin Belousov 
362a51c8071SKonstantin Belousov 	node = mem;
363a51c8071SKonstantin Belousov 	mtx_destroy(&node->tn_interlock);
364a51c8071SKonstantin Belousov }
365a51c8071SKonstantin Belousov 
36628bc23abSKonstantin Belousov int
367a51c8071SKonstantin Belousov tmpfs_subr_init(void)
368a51c8071SKonstantin Belousov {
36928bc23abSKonstantin Belousov 	tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops,
37028bc23abSKonstantin Belousov 	    OBJT_SWAP);
37128bc23abSKonstantin Belousov 	if (tmpfs_pager_type == -1)
37228bc23abSKonstantin Belousov 		return (EINVAL);
373a51c8071SKonstantin Belousov 	tmpfs_node_pool = uma_zcreate("TMPFS node",
374a51c8071SKonstantin Belousov 	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
375a51c8071SKonstantin Belousov 	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
376172ffe70SMateusz Guzik 	VFS_SMR_ZONE_SET(tmpfs_node_pool);
37763659234SMike Karels 
37863659234SMike Karels 	tmpfs_pages_avail_init = tmpfs_mem_avail();
37963659234SMike Karels 	tmpfs_set_reserve_from_percent();
38028bc23abSKonstantin Belousov 	return (0);
381a51c8071SKonstantin Belousov }
382a51c8071SKonstantin Belousov 
383a51c8071SKonstantin Belousov void
384a51c8071SKonstantin Belousov tmpfs_subr_uninit(void)
385a51c8071SKonstantin Belousov {
38628bc23abSKonstantin Belousov 	if (tmpfs_pager_type != -1)
38728bc23abSKonstantin Belousov 		vm_pager_free_dyn_type(tmpfs_pager_type);
38828bc23abSKonstantin Belousov 	tmpfs_pager_type = -1;
389a51c8071SKonstantin Belousov 	uma_zdestroy(tmpfs_node_pool);
390a51c8071SKonstantin Belousov }
391a51c8071SKonstantin Belousov 
392da7aa277SGleb Kurtsou static int
393da7aa277SGleb Kurtsou sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
394da7aa277SGleb Kurtsou {
395da7aa277SGleb Kurtsou 	int error;
396da7aa277SGleb Kurtsou 	long pages, bytes;
397da7aa277SGleb Kurtsou 
398da7aa277SGleb Kurtsou 	pages = *(long *)arg1;
399da7aa277SGleb Kurtsou 	bytes = pages * PAGE_SIZE;
400da7aa277SGleb Kurtsou 
401da7aa277SGleb Kurtsou 	error = sysctl_handle_long(oidp, &bytes, 0, req);
402da7aa277SGleb Kurtsou 	if (error || !req->newptr)
403da7aa277SGleb Kurtsou 		return (error);
404da7aa277SGleb Kurtsou 
405da7aa277SGleb Kurtsou 	pages = bytes / PAGE_SIZE;
406da7aa277SGleb Kurtsou 	if (pages < TMPFS_PAGES_MINRESERVED)
407da7aa277SGleb Kurtsou 		return (EINVAL);
408da7aa277SGleb Kurtsou 
409da7aa277SGleb Kurtsou 	*(long *)arg1 = pages;
410da7aa277SGleb Kurtsou 	return (0);
411da7aa277SGleb Kurtsou }
412da7aa277SGleb Kurtsou 
4132a829749SMateusz Guzik SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved,
4142a829749SMateusz Guzik     CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_pages_reserved, 0,
4152a829749SMateusz Guzik     sysctl_mem_reserved, "L",
416f8439900SGleb Kurtsou     "Amount of available memory and swap below which tmpfs growth stops");
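/*
 * The handler above exports tmpfs_pages_reserved in bytes; a new value is
 * rounded down to whole pages and rejected if it would fall below
 * TMPFS_PAGES_MINRESERVED.  Illustrative usage (hypothetical value):
 *
 *	# sysctl vfs.tmpfs.memory_reserved=268435456	(reserve 256 MB)
 */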
417da7aa277SGleb Kurtsou 
41863659234SMike Karels static int
41963659234SMike Karels sysctl_mem_percent(SYSCTL_HANDLER_ARGS)
42063659234SMike Karels {
42163659234SMike Karels 	int error, percent;
42263659234SMike Karels 
42363659234SMike Karels 	percent = *(int *)arg1;
42463659234SMike Karels 	error = sysctl_handle_int(oidp, &percent, 0, req);
42563659234SMike Karels 	if (error || !req->newptr)
42663659234SMike Karels 		return (error);
42763659234SMike Karels 
42863659234SMike Karels 	if ((unsigned) percent > 100)
42963659234SMike Karels 		return (EINVAL);
43063659234SMike Karels 
4313cded059SJessica Clarke 	*(int *)arg1 = percent;
43263659234SMike Karels 	tmpfs_set_reserve_from_percent();
43363659234SMike Karels 	return (0);
43463659234SMike Karels }
43563659234SMike Karels 
43663659234SMike Karels static void
43763659234SMike Karels tmpfs_set_reserve_from_percent(void)
43863659234SMike Karels {
43963659234SMike Karels 	size_t reserved;
44063659234SMike Karels 
44163659234SMike Karels 	reserved = tmpfs_pages_avail_init * (100 - tmpfs_mem_percent) / 100;
44263659234SMike Karels 	tmpfs_pages_reserved = max(reserved, TMPFS_PAGES_MINRESERVED);
44363659234SMike Karels }
44463659234SMike Karels 
44563659234SMike Karels SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_percent,
44663659234SMike Karels     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_mem_percent, 0,
44763659234SMike Karels     sysctl_mem_percent, "I",
44863659234SMike Karels     "Percent of available memory that can be used if no size limit");
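/*
 * tmpfs_set_reserve_from_percent() derives the reserve from the memory and
 * swap that were available at initialization time.  Illustrative arithmetic
 * (hypothetical numbers): with tmpfs_pages_avail_init == 1000000 pages and
 * vfs.tmpfs.memory_percent == 90, the reserve becomes
 * 1000000 * (100 - 90) / 100 == 100000 pages, but never less than
 * TMPFS_PAGES_MINRESERVED.
 */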
44963659234SMike Karels 
4504fd5efe7SGleb Kurtsou static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
4514fd5efe7SGleb Kurtsou     struct tmpfs_dirent *b);
4524fd5efe7SGleb Kurtsou RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
4534fd5efe7SGleb Kurtsou 
454da7aa277SGleb Kurtsou size_t
455da7aa277SGleb Kurtsou tmpfs_mem_avail(void)
456da7aa277SGleb Kurtsou {
457f9cc8410SEric van Gyzen 	size_t avail;
458f9cc8410SEric van Gyzen 	long reserved;
459da7aa277SGleb Kurtsou 
460f9cc8410SEric van Gyzen 	avail = swap_pager_avail + vm_free_count();
461f9cc8410SEric van Gyzen 	reserved = atomic_load_long(&tmpfs_pages_reserved);
462f9cc8410SEric van Gyzen 	if (__predict_false(avail < reserved))
463f9cc8410SEric van Gyzen 		return (0);
464f9cc8410SEric van Gyzen 	return (avail - reserved);
465da7aa277SGleb Kurtsou }
466da7aa277SGleb Kurtsou 
467da7aa277SGleb Kurtsou size_t
468da7aa277SGleb Kurtsou tmpfs_pages_used(struct tmpfs_mount *tmp)
469da7aa277SGleb Kurtsou {
470da7aa277SGleb Kurtsou 	const size_t node_size = sizeof(struct tmpfs_node) +
471da7aa277SGleb Kurtsou 	    sizeof(struct tmpfs_dirent);
472da7aa277SGleb Kurtsou 	size_t meta_pages;
473da7aa277SGleb Kurtsou 
474da7aa277SGleb Kurtsou 	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
475da7aa277SGleb Kurtsou 	    PAGE_SIZE);
476da7aa277SGleb Kurtsou 	return (meta_pages + tmp->tm_pages_used);
477da7aa277SGleb Kurtsou }
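/*
 * Illustrative arithmetic for the metadata charge above (hypothetical
 * sizes): if sizeof(struct tmpfs_node) + sizeof(struct tmpfs_dirent) came
 * to 320 bytes and PAGE_SIZE were 4096, then 1000 in-use nodes would be
 * charged howmany(1000 * 320, 4096) == 79 pages on top of the data pages
 * counted in tm_pages_used.
 */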
478da7aa277SGleb Kurtsou 
47956242a4cSFedor Uporov bool
480da7aa277SGleb Kurtsou tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
481da7aa277SGleb Kurtsou {
482da7aa277SGleb Kurtsou 	if (tmpfs_mem_avail() < req_pages)
4837f055843SKonstantin Belousov 		return (false);
484da7aa277SGleb Kurtsou 
485ed2159c9SMateusz Guzik 	if (tmp->tm_pages_max != ULONG_MAX &&
486da7aa277SGleb Kurtsou 	    tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
4877f055843SKonstantin Belousov 		return (false);
488da7aa277SGleb Kurtsou 
4897f055843SKonstantin Belousov 	return (true);
490da7aa277SGleb Kurtsou }
491da7aa277SGleb Kurtsou 
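/*
 * Zero the byte range [base, end) of the page at index 'idx' in 'object';
 * used, for example, when a regular file is truncated or a hole is punched
 * and a partial page must not keep its stale bytes.  A resident page is
 * zeroed directly; a page living only in swap is paged in first and
 * immediately queued for laundering, since the zeroing is not regarded as
 * an access.  A page-in failure is reported as EIO unless 'ignerr' is set.
 */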
492399be910SKa Ho Ng static int
493399be910SKa Ho Ng tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
494399be910SKa Ho Ng     int end, boolean_t ignerr)
495399be910SKa Ho Ng {
496399be910SKa Ho Ng 	vm_page_t m;
497399be910SKa Ho Ng 	int rv, error;
498399be910SKa Ho Ng 
499399be910SKa Ho Ng 	VM_OBJECT_ASSERT_WLOCKED(object);
500399be910SKa Ho Ng 	KASSERT(base >= 0, ("%s: base %d", __func__, base));
501399be910SKa Ho Ng 	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
502399be910SKa Ho Ng 	    end));
503399be910SKa Ho Ng 	error = 0;
504399be910SKa Ho Ng 
505399be910SKa Ho Ng retry:
506399be910SKa Ho Ng 	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
507399be910SKa Ho Ng 	if (m != NULL) {
508399be910SKa Ho Ng 		MPASS(vm_page_all_valid(m));
509399be910SKa Ho Ng 	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
510399be910SKa Ho Ng 		m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL |
511399be910SKa Ho Ng 		    VM_ALLOC_WAITFAIL);
512399be910SKa Ho Ng 		if (m == NULL)
513399be910SKa Ho Ng 			goto retry;
514399be910SKa Ho Ng 		vm_object_pip_add(object, 1);
515399be910SKa Ho Ng 		VM_OBJECT_WUNLOCK(object);
516399be910SKa Ho Ng 		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
517399be910SKa Ho Ng 		VM_OBJECT_WLOCK(object);
518399be910SKa Ho Ng 		vm_object_pip_wakeup(object);
519399be910SKa Ho Ng 		if (rv == VM_PAGER_OK) {
520399be910SKa Ho Ng 			/*
521399be910SKa Ho Ng 			 * Since the page was not resident, and therefore not
522399be910SKa Ho Ng 			 * recently accessed, immediately enqueue it for
523399be910SKa Ho Ng 			 * asynchronous laundering.  The current operation is
524399be910SKa Ho Ng 			 * not regarded as an access.
525399be910SKa Ho Ng 			 */
526399be910SKa Ho Ng 			vm_page_launder(m);
527399be910SKa Ho Ng 		} else {
528399be910SKa Ho Ng 			vm_page_free(m);
529399be910SKa Ho Ng 			m = NULL;
530399be910SKa Ho Ng 			if (!ignerr)
531399be910SKa Ho Ng 				error = EIO;
532399be910SKa Ho Ng 		}
533399be910SKa Ho Ng 	}
534399be910SKa Ho Ng 	if (m != NULL) {
535399be910SKa Ho Ng 		pmap_zero_page_area(m, base, end - base);
536399be910SKa Ho Ng 		vm_page_set_dirty(m);
537399be910SKa Ho Ng 		vm_page_xunbusy(m);
538399be910SKa Ho Ng 	}
539399be910SKa Ho Ng 
540399be910SKa Ho Ng 	return (error);
541399be910SKa Ho Ng }
542399be910SKa Ho Ng 
54364c25043SKonstantin Belousov void
54464c25043SKonstantin Belousov tmpfs_ref_node(struct tmpfs_node *node)
54564c25043SKonstantin Belousov {
5464601f5f5SKonstantin Belousov #ifdef INVARIANTS
5474601f5f5SKonstantin Belousov 	u_int old;
54864c25043SKonstantin Belousov 
5494601f5f5SKonstantin Belousov 	old =
5504601f5f5SKonstantin Belousov #endif
5514601f5f5SKonstantin Belousov 	refcount_acquire(&node->tn_refcount);
5524601f5f5SKonstantin Belousov #ifdef INVARIANTS
5534601f5f5SKonstantin Belousov 	KASSERT(old > 0, ("node %p zero refcount", node));
5544601f5f5SKonstantin Belousov #endif
55564c25043SKonstantin Belousov }
55664c25043SKonstantin Belousov 
557d1fa59e9SXin LI /*
558d1fa59e9SXin LI  * Allocates a new node of type 'type' inside the 'tmp' mount point, with
559d1fa59e9SXin LI  * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
561d1fa59e9SXin LI  *
562d1fa59e9SXin LI  * If the node type is set to 'VDIR', then the parent parameter must point
563d1fa59e9SXin LI  * to the parent directory of the node being created.  It may only be NULL
564d1fa59e9SXin LI  * while allocating the root node.
565d1fa59e9SXin LI  *
566d1fa59e9SXin LI  * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
567d1fa59e9SXin LI  * specifies the device the node represents.
568d1fa59e9SXin LI  *
569d1fa59e9SXin LI  * If the node type is set to 'VLNK', then the parameter target specifies
570d1fa59e9SXin LI  * the file name of the target file for the symbolic link that is being
571d1fa59e9SXin LI  * created.
572d1fa59e9SXin LI  *
573d1fa59e9SXin LI  * Note that new nodes are allocated from the tmpfs_node_pool UMA zone,
574d1fa59e9SXin LI  * provided the mount's node count limit and the memory/swap reserve leave
575d1fa59e9SXin LI  * enough room to create them.
576d1fa59e9SXin LI  *
577d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
578d1fa59e9SXin LI  */
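/*
 * Illustrative call shapes (hypothetical caller variables, not taken from
 * this file):
 *
 *	// new subdirectory under the directory node 'dnode'
 *	error = tmpfs_alloc_node(mp, tmp, VDIR, uid, gid, mode, dnode,
 *	    NULL, VNOVAL, &node);
 *
 *	// new symbolic link pointing at the string 'linktarget'
 *	error = tmpfs_alloc_node(mp, tmp, VLNK, uid, gid, mode, NULL,
 *	    linktarget, VNOVAL, &node);
 *
 * 'target' must be non-NULL exactly for VLNK, and 'rdev' must be VNOVAL
 * unless the type is VBLK or VCHR, as the assertions below require.
 */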
579d1fa59e9SXin LI int
580ba8cc6d7SMateusz Guzik tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype) type,
581d1fa59e9SXin LI     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
5821493c2eeSBrooks Davis     const char *target, dev_t rdev, struct tmpfs_node **node)
583d1fa59e9SXin LI {
584d1fa59e9SXin LI 	struct tmpfs_node *nnode;
585618029afSMateusz Guzik 	char *symlink;
586618029afSMateusz Guzik 	char symlink_smr;
587d1fa59e9SXin LI 
588d1fa59e9SXin LI 	/* If the root directory of the 'tmp' file system is not yet
589d1fa59e9SXin LI 	 * allocated, this must be the request to do it. */
590d1fa59e9SXin LI 	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
591d1fa59e9SXin LI 
592b918ee2cSKonstantin Belousov 	MPASS((type == VLNK) ^ (target == NULL));
593b918ee2cSKonstantin Belousov 	MPASS((type == VBLK || type == VCHR) ^ (rdev == VNOVAL));
594d1fa59e9SXin LI 
595189ee6beSJaakko Heinonen 	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
5967adb1776SXin LI 		return (ENOSPC);
5977f055843SKonstantin Belousov 	if (!tmpfs_pages_check_avail(tmp, 1))
598da7aa277SGleb Kurtsou 		return (ENOSPC);
599d1fa59e9SXin LI 
6004cda7f7eSKonstantin Belousov 	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
6014cda7f7eSKonstantin Belousov 		/*
6024cda7f7eSKonstantin Belousov 		 * When a new tmpfs node is created for fully
6034cda7f7eSKonstantin Belousov 		 * constructed mount point, there must be a parent
6044cda7f7eSKonstantin Belousov 		 * node, whose vnode is locked exclusively.  As a
6054cda7f7eSKonstantin Belousov 		 * consequence, if the unmount is executing in
6064cda7f7eSKonstantin Belousov 		 * parallel, vflush() cannot reclaim the parent vnode.
6074cda7f7eSKonstantin Belousov 		 * Due to this, the check for MNTK_UNMOUNT flag is not
6084cda7f7eSKonstantin Belousov 		 * racy: if we did not see MNTK_UNMOUNT flag, then tmp
6094cda7f7eSKonstantin Belousov 		 * cannot be destroyed until node construction is
6104cda7f7eSKonstantin Belousov 		 * finished and the parent vnode unlocked.
6114cda7f7eSKonstantin Belousov 		 *
6124cda7f7eSKonstantin Belousov 		 * Tmpfs does not need to instantiate new nodes during
6134cda7f7eSKonstantin Belousov 		 * unmount.
6144cda7f7eSKonstantin Belousov 		 */
6154cda7f7eSKonstantin Belousov 		return (EBUSY);
6164cda7f7eSKonstantin Belousov 	}
617ae265753SKonstantin Belousov 	if ((mp->mnt_flag & MNT_RDONLY) != 0)
618ae265753SKonstantin Belousov 		return (EROFS);
6194cda7f7eSKonstantin Belousov 
620172ffe70SMateusz Guzik 	nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK);
621d1fa59e9SXin LI 
622d1fa59e9SXin LI 	/* Generic initialization. */
623d1fa59e9SXin LI 	nnode->tn_type = type;
6248d5892eeSXin LI 	vfs_timestamp(&nnode->tn_atime);
625d1fa59e9SXin LI 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
626d1fa59e9SXin LI 	    nnode->tn_atime;
627d1fa59e9SXin LI 	nnode->tn_uid = uid;
628d1fa59e9SXin LI 	nnode->tn_gid = gid;
629d1fa59e9SXin LI 	nnode->tn_mode = mode;
63030e0cf49SMateusz Guzik 	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
63164c25043SKonstantin Belousov 	nnode->tn_refcount = 1;
63256242a4cSFedor Uporov 	LIST_INIT(&nnode->tn_extattrs);
633d1fa59e9SXin LI 
634d1fa59e9SXin LI 	/* Type-specific initialization. */
635d1fa59e9SXin LI 	switch (nnode->tn_type) {
636d1fa59e9SXin LI 	case VBLK:
637d1fa59e9SXin LI 	case VCHR:
638d1fa59e9SXin LI 		nnode->tn_rdev = rdev;
639d1fa59e9SXin LI 		break;
640d1fa59e9SXin LI 
641d1fa59e9SXin LI 	case VDIR:
6424fd5efe7SGleb Kurtsou 		RB_INIT(&nnode->tn_dir.tn_dirhead);
6434fd5efe7SGleb Kurtsou 		LIST_INIT(&nnode->tn_dir.tn_dupindex);
6447871e52bSXin LI 		MPASS(parent != nnode);
6457871e52bSXin LI 		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
646d1fa59e9SXin LI 		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
647d1fa59e9SXin LI 		nnode->tn_dir.tn_readdir_lastn = 0;
648d1fa59e9SXin LI 		nnode->tn_dir.tn_readdir_lastp = NULL;
649*8fa5e0f2SJason A. Harmening 		nnode->tn_dir.tn_wht_size = 0;
650d1fa59e9SXin LI 		nnode->tn_links++;
65182cf92d4SXin LI 		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
652d1fa59e9SXin LI 		nnode->tn_dir.tn_parent->tn_links++;
65382cf92d4SXin LI 		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
654d1fa59e9SXin LI 		break;
655d1fa59e9SXin LI 
656d1fa59e9SXin LI 	case VFIFO:
657d1fa59e9SXin LI 		/* FALLTHROUGH */
658d1fa59e9SXin LI 	case VSOCK:
659d1fa59e9SXin LI 		break;
660d1fa59e9SXin LI 
661d1fa59e9SXin LI 	case VLNK:
662d1fa59e9SXin LI 		MPASS(strlen(target) < MAXPATHLEN);
663d1fa59e9SXin LI 		nnode->tn_size = strlen(target);
664618029afSMateusz Guzik 
665618029afSMateusz Guzik 		symlink = NULL;
666618029afSMateusz Guzik 		if (!tmp->tm_nonc) {
6670f01fb01SKonstantin Belousov 			symlink = cache_symlink_alloc(nnode->tn_size + 1,
6680f01fb01SKonstantin Belousov 			    M_WAITOK);
669618029afSMateusz Guzik 			symlink_smr = true;
670618029afSMateusz Guzik 		}
671618029afSMateusz Guzik 		if (symlink == NULL) {
6720f01fb01SKonstantin Belousov 			symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME,
6730f01fb01SKonstantin Belousov 			    M_WAITOK);
674618029afSMateusz Guzik 			symlink_smr = false;
675618029afSMateusz Guzik 		}
676618029afSMateusz Guzik 		memcpy(symlink, target, nnode->tn_size + 1);
677618029afSMateusz Guzik 
678618029afSMateusz Guzik 		/*
679618029afSMateusz Guzik 		 * Allow safe symlink resolving for lockless lookup.
680618029afSMateusz Guzik 		 * tmpfs_fplookup_symlink references this comment.
681618029afSMateusz Guzik 		 *
682618029afSMateusz Guzik 		 * 1. nnode is not yet visible to the world
683618029afSMateusz Guzik 		 * 2. both tn_link_target and tn_link_smr get populated
684618029afSMateusz Guzik 		 * 3. release fence publishes their content
6850f01fb01SKonstantin Belousov 		 * 4. tn_link_target content is immutable until node
6860f01fb01SKonstantin Belousov 		 *    destruction, where the pointer gets set to NULL
687618029afSMateusz Guzik 		 * 5. tn_link_smr is never changed once set
688618029afSMateusz Guzik 		 *
6890f01fb01SKonstantin Belousov 		 * As a result it is sufficient to issue load consume
6900f01fb01SKonstantin Belousov 		 * on the node pointer to also get the above content
6910f01fb01SKonstantin Belousov 		 * in a stable manner.  Worst case tn_link_smr flag
6920f01fb01SKonstantin Belousov 		 * may be set to true despite being stale, while the
6930f01fb01SKonstantin Belousov 		 * target buffer is already cleared out.
694618029afSMateusz Guzik 		 */
695cc96f92aSMateusz Guzik 		atomic_store_ptr(&nnode->tn_link_target, symlink);
696618029afSMateusz Guzik 		atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr);
697618029afSMateusz Guzik 		atomic_thread_fence_rel();
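		/*
		 * Sketch of the consuming side (the actual code lives in
		 * tmpfs_fplookup_symlink() in tmpfs_vnops.c): a lockless
		 * reader loads the node pointer with consume semantics,
		 * checks tn_link_smr and, only if it is set, resolves the
		 * name directly from tn_link_target, bailing out to the
		 * locked lookup path if the flag is clear or the target has
		 * already been torn down.
		 */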
698d1fa59e9SXin LI 		break;
699d1fa59e9SXin LI 
700d1fa59e9SXin LI 	case VREG:
70180bca63cSKonstantin Belousov 		nnode->tn_reg.tn_aobj =
70228bc23abSKonstantin Belousov 		    vm_pager_allocate(tmpfs_pager_type, NULL, 0,
7034b8365d7SKonstantin Belousov 		    VM_PROT_DEFAULT, 0,
7043364c323SKonstantin Belousov 		    NULL /* XXXKIB - tmpfs needs swap reservation */);
705d9dc64f1SKonstantin Belousov 		nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode;
706d9dc64f1SKonstantin Belousov 		vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS);
707081e36e7SKonstantin Belousov 		nnode->tn_reg.tn_tmp = tmp;
70837aea264SKonstantin Belousov 		nnode->tn_reg.tn_pages = 0;
709d1fa59e9SXin LI 		break;
710d1fa59e9SXin LI 
711d1fa59e9SXin LI 	default:
712bba7ed20SKonstantin Belousov 		panic("tmpfs_alloc_node: type %p %d", nnode,
713bba7ed20SKonstantin Belousov 		    (int)nnode->tn_type);
714d1fa59e9SXin LI 	}
715d1fa59e9SXin LI 
716d1fa59e9SXin LI 	TMPFS_LOCK(tmp);
717d1fa59e9SXin LI 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
71864c25043SKonstantin Belousov 	nnode->tn_attached = true;
719d1fa59e9SXin LI 	tmp->tm_nodes_inuse++;
72064c25043SKonstantin Belousov 	tmp->tm_refcount++;
721d1fa59e9SXin LI 	TMPFS_UNLOCK(tmp);
722d1fa59e9SXin LI 
723d1fa59e9SXin LI 	*node = nnode;
724bba7ed20SKonstantin Belousov 	return (0);
725d1fa59e9SXin LI }
726d1fa59e9SXin LI 
727d1fa59e9SXin LI /*
728d1fa59e9SXin LI  * Destroys the node pointed to by node from the file system 'tmp'.
729bba7ed20SKonstantin Belousov  * If the node references a directory, it must no longer have any entries.
730d1fa59e9SXin LI  */
731d1fa59e9SXin LI void
732d1fa59e9SXin LI tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
733d1fa59e9SXin LI {
7344601f5f5SKonstantin Belousov 	if (refcount_release_if_not_last(&node->tn_refcount))
7354601f5f5SKonstantin Belousov 		return;
7360ae6383dSXin LI 
737d1fa59e9SXin LI 	TMPFS_LOCK(tmp);
73864c25043SKonstantin Belousov 	TMPFS_NODE_LOCK(node);
73964c25043SKonstantin Belousov 	if (!tmpfs_free_node_locked(tmp, node, false)) {
74064c25043SKonstantin Belousov 		TMPFS_NODE_UNLOCK(node);
74164c25043SKonstantin Belousov 		TMPFS_UNLOCK(tmp);
74264c25043SKonstantin Belousov 	}
74364c25043SKonstantin Belousov }
74464c25043SKonstantin Belousov 
74564c25043SKonstantin Belousov bool
74664c25043SKonstantin Belousov tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
74764c25043SKonstantin Belousov     bool detach)
74864c25043SKonstantin Belousov {
74956242a4cSFedor Uporov 	struct tmpfs_extattr *ea;
75064c25043SKonstantin Belousov 	vm_object_t uobj;
751618029afSMateusz Guzik 	char *symlink;
7524601f5f5SKonstantin Belousov 	bool last;
75364c25043SKonstantin Belousov 
75464c25043SKonstantin Belousov 	TMPFS_MP_ASSERT_LOCKED(tmp);
75564c25043SKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
75664c25043SKonstantin Belousov 
7574601f5f5SKonstantin Belousov 	last = refcount_release(&node->tn_refcount);
7584601f5f5SKonstantin Belousov 	if (node->tn_attached && (detach || last)) {
75964c25043SKonstantin Belousov 		MPASS(tmp->tm_nodes_inuse > 0);
760d1fa59e9SXin LI 		tmp->tm_nodes_inuse--;
76164c25043SKonstantin Belousov 		LIST_REMOVE(node, tn_entries);
76264c25043SKonstantin Belousov 		node->tn_attached = false;
76364c25043SKonstantin Belousov 	}
7644601f5f5SKonstantin Belousov 	if (!last)
76564c25043SKonstantin Belousov 		return (false);
76664c25043SKonstantin Belousov 
767f4aa6452SMateusz Guzik 	TMPFS_NODE_UNLOCK(node);
768f4aa6452SMateusz Guzik 
76964c25043SKonstantin Belousov #ifdef INVARIANTS
77064c25043SKonstantin Belousov 	MPASS(node->tn_vnode == NULL);
77164c25043SKonstantin Belousov 	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
772d1fa59e9SXin LI 
773f4aa6452SMateusz Guzik 	/*
7740f01fb01SKonstantin Belousov 	 * Make sure this is a node type we can deal with. Everything
7750f01fb01SKonstantin Belousov 	 * is explicitly enumerated without the 'default' clause so
7760f01fb01SKonstantin Belousov 	 * the compiler can throw an error in case a new type is
7770f01fb01SKonstantin Belousov 	 * added.
778f4aa6452SMateusz Guzik 	 */
779d1fa59e9SXin LI 	switch (node->tn_type) {
780d1fa59e9SXin LI 	case VBLK:
781d1fa59e9SXin LI 	case VCHR:
782d1fa59e9SXin LI 	case VDIR:
783d1fa59e9SXin LI 	case VFIFO:
784d1fa59e9SXin LI 	case VSOCK:
785d1fa59e9SXin LI 	case VLNK:
786f4aa6452SMateusz Guzik 	case VREG:
787f4aa6452SMateusz Guzik 		break;
788f4aa6452SMateusz Guzik 	case VNON:
789f4aa6452SMateusz Guzik 	case VBAD:
790f4aa6452SMateusz Guzik 	case VMARKER:
7910f01fb01SKonstantin Belousov 		panic("%s: bad type %d for node %p", __func__,
7920f01fb01SKonstantin Belousov 		    (int)node->tn_type, node);
793f4aa6452SMateusz Guzik 	}
794f4aa6452SMateusz Guzik #endif
795f4aa6452SMateusz Guzik 
79656242a4cSFedor Uporov 	while ((ea = LIST_FIRST(&node->tn_extattrs)) != NULL) {
79756242a4cSFedor Uporov 		LIST_REMOVE(ea, ea_extattrs);
79856242a4cSFedor Uporov 		tmpfs_extattr_free(ea);
79956242a4cSFedor Uporov 	}
80056242a4cSFedor Uporov 
801f4aa6452SMateusz Guzik 	switch (node->tn_type) {
802f4aa6452SMateusz Guzik 	case VREG:
803f4aa6452SMateusz Guzik 		uobj = node->tn_reg.tn_aobj;
80437aea264SKonstantin Belousov 		node->tn_reg.tn_aobj = NULL;
80537aea264SKonstantin Belousov 		if (uobj != NULL) {
80637aea264SKonstantin Belousov 			VM_OBJECT_WLOCK(uobj);
80737aea264SKonstantin Belousov 			KASSERT((uobj->flags & OBJ_TMPFS) != 0,
80837aea264SKonstantin Belousov 			    ("tmpfs node %p uobj %p not tmpfs", node, uobj));
80937aea264SKonstantin Belousov 			vm_object_clear_flag(uobj, OBJ_TMPFS);
81037aea264SKonstantin Belousov 			KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages,
81137aea264SKonstantin Belousov 			    ("tmpfs tmp %p node %p pages %jd free %jd", tmp,
81237aea264SKonstantin Belousov 			    node, (uintmax_t)tmp->tm_pages_used,
81337aea264SKonstantin Belousov 			    (uintmax_t)node->tn_reg.tn_pages));
81437aea264SKonstantin Belousov 			atomic_add_long(&tmp->tm_pages_used,
81537aea264SKonstantin Belousov 			    -node->tn_reg.tn_pages);
81637aea264SKonstantin Belousov 			VM_OBJECT_WUNLOCK(uobj);
81737aea264SKonstantin Belousov 		}
818f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
81937aea264SKonstantin Belousov 
82037aea264SKonstantin Belousov 		/*
82137aea264SKonstantin Belousov 		 * vm_object_deallocate() must not be called while
82237aea264SKonstantin Belousov 		 * owning tm_allnode_lock, because deallocate might
82337aea264SKonstantin Belousov 		 * sleep.  Call it after tmpfs_free_tmp() does the
82437aea264SKonstantin Belousov 		 * unlock.
82537aea264SKonstantin Belousov 		 */
826d9dc64f1SKonstantin Belousov 		if (uobj != NULL)
827f4aa6452SMateusz Guzik 			vm_object_deallocate(uobj);
82837aea264SKonstantin Belousov 
829f4aa6452SMateusz Guzik 		break;
830f4aa6452SMateusz Guzik 	case VLNK:
831f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
832f4aa6452SMateusz Guzik 
833618029afSMateusz Guzik 		symlink = node->tn_link_target;
834cc96f92aSMateusz Guzik 		atomic_store_ptr(&node->tn_link_target, NULL);
835618029afSMateusz Guzik 		if (atomic_load_char(&node->tn_link_smr)) {
836618029afSMateusz Guzik 			cache_symlink_free(symlink, node->tn_size + 1);
837618029afSMateusz Guzik 		} else {
838618029afSMateusz Guzik 			free(symlink, M_TMPFSNAME);
839618029afSMateusz Guzik 		}
840d1fa59e9SXin LI 		break;
841d1fa59e9SXin LI 	default:
842f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
843f4aa6452SMateusz Guzik 		break;
844d1fa59e9SXin LI 	}
845d1fa59e9SXin LI 
846172ffe70SMateusz Guzik 	uma_zfree_smr(tmpfs_node_pool, node);
84764c25043SKonstantin Belousov 	return (true);
848d1fa59e9SXin LI }
849d1fa59e9SXin LI 
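/*
 * Directory offsets ("cookies") handed out to readdir consumers are derived
 * from an FNV hash of the entry name, masked with TMPFS_DIRCOOKIE_MASK and
 * bumped to at least TMPFS_DIRCOOKIE_MIN so they stay clear of the reserved
 * ".", ".." and EOF cookies.  Entries whose names hash to the same cookie
 * are kept on the directory's tn_dupindex list and carry the
 * TMPFS_DIRCOOKIE_DUP/DUPHEAD bits, which tmpfs_dirent_dup() and
 * tmpfs_dirent_duphead() test below.
 */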
8504fd5efe7SGleb Kurtsou static __inline uint32_t
8514fd5efe7SGleb Kurtsou tmpfs_dirent_hash(const char *name, u_int len)
8524fd5efe7SGleb Kurtsou {
8534fd5efe7SGleb Kurtsou 	uint32_t hash;
8544fd5efe7SGleb Kurtsou 
8554fd5efe7SGleb Kurtsou 	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
8564fd5efe7SGleb Kurtsou #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
8574fd5efe7SGleb Kurtsou 	hash &= 0xf;
8584fd5efe7SGleb Kurtsou #endif
8594fd5efe7SGleb Kurtsou 	if (hash < TMPFS_DIRCOOKIE_MIN)
8604fd5efe7SGleb Kurtsou 		hash += TMPFS_DIRCOOKIE_MIN;
8614fd5efe7SGleb Kurtsou 
8624fd5efe7SGleb Kurtsou 	return (hash);
8634fd5efe7SGleb Kurtsou }
8644fd5efe7SGleb Kurtsou 
8654fd5efe7SGleb Kurtsou static __inline off_t
8664fd5efe7SGleb Kurtsou tmpfs_dirent_cookie(struct tmpfs_dirent *de)
8674fd5efe7SGleb Kurtsou {
86862dca316SBryan Drewery 	if (de == NULL)
86962dca316SBryan Drewery 		return (TMPFS_DIRCOOKIE_EOF);
87062dca316SBryan Drewery 
8714fd5efe7SGleb Kurtsou 	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
8724fd5efe7SGleb Kurtsou 
8734fd5efe7SGleb Kurtsou 	return (de->td_cookie);
8744fd5efe7SGleb Kurtsou }
8754fd5efe7SGleb Kurtsou 
8764fd5efe7SGleb Kurtsou static __inline boolean_t
8774fd5efe7SGleb Kurtsou tmpfs_dirent_dup(struct tmpfs_dirent *de)
8784fd5efe7SGleb Kurtsou {
8794fd5efe7SGleb Kurtsou 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
8804fd5efe7SGleb Kurtsou }
8814fd5efe7SGleb Kurtsou 
8824fd5efe7SGleb Kurtsou static __inline boolean_t
8834fd5efe7SGleb Kurtsou tmpfs_dirent_duphead(struct tmpfs_dirent *de)
8844fd5efe7SGleb Kurtsou {
8854fd5efe7SGleb Kurtsou 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
8864fd5efe7SGleb Kurtsou }
8874fd5efe7SGleb Kurtsou 
8884fd5efe7SGleb Kurtsou void
8894fd5efe7SGleb Kurtsou tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
8904fd5efe7SGleb Kurtsou {
8914fd5efe7SGleb Kurtsou 	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
8924fd5efe7SGleb Kurtsou 	memcpy(de->ud.td_name, name, namelen);
8934fd5efe7SGleb Kurtsou 	de->td_namelen = namelen;
8944fd5efe7SGleb Kurtsou }
8954fd5efe7SGleb Kurtsou 
896d1fa59e9SXin LI /*
897d1fa59e9SXin LI  * Allocates a new directory entry for the node 'node' with the name 'name'.
898d1fa59e9SXin LI  * The new directory entry is returned in *de.
899d1fa59e9SXin LI  *
900d1fa59e9SXin LI  * The link count of node is increased by one to reflect the new object
901d1fa59e9SXin LI  * referencing it.
902d1fa59e9SXin LI  *
903d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
904d1fa59e9SXin LI  */
905d1fa59e9SXin LI int
906d1fa59e9SXin LI tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
9074fd5efe7SGleb Kurtsou     const char *name, u_int len, struct tmpfs_dirent **de)
908d1fa59e9SXin LI {
909d1fa59e9SXin LI 	struct tmpfs_dirent *nde;
910d1fa59e9SXin LI 
9117c58c37eSMateusz Guzik 	nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK);
912d1fa59e9SXin LI 	nde->td_node = node;
9134fd5efe7SGleb Kurtsou 	if (name != NULL) {
9144fd5efe7SGleb Kurtsou 		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
9154fd5efe7SGleb Kurtsou 		tmpfs_dirent_init(nde, name, len);
9164fd5efe7SGleb Kurtsou 	} else
9174fd5efe7SGleb Kurtsou 		nde->td_namelen = 0;
91899d57a6bSEd Schouten 	if (node != NULL)
919d1fa59e9SXin LI 		node->tn_links++;
920d1fa59e9SXin LI 
921d1fa59e9SXin LI 	*de = nde;
922d1fa59e9SXin LI 
923c12118f6SKa Ho Ng 	return (0);
924d1fa59e9SXin LI }
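/*
 * Note that both arguments of tmpfs_alloc_dirent() may legitimately be
 * NULL: a NULL 'node' produces an entry that names no node, as used for
 * whiteout entries, while a NULL 'name' produces a nameless entry used as
 * the head of a duplicate-cookie chain.
 */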
925d1fa59e9SXin LI 
926d1fa59e9SXin LI /*
927d1fa59e9SXin LI  * Frees a directory entry.  It is the caller's responsibility to destroy
928d1fa59e9SXin LI  * the node referenced by it if needed.
929d1fa59e9SXin LI  *
930d1fa59e9SXin LI  * The link count of node is decreased by one to reflect the removal of an
931d1fa59e9SXin LI  * object that referenced it.  This only happens if de->td_node is not NULL;
932d1fa59e9SXin LI  * otherwise the function will not access the node referred to by the
933d1fa59e9SXin LI  * directory entry, as it may already have been released from the outside.
934d1fa59e9SXin LI  */
935d1fa59e9SXin LI void
9364fd5efe7SGleb Kurtsou tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
937d1fa59e9SXin LI {
938d1fa59e9SXin LI 	struct tmpfs_node *node;
939d1fa59e9SXin LI 
940d1fa59e9SXin LI 	node = de->td_node;
94199d57a6bSEd Schouten 	if (node != NULL) {
942d1fa59e9SXin LI 		MPASS(node->tn_links > 0);
943d1fa59e9SXin LI 		node->tn_links--;
944d1fa59e9SXin LI 	}
9454fd5efe7SGleb Kurtsou 	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
9464fd5efe7SGleb Kurtsou 		free(de->ud.td_name, M_TMPFSNAME);
9477c58c37eSMateusz Guzik 	free(de, M_TMPFSDIR);
948d1fa59e9SXin LI }
949d1fa59e9SXin LI 
950158cc900SKonstantin Belousov void
951158cc900SKonstantin Belousov tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
952158cc900SKonstantin Belousov {
953eec2e4efSMateusz Guzik 	bool want_vrele;
954158cc900SKonstantin Belousov 
95555781cb9SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
956158cc900SKonstantin Belousov 	if (vp->v_type != VREG || obj == NULL)
957158cc900SKonstantin Belousov 		return;
958158cc900SKonstantin Belousov 
959158cc900SKonstantin Belousov 	VM_OBJECT_WLOCK(obj);
960158cc900SKonstantin Belousov 	VI_LOCK(vp);
96146811949SKonstantin Belousov 	vp->v_object = NULL;
96246811949SKonstantin Belousov 
963eec2e4efSMateusz Guzik 	/*
964eec2e4efSMateusz Guzik 	 * May be going through forced unmount.
965eec2e4efSMateusz Guzik 	 */
966eec2e4efSMateusz Guzik 	want_vrele = false;
967eec2e4efSMateusz Guzik 	if ((obj->flags & OBJ_TMPFS_VREF) != 0) {
968eec2e4efSMateusz Guzik 		vm_object_clear_flag(obj, OBJ_TMPFS_VREF);
969eec2e4efSMateusz Guzik 		want_vrele = true;
970eec2e4efSMateusz Guzik 	}
971eec2e4efSMateusz Guzik 
9723c93d227SKonstantin Belousov 	if (vp->v_writecount < 0)
9733c93d227SKonstantin Belousov 		vp->v_writecount = 0;
974158cc900SKonstantin Belousov 	VI_UNLOCK(vp);
975158cc900SKonstantin Belousov 	VM_OBJECT_WUNLOCK(obj);
976eec2e4efSMateusz Guzik 	if (want_vrele) {
977eec2e4efSMateusz Guzik 		vrele(vp);
978eec2e4efSMateusz Guzik 	}
979158cc900SKonstantin Belousov }
980158cc900SKonstantin Belousov 
981158cc900SKonstantin Belousov /*
982d1fa59e9SXin LI  * Allocates a new vnode for the node 'node', or returns a new reference to
983d1fa59e9SXin LI  * an existing one if the node already had a vnode referencing it.  The
984d1fa59e9SXin LI  * resulting locked vnode is returned in *vpp.
985d1fa59e9SXin LI  *
986d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
987d1fa59e9SXin LI  */
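/*
 * Concurrency notes for the function below: the node lock is held while
 * tn_vnode and tn_vpstate are inspected, but must be dropped around
 * vget_finish() and getnewvnode(), both of which may sleep.  The thread
 * that finds neither a vnode nor an allocation in progress sets
 * TMPFS_VNODE_ALLOCATING and performs the allocation; later arrivals set
 * TMPFS_VNODE_WANT and sleep until woken, then retry from the top.  A
 * doomed vnode encountered during a forced unmount makes the caller wait
 * on TMPFS_VNODE_WRECLAIM until the reclaim finishes, unless LK_NOWAIT was
 * requested, in which case ENOENT is returned.
 */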
988d1fa59e9SXin LI int
9890ae6383dSXin LI tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
990dfd233edSAttilio Rao     struct vnode **vpp)
991d1fa59e9SXin LI {
992d1fa59e9SXin LI 	struct vnode *vp;
9931abe3656SMateusz Guzik 	enum vgetstate vs;
99464c25043SKonstantin Belousov 	struct tmpfs_mount *tm;
9956f2af3fcSKonstantin Belousov 	vm_object_t object;
9966f2af3fcSKonstantin Belousov 	int error;
997d1fa59e9SXin LI 
9986f2af3fcSKonstantin Belousov 	error = 0;
99964c25043SKonstantin Belousov 	tm = VFS_TO_TMPFS(mp);
10000ae6383dSXin LI 	TMPFS_NODE_LOCK(node);
10014601f5f5SKonstantin Belousov 	tmpfs_ref_node(node);
100264c25043SKonstantin Belousov loop:
100364c25043SKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
1004fb755714SXin LI 	if ((vp = node->tn_vnode) != NULL) {
100582cf92d4SXin LI 		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
10068239a7a8SKonstantin Belousov 		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
1007abd80ddbSMateusz Guzik 		    (VN_IS_DOOMED(vp) &&
10088239a7a8SKonstantin Belousov 		     (lkflag & LK_NOWAIT) != 0)) {
10098239a7a8SKonstantin Belousov 			TMPFS_NODE_UNLOCK(node);
10108239a7a8SKonstantin Belousov 			error = ENOENT;
10118239a7a8SKonstantin Belousov 			vp = NULL;
10128239a7a8SKonstantin Belousov 			goto out;
10138239a7a8SKonstantin Belousov 		}
1014abd80ddbSMateusz Guzik 		if (VN_IS_DOOMED(vp)) {
10158239a7a8SKonstantin Belousov 			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
10168239a7a8SKonstantin Belousov 			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
10178239a7a8SKonstantin Belousov 				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
10188239a7a8SKonstantin Belousov 				    0, "tmpfsE", 0);
10198239a7a8SKonstantin Belousov 			}
102064c25043SKonstantin Belousov 			goto loop;
10218239a7a8SKonstantin Belousov 		}
10221abe3656SMateusz Guzik 		vs = vget_prep(vp);
10230ae6383dSXin LI 		TMPFS_NODE_UNLOCK(node);
10241abe3656SMateusz Guzik 		error = vget_finish(vp, lkflag, vs);
102564c25043SKonstantin Belousov 		if (error == ENOENT) {
102664c25043SKonstantin Belousov 			TMPFS_NODE_LOCK(node);
10278239a7a8SKonstantin Belousov 			goto loop;
102864c25043SKonstantin Belousov 		}
1029ca846258SGleb Kurtsou 		if (error != 0) {
1030ca846258SGleb Kurtsou 			vp = NULL;
1031ca846258SGleb Kurtsou 			goto out;
1032ca846258SGleb Kurtsou 		}
1033d1fa59e9SXin LI 
1034d1fa59e9SXin LI 		/*
1035d1fa59e9SXin LI 		 * Make sure the vnode is still there after
1036d1fa59e9SXin LI 		 * getting the interlock to avoid racing a free.
1037d1fa59e9SXin LI 		 */
1038439d942bSMateusz Guzik 		if (node->tn_vnode != vp) {
1039d1fa59e9SXin LI 			vput(vp);
104064c25043SKonstantin Belousov 			TMPFS_NODE_LOCK(node);
1041d1fa59e9SXin LI 			goto loop;
1042d1fa59e9SXin LI 		}
1043d1fa59e9SXin LI 
1044d1fa59e9SXin LI 		goto out;
1045d1fa59e9SXin LI 	}
1046d1fa59e9SXin LI 
104782cf92d4SXin LI 	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
104882cf92d4SXin LI 	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
104982cf92d4SXin LI 		TMPFS_NODE_UNLOCK(node);
105082cf92d4SXin LI 		error = ENOENT;
105182cf92d4SXin LI 		vp = NULL;
105282cf92d4SXin LI 		goto out;
105382cf92d4SXin LI 	}
105482cf92d4SXin LI 
1055d1fa59e9SXin LI 	/*
1056d1fa59e9SXin LI 	 * Otherwise serialize the allocation: one thread sets TMPFS_VNODE_ALLOCATING and
1057d1fa59e9SXin LI 	 * calls getnewvnode(), which can block; later arrivals wait via TMPFS_VNODE_WANT.
1058d1fa59e9SXin LI 	 */
1059d1fa59e9SXin LI 	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
1060d1fa59e9SXin LI 		node->tn_vpstate |= TMPFS_VNODE_WANT;
1061fb755714SXin LI 		error = msleep((caddr_t) &node->tn_vpstate,
106264c25043SKonstantin Belousov 		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
106364c25043SKonstantin Belousov 		if (error != 0)
106464c25043SKonstantin Belousov 			goto out;
1065fb755714SXin LI 		goto loop;
1066fb755714SXin LI 	} else
1067d1fa59e9SXin LI 		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
1068fb755714SXin LI 
1069d1fa59e9SXin LI 	TMPFS_NODE_UNLOCK(node);
1070d1fa59e9SXin LI 
1071d1fa59e9SXin LI 	/* Get a new vnode and associate it with our node. */
107200ac6a98SKonstantin Belousov 	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
107300ac6a98SKonstantin Belousov 	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
1074d1fa59e9SXin LI 	if (error != 0)
1075d1fa59e9SXin LI 		goto unlock;
1076d1fa59e9SXin LI 	MPASS(vp != NULL);
1077d1fa59e9SXin LI 
1078fd63693dSKonstantin Belousov 	/* lkflag is ignored, the lock is exclusive */
1079c8b29d12SMateusz Guzik 	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1080d1fa59e9SXin LI 
1081d1fa59e9SXin LI 	vp->v_data = node;
1082d1fa59e9SXin LI 	vp->v_type = node->tn_type;
1083d1fa59e9SXin LI 
1084d1fa59e9SXin LI 	/* Type-specific initialization. */
1085d1fa59e9SXin LI 	switch (node->tn_type) {
1086d1fa59e9SXin LI 	case VBLK:
1087d1fa59e9SXin LI 		/* FALLTHROUGH */
1088d1fa59e9SXin LI 	case VCHR:
1089fb755714SXin LI 		/* FALLTHROUGH */
1090d1fa59e9SXin LI 	case VLNK:
1091d1fa59e9SXin LI 		/* FALLTHROUGH */
1092d1fa59e9SXin LI 	case VSOCK:
1093d1fa59e9SXin LI 		break;
1094fb755714SXin LI 	case VFIFO:
1095fb755714SXin LI 		vp->v_op = &tmpfs_fifoop_entries;
1096fb755714SXin LI 		break;
10976f2af3fcSKonstantin Belousov 	case VREG:
10986f2af3fcSKonstantin Belousov 		object = node->tn_reg.tn_aobj;
10996f2af3fcSKonstantin Belousov 		VM_OBJECT_WLOCK(object);
1100eec2e4efSMateusz Guzik 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
1101eec2e4efSMateusz Guzik 		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
1102eec2e4efSMateusz Guzik 		    __func__, object));
11036f2af3fcSKonstantin Belousov 		VI_LOCK(vp);
11046f2af3fcSKonstantin Belousov 		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
11056f2af3fcSKonstantin Belousov 		vp->v_object = object;
11060f613ab8SKonstantin Belousov 		vn_irflag_set_locked(vp, (tm->tm_pgread ? VIRF_PGREAD : 0) |
11070f613ab8SKonstantin Belousov 		    VIRF_TEXT_REF);
11086f2af3fcSKonstantin Belousov 		VI_UNLOCK(vp);
110958d7ac11SKonstantin Belousov 		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
111058d7ac11SKonstantin Belousov 		    ("leaked OBJ_TMPFS_VREF"));
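		/*
		 * If the object is already mapped for write, have it hold
		 * a use reference on the vnode and put the vnode on the
		 * lazy list, recording the reference with OBJ_TMPFS_VREF.
		 */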
111158d7ac11SKonstantin Belousov 		if (object->un_pager.swp.writemappings > 0) {
111258d7ac11SKonstantin Belousov 			vrefact(vp);
111358d7ac11SKonstantin Belousov 			vlazy(vp);
111458d7ac11SKonstantin Belousov 			vm_object_set_flag(object, OBJ_TMPFS_VREF);
111558d7ac11SKonstantin Belousov 		}
11166f2af3fcSKonstantin Belousov 		VM_OBJECT_WUNLOCK(object);
11176f2af3fcSKonstantin Belousov 		break;
11187871e52bSXin LI 	case VDIR:
111982cf92d4SXin LI 		MPASS(node->tn_dir.tn_parent != NULL);
11207871e52bSXin LI 		if (node->tn_dir.tn_parent == node)
11217871e52bSXin LI 			vp->v_vflag |= VV_ROOT;
11227871e52bSXin LI 		break;
1123d1fa59e9SXin LI 
1124d1fa59e9SXin LI 	default:
11251fa8f5f0SXin LI 		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
1126d1fa59e9SXin LI 	}
112760c5c866SKonstantin Belousov 	if (vp->v_type != VFIFO)
112860c5c866SKonstantin Belousov 		VN_LOCK_ASHARE(vp);
1129d1fa59e9SXin LI 
113066c5fbcaSKonstantin Belousov 	error = insmntque1(vp, mp);
11315ccdfdabSMateusz Guzik 	if (error != 0) {
113266c5fbcaSKonstantin Belousov 		/* Need to clear v_object for insmntque failure. */
113366c5fbcaSKonstantin Belousov 		tmpfs_destroy_vobject(vp, vp->v_object);
113466c5fbcaSKonstantin Belousov 		vp->v_object = NULL;
113566c5fbcaSKonstantin Belousov 		vp->v_data = NULL;
113666c5fbcaSKonstantin Belousov 		vp->v_op = &dead_vnodeops;
113766c5fbcaSKonstantin Belousov 		vgone(vp);
113866c5fbcaSKonstantin Belousov 		vput(vp);
11390ae6383dSXin LI 		vp = NULL;
1140829f0bcbSMateusz Guzik 	} else {
1141829f0bcbSMateusz Guzik 		vn_set_state(vp, VSTATE_CONSTRUCTED);
11425ccdfdabSMateusz Guzik 	}
1143d1fa59e9SXin LI 
1144d1fa59e9SXin LI unlock:
1145d1fa59e9SXin LI 	TMPFS_NODE_LOCK(node);
11460ae6383dSXin LI 
1147fb755714SXin LI 	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
1148d1fa59e9SXin LI 	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
11490ae6383dSXin LI 	node->tn_vnode = vp;
1150d1fa59e9SXin LI 
1151d1fa59e9SXin LI 	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
1152d1fa59e9SXin LI 		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
1153d1fa59e9SXin LI 		TMPFS_NODE_UNLOCK(node);
1154d1fa59e9SXin LI 		wakeup((caddr_t) &node->tn_vpstate);
11558d5892eeSXin LI 	} else
1156d1fa59e9SXin LI 		TMPFS_NODE_UNLOCK(node);
1157d1fa59e9SXin LI 
1158d1fa59e9SXin LI out:
115964c25043SKonstantin Belousov 	if (error == 0) {
1160d1fa59e9SXin LI 		*vpp = vp;
1161d1fa59e9SXin LI 
11620ae6383dSXin LI #ifdef INVARIANTS
11639ff2fbdfSKonstantin Belousov 		MPASS(*vpp != NULL);
11649ff2fbdfSKonstantin Belousov 		ASSERT_VOP_LOCKED(*vpp, __func__);
11650ae6383dSXin LI 		TMPFS_NODE_LOCK(node);
1166d1fa59e9SXin LI 		MPASS(*vpp == node->tn_vnode);
11670ae6383dSXin LI 		TMPFS_NODE_UNLOCK(node);
11680ae6383dSXin LI #endif
116964c25043SKonstantin Belousov 	}
117064c25043SKonstantin Belousov 	tmpfs_free_node(tm, node);
1171d1fa59e9SXin LI 
1172bba7ed20SKonstantin Belousov 	return (error);
1173d1fa59e9SXin LI }
1174d1fa59e9SXin LI 
1175d1fa59e9SXin LI /*
1176d1fa59e9SXin LI  * Destroys the association between the vnode vp and the node it
1177d1fa59e9SXin LI  * references.
1178d1fa59e9SXin LI  */
1179d1fa59e9SXin LI void
1180d1fa59e9SXin LI tmpfs_free_vp(struct vnode *vp)
1181d1fa59e9SXin LI {
1182d1fa59e9SXin LI 	struct tmpfs_node *node;
1183d1fa59e9SXin LI 
1184d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
1185d1fa59e9SXin LI 
1186d2ca06cdSKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
1187d1fa59e9SXin LI 	node->tn_vnode = NULL;
11888239a7a8SKonstantin Belousov 	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
11898239a7a8SKonstantin Belousov 		wakeup(&node->tn_vnode);
11908239a7a8SKonstantin Belousov 	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
1191d1fa59e9SXin LI 	vp->v_data = NULL;
1192d1fa59e9SXin LI }
1193d1fa59e9SXin LI 
1194d1fa59e9SXin LI /*
1195d1fa59e9SXin LI  * Allocates a new file of type 'type' and adds it to the parent directory
1196d1fa59e9SXin LI  * 'dvp'; this addition is done using the component name given in 'cnp'.
1197d1fa59e9SXin LI  * The ownership of the new file is automatically assigned based on the
1198d1fa59e9SXin LI  * credentials of the caller (through 'cnp'), the group is set based on
1199d1fa59e9SXin LI  * the parent directory and the mode is determined from the 'vap' argument.
1200d1fa59e9SXin LI  * If successful, *vpp holds a vnode to the newly created file and zero
1201d1fa59e9SXin LI  * is returned.  Otherwise *vpp is NULL and the function returns an
1202d1fa59e9SXin LI  * appropriate error code.
1203d1fa59e9SXin LI  */
1204d1fa59e9SXin LI int
1205d1fa59e9SXin LI tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
12061493c2eeSBrooks Davis     struct componentname *cnp, const char *target)
1207d1fa59e9SXin LI {
1208d1fa59e9SXin LI 	int error;
1209d1fa59e9SXin LI 	struct tmpfs_dirent *de;
1210d1fa59e9SXin LI 	struct tmpfs_mount *tmp;
1211d1fa59e9SXin LI 	struct tmpfs_node *dnode;
1212d1fa59e9SXin LI 	struct tmpfs_node *node;
1213d1fa59e9SXin LI 	struct tmpfs_node *parent;
1214d1fa59e9SXin LI 
1215e7e6c820SKonstantin Belousov 	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");
1216d1fa59e9SXin LI 
1217d1fa59e9SXin LI 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1218d1fa59e9SXin LI 	dnode = VP_TO_TMPFS_DIR(dvp);
1219d1fa59e9SXin LI 	*vpp = NULL;
1220d1fa59e9SXin LI 
1221d1fa59e9SXin LI 	/* If the entry we are creating is a directory, its parent will
1222d1fa59e9SXin LI 	 * gain a new link, so make sure the parent's link count cannot
1223d1fa59e9SXin LI 	 * overflow. */
1224d1fa59e9SXin LI 	if (vap->va_type == VDIR) {
1225d1fa59e9SXin LI 		/* Ensure that we do not overflow the maximum number of links
1226d1fa59e9SXin LI 		 * imposed by the system. */
122735b1a3abSJohn Baldwin 		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
122835b1a3abSJohn Baldwin 		if (dnode->tn_links == TMPFS_LINK_MAX) {
12297a41bc2fSKonstantin Belousov 			return (EMLINK);
1230d1fa59e9SXin LI 		}
1231d1fa59e9SXin LI 
1232d1fa59e9SXin LI 		parent = dnode;
12337871e52bSXin LI 		MPASS(parent != NULL);
1234d1fa59e9SXin LI 	} else
1235d1fa59e9SXin LI 		parent = NULL;
1236d1fa59e9SXin LI 
1237d1fa59e9SXin LI 	/* Allocate a node that represents the new file. */
12384cda7f7eSKonstantin Belousov 	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
1239bba7ed20SKonstantin Belousov 	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
1240bba7ed20SKonstantin Belousov 	    target, vap->va_rdev, &node);
1241d1fa59e9SXin LI 	if (error != 0)
12427a41bc2fSKonstantin Belousov 		return (error);
1243d1fa59e9SXin LI 
1244d1fa59e9SXin LI 	/* Allocate a directory entry that points to the new file. */
1245d1fa59e9SXin LI 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
1246d1fa59e9SXin LI 	    &de);
1247d1fa59e9SXin LI 	if (error != 0) {
1248d1fa59e9SXin LI 		tmpfs_free_node(tmp, node);
12497a41bc2fSKonstantin Belousov 		return (error);
1250d1fa59e9SXin LI 	}
1251d1fa59e9SXin LI 
1252d1fa59e9SXin LI 	/* Allocate a vnode for the new file. */
1253dfd233edSAttilio Rao 	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
1254d1fa59e9SXin LI 	if (error != 0) {
12554fd5efe7SGleb Kurtsou 		tmpfs_free_dirent(tmp, de);
1256d1fa59e9SXin LI 		tmpfs_free_node(tmp, node);
12577a41bc2fSKonstantin Belousov 		return (error);
1258d1fa59e9SXin LI 	}
1259d1fa59e9SXin LI 
1260d1fa59e9SXin LI 	/* Now that all required items are allocated, we can proceed to
1261d1fa59e9SXin LI 	 * insert the new node into the directory, an operation that
1262d1fa59e9SXin LI 	 * cannot fail. */
126399d57a6bSEd Schouten 	if (cnp->cn_flags & ISWHITEOUT)
126499d57a6bSEd Schouten 		tmpfs_dir_whiteout_remove(dvp, cnp);
1265d1fa59e9SXin LI 	tmpfs_dir_attach(dvp, de);
12667a41bc2fSKonstantin Belousov 	return (0);
1267d1fa59e9SXin LI }
1268d1fa59e9SXin LI 
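/*
 * Positions the directory cursor dc at the first entry of the directory
 * dnode and returns that entry, or NULL if the directory is empty.  If the
 * first tree entry is a duplicate-cookie head, the cursor descends into its
 * duplicate list.
 */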
12691c07d69bSKonstantin Belousov struct tmpfs_dirent *
12704fd5efe7SGleb Kurtsou tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
12714fd5efe7SGleb Kurtsou {
12724fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
12734fd5efe7SGleb Kurtsou 
12744fd5efe7SGleb Kurtsou 	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
12754fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
12764fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de))
12774fd5efe7SGleb Kurtsou 		de = LIST_FIRST(&de->ud.td_duphead);
12784fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
12794fd5efe7SGleb Kurtsou 
12804fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
12814fd5efe7SGleb Kurtsou }
12824fd5efe7SGleb Kurtsou 
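/*
 * Advances the directory cursor dc to the next entry of the directory dnode
 * and returns it, or NULL once the end of the directory is reached.  The
 * duplicate-cookie list of the current entry is walked before moving on to
 * the next tree entry.
 */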
12831c07d69bSKonstantin Belousov struct tmpfs_dirent *
12844fd5efe7SGleb Kurtsou tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
12854fd5efe7SGleb Kurtsou {
12864fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
12874fd5efe7SGleb Kurtsou 
12884fd5efe7SGleb Kurtsou 	MPASS(dc->tdc_tree != NULL);
12894fd5efe7SGleb Kurtsou 	if (tmpfs_dirent_dup(dc->tdc_current)) {
12904fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
12914fd5efe7SGleb Kurtsou 		if (dc->tdc_current != NULL)
12924fd5efe7SGleb Kurtsou 			return (dc->tdc_current);
12934fd5efe7SGleb Kurtsou 	}
12944fd5efe7SGleb Kurtsou 	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
12954fd5efe7SGleb Kurtsou 	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
12964fd5efe7SGleb Kurtsou 	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
12974fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
12984fd5efe7SGleb Kurtsou 		MPASS(dc->tdc_current != NULL);
12994fd5efe7SGleb Kurtsou 	}
13004fd5efe7SGleb Kurtsou 
13014fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
13024fd5efe7SGleb Kurtsou }
13034fd5efe7SGleb Kurtsou 
13044fd5efe7SGleb Kurtsou /* Lookup directory entry in RB-Tree. Function may return duphead entry. */
13054fd5efe7SGleb Kurtsou static struct tmpfs_dirent *
13064fd5efe7SGleb Kurtsou tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
13074fd5efe7SGleb Kurtsou {
13084fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, dekey;
13094fd5efe7SGleb Kurtsou 
13104fd5efe7SGleb Kurtsou 	dekey.td_hash = hash;
13114fd5efe7SGleb Kurtsou 	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
13124fd5efe7SGleb Kurtsou 	return (de);
13134fd5efe7SGleb Kurtsou }
13144fd5efe7SGleb Kurtsou 
13154fd5efe7SGleb Kurtsou /* Lookup directory entry by cookie, initialize directory cursor accordingly. */
13164fd5efe7SGleb Kurtsou static struct tmpfs_dirent *
13174fd5efe7SGleb Kurtsou tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
13184fd5efe7SGleb Kurtsou     struct tmpfs_dir_cursor *dc)
13194fd5efe7SGleb Kurtsou {
13204fd5efe7SGleb Kurtsou 	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
13214fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, dekey;
13224fd5efe7SGleb Kurtsou 
13234fd5efe7SGleb Kurtsou 	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
13244fd5efe7SGleb Kurtsou 
13254fd5efe7SGleb Kurtsou 	if (cookie == node->tn_dir.tn_readdir_lastn &&
13264fd5efe7SGleb Kurtsou 	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
13274fd5efe7SGleb Kurtsou 		/* Protect against possible race, tn_readdir_last[pn]
13284fd5efe7SGleb Kurtsou 		 * may be updated with only shared vnode lock held. */
13294fd5efe7SGleb Kurtsou 		if (cookie == tmpfs_dirent_cookie(de))
13304fd5efe7SGleb Kurtsou 			goto out;
13314fd5efe7SGleb Kurtsou 	}
13324fd5efe7SGleb Kurtsou 
13334fd5efe7SGleb Kurtsou 	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
13344fd5efe7SGleb Kurtsou 		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
13354fd5efe7SGleb Kurtsou 		    uh.td_dup.index_entries) {
13364fd5efe7SGleb Kurtsou 			MPASS(tmpfs_dirent_dup(de));
13374fd5efe7SGleb Kurtsou 			if (de->td_cookie == cookie)
13384fd5efe7SGleb Kurtsou 				goto out;
13394fd5efe7SGleb Kurtsou 			/* dupindex list is sorted. */
13404fd5efe7SGleb Kurtsou 			if (de->td_cookie < cookie) {
13414fd5efe7SGleb Kurtsou 				de = NULL;
13424fd5efe7SGleb Kurtsou 				goto out;
13434fd5efe7SGleb Kurtsou 			}
13444fd5efe7SGleb Kurtsou 		}
13454fd5efe7SGleb Kurtsou 		MPASS(de == NULL);
13464fd5efe7SGleb Kurtsou 		goto out;
13474fd5efe7SGleb Kurtsou 	}
13484fd5efe7SGleb Kurtsou 
134915ad3e51SKonstantin Belousov 	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
135015ad3e51SKonstantin Belousov 		de = NULL;
135115ad3e51SKonstantin Belousov 	} else {
13524fd5efe7SGleb Kurtsou 		dekey.td_hash = cookie;
13534fd5efe7SGleb Kurtsou 		/* Recover if direntry for cookie was removed */
13544fd5efe7SGleb Kurtsou 		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
135515ad3e51SKonstantin Belousov 	}
13564fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
13574fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
13584fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de)) {
13594fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
13604fd5efe7SGleb Kurtsou 		MPASS(dc->tdc_current != NULL);
13614fd5efe7SGleb Kurtsou 	}
13624fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
13634fd5efe7SGleb Kurtsou 
13644fd5efe7SGleb Kurtsou out:
13654fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
13664fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
13674fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_dup(de))
13684fd5efe7SGleb Kurtsou 		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
13694fd5efe7SGleb Kurtsou 		    de->td_hash);
13704fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
13714fd5efe7SGleb Kurtsou }
13724fd5efe7SGleb Kurtsou 
13734fd5efe7SGleb Kurtsou /*
13744fd5efe7SGleb Kurtsou  * Looks for a directory entry in the directory represented by node.
13754fd5efe7SGleb Kurtsou  * 'cnp' describes the name of the entry to look for.  Note that the .
13764fd5efe7SGleb Kurtsou  * and .. components are not allowed as they do not physically exist
13774fd5efe7SGleb Kurtsou  * within directories.
13784fd5efe7SGleb Kurtsou  *
13794fd5efe7SGleb Kurtsou  * Returns a pointer to the entry when found, otherwise NULL.
13804fd5efe7SGleb Kurtsou  */
13814fd5efe7SGleb Kurtsou struct tmpfs_dirent *
13824fd5efe7SGleb Kurtsou tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
13834fd5efe7SGleb Kurtsou     struct componentname *cnp)
13844fd5efe7SGleb Kurtsou {
13854fd5efe7SGleb Kurtsou 	struct tmpfs_dir_duphead *duphead;
13864fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
13874fd5efe7SGleb Kurtsou 	uint32_t hash;
13884fd5efe7SGleb Kurtsou 
13894fd5efe7SGleb Kurtsou 	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
13904fd5efe7SGleb Kurtsou 	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
13914fd5efe7SGleb Kurtsou 	    cnp->cn_nameptr[1] == '.')));
13924fd5efe7SGleb Kurtsou 	TMPFS_VALIDATE_DIR(node);
13934fd5efe7SGleb Kurtsou 
13944fd5efe7SGleb Kurtsou 	hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
13954fd5efe7SGleb Kurtsou 	de = tmpfs_dir_xlookup_hash(node, hash);
13964fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de)) {
13974fd5efe7SGleb Kurtsou 		duphead = &de->ud.td_duphead;
13984fd5efe7SGleb Kurtsou 		LIST_FOREACH(de, duphead, uh.td_dup.entries) {
13994fd5efe7SGleb Kurtsou 			if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
14004fd5efe7SGleb Kurtsou 			    cnp->cn_namelen))
14014fd5efe7SGleb Kurtsou 				break;
14024fd5efe7SGleb Kurtsou 		}
14034fd5efe7SGleb Kurtsou 	} else if (de != NULL) {
14044fd5efe7SGleb Kurtsou 		if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
14054fd5efe7SGleb Kurtsou 		    cnp->cn_namelen))
14064fd5efe7SGleb Kurtsou 			de = NULL;
14074fd5efe7SGleb Kurtsou 	}
14084fd5efe7SGleb Kurtsou 	if (de != NULL && f != NULL && de->td_node != f)
14094fd5efe7SGleb Kurtsou 		de = NULL;
14104fd5efe7SGleb Kurtsou 
14114fd5efe7SGleb Kurtsou 	return (de);
14124fd5efe7SGleb Kurtsou }
14134fd5efe7SGleb Kurtsou 
14144fd5efe7SGleb Kurtsou /*
14154fd5efe7SGleb Kurtsou  * Attach the duplicate-cookie directory entry nde to dnode, insert it into
14164fd5efe7SGleb Kurtsou  * the dupindex list, and allocate a new cookie value for it.
14174fd5efe7SGleb Kurtsou  */
14184fd5efe7SGleb Kurtsou static void
14194fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
14204fd5efe7SGleb Kurtsou     struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
14214fd5efe7SGleb Kurtsou {
14224fd5efe7SGleb Kurtsou 	struct tmpfs_dir_duphead *dupindex;
14234fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, *pde;
14244fd5efe7SGleb Kurtsou 
14254fd5efe7SGleb Kurtsou 	dupindex = &dnode->tn_dir.tn_dupindex;
14264fd5efe7SGleb Kurtsou 	de = LIST_FIRST(dupindex);
14274fd5efe7SGleb Kurtsou 	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
14284fd5efe7SGleb Kurtsou 		if (de == NULL)
14294fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
14304fd5efe7SGleb Kurtsou 		else
14314fd5efe7SGleb Kurtsou 			nde->td_cookie = de->td_cookie + 1;
14324fd5efe7SGleb Kurtsou 		MPASS(tmpfs_dirent_dup(nde));
14334fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
14344fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14354fd5efe7SGleb Kurtsou 		return;
14364fd5efe7SGleb Kurtsou 	}
14374fd5efe7SGleb Kurtsou 
14384fd5efe7SGleb Kurtsou 	/*
14394fd5efe7SGleb Kurtsou 	 * Cookie numbers are near exhaustion. Scan dupindex list for unused
14404fd5efe7SGleb Kurtsou 	 * numbers. dupindex list is sorted in descending order. Keep it so
14414fd5efe7SGleb Kurtsou 	 * after inserting nde.
14424fd5efe7SGleb Kurtsou 	 */
14434fd5efe7SGleb Kurtsou 	while (1) {
14444fd5efe7SGleb Kurtsou 		pde = de;
14454fd5efe7SGleb Kurtsou 		de = LIST_NEXT(de, uh.td_dup.index_entries);
14464fd5efe7SGleb Kurtsou 		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
14474fd5efe7SGleb Kurtsou 			/*
14484fd5efe7SGleb Kurtsou 			 * Last element of the index doesn't have minimal cookie
14494fd5efe7SGleb Kurtsou 			 * value, use it.
14504fd5efe7SGleb Kurtsou 			 */
14514fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
14524fd5efe7SGleb Kurtsou 			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
14534fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14544fd5efe7SGleb Kurtsou 			return;
14554fd5efe7SGleb Kurtsou 		} else if (de == NULL) {
14564fd5efe7SGleb Kurtsou 			/*
14574fd5efe7SGleb Kurtsou 			 * We are so lucky to have 2^30 hash duplicates in a
14584fd5efe7SGleb Kurtsou 			 * single directory :) Return the largest possible
14594fd5efe7SGleb Kurtsou 			 * cookie value.  It should be fine except for
14604fd5efe7SGleb Kurtsou 			 * possible issues with VOP_READDIR restart.
14614fd5efe7SGleb Kurtsou 			 */
14624fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
14634fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(dupindex, nde,
14644fd5efe7SGleb Kurtsou 			    uh.td_dup.index_entries);
14654fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14664fd5efe7SGleb Kurtsou 			return;
14674fd5efe7SGleb Kurtsou 		}
14684fd5efe7SGleb Kurtsou 		if (de->td_cookie + 1 == pde->td_cookie ||
14694fd5efe7SGleb Kurtsou 		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
14704fd5efe7SGleb Kurtsou 			continue;	/* No hole or invalid cookie. */
14714fd5efe7SGleb Kurtsou 		nde->td_cookie = de->td_cookie + 1;
14724fd5efe7SGleb Kurtsou 		MPASS(tmpfs_dirent_dup(nde));
14734fd5efe7SGleb Kurtsou 		MPASS(pde->td_cookie > nde->td_cookie);
14744fd5efe7SGleb Kurtsou 		MPASS(nde->td_cookie > de->td_cookie);
14754fd5efe7SGleb Kurtsou 		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
14764fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14774fd5efe7SGleb Kurtsou 		return;
147874b8d63dSPedro F. Giffuni 	}
14794fd5efe7SGleb Kurtsou }
14804fd5efe7SGleb Kurtsou 
1481d1fa59e9SXin LI /*
1482d1fa59e9SXin LI  * Attaches the directory entry de to the directory represented by vp.
1483d1fa59e9SXin LI  * Note that this does not change the link count of the node pointed by
1484d1fa59e9SXin LI  * the directory entry, as this is done by tmpfs_alloc_dirent.
1485d1fa59e9SXin LI  */
1486d1fa59e9SXin LI void
1487d1fa59e9SXin LI tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
1488d1fa59e9SXin LI {
1489d1fa59e9SXin LI 	struct tmpfs_node *dnode;
14904fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *xde, *nde;
1491d1fa59e9SXin LI 
1492fb755714SXin LI 	ASSERT_VOP_ELOCKED(vp, __func__);
14934fd5efe7SGleb Kurtsou 	MPASS(de->td_namelen > 0);
14944fd5efe7SGleb Kurtsou 	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
14954fd5efe7SGleb Kurtsou 	MPASS(de->td_cookie == de->td_hash);
14964fd5efe7SGleb Kurtsou 
1497d1fa59e9SXin LI 	dnode = VP_TO_TMPFS_DIR(vp);
14984fd5efe7SGleb Kurtsou 	dnode->tn_dir.tn_readdir_lastn = 0;
14994fd5efe7SGleb Kurtsou 	dnode->tn_dir.tn_readdir_lastp = NULL;
15004fd5efe7SGleb Kurtsou 
15014fd5efe7SGleb Kurtsou 	MPASS(!tmpfs_dirent_dup(de));
15024fd5efe7SGleb Kurtsou 	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
15034fd5efe7SGleb Kurtsou 	if (xde != NULL && tmpfs_dirent_duphead(xde))
15044fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
15054fd5efe7SGleb Kurtsou 	else if (xde != NULL) {
15064fd5efe7SGleb Kurtsou 		/*
15074fd5efe7SGleb Kurtsou 		 * Allocate new duphead. Swap xde with duphead to avoid
15084fd5efe7SGleb Kurtsou 		 * adding/removing elements with the same hash.
15094fd5efe7SGleb Kurtsou 		 */
15104fd5efe7SGleb Kurtsou 		MPASS(!tmpfs_dirent_dup(xde));
15114fd5efe7SGleb Kurtsou 		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
15124fd5efe7SGleb Kurtsou 		    &nde);
15134fd5efe7SGleb Kurtsou 		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
15144fd5efe7SGleb Kurtsou 		memcpy(nde, xde, sizeof(*xde));
15154fd5efe7SGleb Kurtsou 		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
15164fd5efe7SGleb Kurtsou 		LIST_INIT(&xde->ud.td_duphead);
15174fd5efe7SGleb Kurtsou 		xde->td_namelen = 0;
15184fd5efe7SGleb Kurtsou 		xde->td_node = NULL;
15194fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
15204fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
15214fd5efe7SGleb Kurtsou 	}
1522d1fa59e9SXin LI 	dnode->tn_size += sizeof(struct tmpfs_dirent);
1523016b7c7eSKonstantin Belousov 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1524016b7c7eSKonstantin Belousov 	dnode->tn_accessed = true;
1525e0a60ae1SKonstantin Belousov 	tmpfs_update(vp);
1526d1fa59e9SXin LI }
1527d1fa59e9SXin LI 
1528d1fa59e9SXin LI /*
1529d1fa59e9SXin LI  * Detaches the directory entry de from the directory represented by vp.
1530d1fa59e9SXin LI  * Note that this does not change the link count of the node pointed by
1531d1fa59e9SXin LI  * the directory entry, as this is done by tmpfs_free_dirent.
1532d1fa59e9SXin LI  */
1533d1fa59e9SXin LI void
1534d1fa59e9SXin LI tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
1535d1fa59e9SXin LI {
15364fd5efe7SGleb Kurtsou 	struct tmpfs_mount *tmp;
15374fd5efe7SGleb Kurtsou 	struct tmpfs_dir *head;
1538d1fa59e9SXin LI 	struct tmpfs_node *dnode;
15394fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *xde;
1540d1fa59e9SXin LI 
1541fb755714SXin LI 	ASSERT_VOP_ELOCKED(vp, __func__);
1542d1fa59e9SXin LI 
15434fd5efe7SGleb Kurtsou 	dnode = VP_TO_TMPFS_DIR(vp);
15444fd5efe7SGleb Kurtsou 	head = &dnode->tn_dir.tn_dirhead;
1545d1fa59e9SXin LI 	dnode->tn_dir.tn_readdir_lastn = 0;
1546d1fa59e9SXin LI 	dnode->tn_dir.tn_readdir_lastp = NULL;
1547d1fa59e9SXin LI 
15484fd5efe7SGleb Kurtsou 	if (tmpfs_dirent_dup(de)) {
15494fd5efe7SGleb Kurtsou 		/* Remove duphead if de was last entry. */
15504fd5efe7SGleb Kurtsou 		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
15514fd5efe7SGleb Kurtsou 			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
15524fd5efe7SGleb Kurtsou 			MPASS(tmpfs_dirent_duphead(xde));
15534fd5efe7SGleb Kurtsou 		} else
15544fd5efe7SGleb Kurtsou 			xde = NULL;
15554fd5efe7SGleb Kurtsou 		LIST_REMOVE(de, uh.td_dup.entries);
15564fd5efe7SGleb Kurtsou 		LIST_REMOVE(de, uh.td_dup.index_entries);
15574fd5efe7SGleb Kurtsou 		if (xde != NULL) {
15584fd5efe7SGleb Kurtsou 			if (LIST_EMPTY(&xde->ud.td_duphead)) {
15594fd5efe7SGleb Kurtsou 				RB_REMOVE(tmpfs_dir, head, xde);
15604fd5efe7SGleb Kurtsou 				tmp = VFS_TO_TMPFS(vp->v_mount);
15614fd5efe7SGleb Kurtsou 				MPASS(xde->td_node == NULL);
15624fd5efe7SGleb Kurtsou 				tmpfs_free_dirent(tmp, xde);
15634fd5efe7SGleb Kurtsou 			}
15644fd5efe7SGleb Kurtsou 		}
156585512850SKonstantin Belousov 		de->td_cookie = de->td_hash;
15664fd5efe7SGleb Kurtsou 	} else
15674fd5efe7SGleb Kurtsou 		RB_REMOVE(tmpfs_dir, head, de);
15684fd5efe7SGleb Kurtsou 
1569d1fa59e9SXin LI 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
1570016b7c7eSKonstantin Belousov 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1571016b7c7eSKonstantin Belousov 	dnode->tn_accessed = true;
1572e0a60ae1SKonstantin Belousov 	tmpfs_update(vp);
1573d1fa59e9SXin LI }
1574d1fa59e9SXin LI 
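/*
 * Releases all directory entries of dnode, including whole duplicate-cookie
 * lists, back to the mount tmp.  The entries' node pointers are cleared
 * rather than unlinked, as the nodes may already be destroyed.
 */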
15754fd5efe7SGleb Kurtsou void
15764fd5efe7SGleb Kurtsou tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
1577d1fa59e9SXin LI {
15784fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, *dde, *nde;
1579d1fa59e9SXin LI 
15804fd5efe7SGleb Kurtsou 	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
15814fd5efe7SGleb Kurtsou 		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
15824fd5efe7SGleb Kurtsou 		/* Node may already be destroyed. */
15834fd5efe7SGleb Kurtsou 		de->td_node = NULL;
15844fd5efe7SGleb Kurtsou 		if (tmpfs_dirent_duphead(de)) {
15854fd5efe7SGleb Kurtsou 			while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
15864fd5efe7SGleb Kurtsou 				LIST_REMOVE(dde, uh.td_dup.entries);
15874fd5efe7SGleb Kurtsou 				dde->td_node = NULL;
15884fd5efe7SGleb Kurtsou 				tmpfs_free_dirent(tmp, dde);
1589d1fa59e9SXin LI 			}
1590d1fa59e9SXin LI 		}
15914fd5efe7SGleb Kurtsou 		tmpfs_free_dirent(tmp, de);
15924fd5efe7SGleb Kurtsou 	}
1593d1fa59e9SXin LI }
1594d1fa59e9SXin LI 
1595d1fa59e9SXin LI /*
1596d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
1597d1fa59e9SXin LI  * directory and returns it in the uio space.  The function returns 0
1598d1fa59e9SXin LI  * on success, EJUSTRETURN if there was not enough space in the uio
1599d1fa59e9SXin LI  * structure to hold the directory entry, or an appropriate error code
1600d1fa59e9SXin LI  * if another error happens.
1601d1fa59e9SXin LI  */
16024fd5efe7SGleb Kurtsou static int
1603e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
1604e1cdc30fSKonstantin Belousov     struct uio *uio)
1605d1fa59e9SXin LI {
1606d1fa59e9SXin LI 	int error;
1607d1fa59e9SXin LI 	struct dirent dent;
1608d1fa59e9SXin LI 
1609d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1610d1fa59e9SXin LI 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
1611d1fa59e9SXin LI 
1612d1fa59e9SXin LI 	dent.d_fileno = node->tn_id;
161390f580b9SMark Johnston 	dent.d_off = TMPFS_DIRCOOKIE_DOTDOT;
1614d1fa59e9SXin LI 	dent.d_type = DT_DIR;
1615d1fa59e9SXin LI 	dent.d_namlen = 1;
1616d1fa59e9SXin LI 	dent.d_name[0] = '.';
1617d1fa59e9SXin LI 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
16186d2e2df7SMark Johnston 	dirent_terminate(&dent);
1619d1fa59e9SXin LI 
1620d1fa59e9SXin LI 	if (dent.d_reclen > uio->uio_resid)
16214fd5efe7SGleb Kurtsou 		error = EJUSTRETURN;
16224fd5efe7SGleb Kurtsou 	else
1623d1fa59e9SXin LI 		error = uiomove(&dent, dent.d_reclen, uio);
1624d1fa59e9SXin LI 
1625016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1626d1fa59e9SXin LI 
16275dc11286SKonstantin Belousov 	return (error);
1628d1fa59e9SXin LI }
1629d1fa59e9SXin LI 
1630d1fa59e9SXin LI /*
1631d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
1632d1fa59e9SXin LI  * directory and returns it in the uio space.  The function returns 0
1633d1fa59e9SXin LI  * on success, EJUSTRETURN if there was not enough space in the uio
1634d1fa59e9SXin LI  * structure to hold the directory entry, or an appropriate error code
1635d1fa59e9SXin LI  * if another error happens.
1636d1fa59e9SXin LI  */
16374fd5efe7SGleb Kurtsou static int
1638e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
163990f580b9SMark Johnston     struct uio *uio, off_t next)
1640d1fa59e9SXin LI {
1641c5dac63cSKonstantin Belousov 	struct tmpfs_node *parent;
1642d1fa59e9SXin LI 	struct dirent dent;
1643c5dac63cSKonstantin Belousov 	int error;
1644d1fa59e9SXin LI 
1645d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1646d1fa59e9SXin LI 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
1647d1fa59e9SXin LI 
164882cf92d4SXin LI 	/*
164982cf92d4SXin LI 	 * Return ENOENT if the current node is already removed.
165082cf92d4SXin LI 	 */
165182cf92d4SXin LI 	TMPFS_ASSERT_LOCKED(node);
1652c5dac63cSKonstantin Belousov 	parent = node->tn_dir.tn_parent;
1653c5dac63cSKonstantin Belousov 	if (parent == NULL)
165482cf92d4SXin LI 		return (ENOENT);
165582cf92d4SXin LI 
1656c5dac63cSKonstantin Belousov 	dent.d_fileno = parent->tn_id;
165790f580b9SMark Johnston 	dent.d_off = next;
1658d1fa59e9SXin LI 	dent.d_type = DT_DIR;
1659d1fa59e9SXin LI 	dent.d_namlen = 2;
1660d1fa59e9SXin LI 	dent.d_name[0] = '.';
1661d1fa59e9SXin LI 	dent.d_name[1] = '.';
1662d1fa59e9SXin LI 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
16636d2e2df7SMark Johnston 	dirent_terminate(&dent);
1664d1fa59e9SXin LI 
1665d1fa59e9SXin LI 	if (dent.d_reclen > uio->uio_resid)
16664fd5efe7SGleb Kurtsou 		error = EJUSTRETURN;
1667d1fa59e9SXin LI 	else
16684fd5efe7SGleb Kurtsou 		error = uiomove(&dent, dent.d_reclen, uio);
1669d1fa59e9SXin LI 
1670016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1671d1fa59e9SXin LI 
16725dc11286SKonstantin Belousov 	return (error);
1673d1fa59e9SXin LI }
1674d1fa59e9SXin LI 
1675d1fa59e9SXin LI /*
1676d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Returns as many directory entries
1677d1fa59e9SXin LI  * as can fit in the uio space.  The read starts at uio->uio_offset.
1678d1fa59e9SXin LI  * The function returns 0 on success, EJUSTRETURN if there was not
1679d1fa59e9SXin LI  * enough space in the uio structure to hold the directory entry, or an
1680d1fa59e9SXin LI  * appropriate error code if another error happens.
1681d1fa59e9SXin LI  */
1682d1fa59e9SXin LI int
1683e1cdc30fSKonstantin Belousov tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node,
1684b214fcceSAlan Somers     struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies)
1685d1fa59e9SXin LI {
16864fd5efe7SGleb Kurtsou 	struct tmpfs_dir_cursor dc;
168790f580b9SMark Johnston 	struct tmpfs_dirent *de, *nde;
16884fd5efe7SGleb Kurtsou 	off_t off;
16894fd5efe7SGleb Kurtsou 	int error;
1690d1fa59e9SXin LI 
1691d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1692d1fa59e9SXin LI 
16934fd5efe7SGleb Kurtsou 	off = 0;
169462dca316SBryan Drewery 
169562dca316SBryan Drewery 	/*
169662dca316SBryan Drewery 	 * Lookup the node from the current offset.  The starting offset of
169762dca316SBryan Drewery 	 * 0 will lookup both '.' and '..', and then the first real entry,
169862dca316SBryan Drewery 	 * or EOF if there are none.  Then find all entries for the dir that
169962dca316SBryan Drewery 	 * fit into the buffer.  Once no more entries are found (de == NULL),
170062dca316SBryan Drewery 	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
170162dca316SBryan Drewery 	 * call to return 0.
170262dca316SBryan Drewery 	 */
17034fd5efe7SGleb Kurtsou 	switch (uio->uio_offset) {
17044fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_DOT:
1705e1cdc30fSKonstantin Belousov 		error = tmpfs_dir_getdotdent(tm, node, uio);
17064fd5efe7SGleb Kurtsou 		if (error != 0)
17074fd5efe7SGleb Kurtsou 			return (error);
170890f580b9SMark Johnston 		uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT;
1709ac09d109SBryan Drewery 		if (cookies != NULL)
171090f580b9SMark Johnston 			cookies[(*ncookies)++] = off;
1711504bde01SBryan Drewery 		/* FALLTHROUGH */
17124fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_DOTDOT:
171390f580b9SMark Johnston 		de = tmpfs_dir_first(node, &dc);
171490f580b9SMark Johnston 		off = tmpfs_dirent_cookie(de);
171590f580b9SMark Johnston 		error = tmpfs_dir_getdotdotdent(tm, node, uio, off);
17164fd5efe7SGleb Kurtsou 		if (error != 0)
17174fd5efe7SGleb Kurtsou 			return (error);
171890f580b9SMark Johnston 		uio->uio_offset = off;
1719ac09d109SBryan Drewery 		if (cookies != NULL)
172090f580b9SMark Johnston 			cookies[(*ncookies)++] = off;
172162dca316SBryan Drewery 		/* EOF. */
17224fd5efe7SGleb Kurtsou 		if (de == NULL)
17234fd5efe7SGleb Kurtsou 			return (0);
17244fd5efe7SGleb Kurtsou 		break;
17254fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_EOF:
17264fd5efe7SGleb Kurtsou 		return (0);
17274fd5efe7SGleb Kurtsou 	default:
17284fd5efe7SGleb Kurtsou 		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
17294fd5efe7SGleb Kurtsou 		if (de == NULL)
17304fd5efe7SGleb Kurtsou 			return (EINVAL);
1731ac09d109SBryan Drewery 		if (cookies != NULL)
17324fd5efe7SGleb Kurtsou 			off = tmpfs_dirent_cookie(de);
1733d1fa59e9SXin LI 	}
1734d1fa59e9SXin LI 
173590f580b9SMark Johnston 	/*
173690f580b9SMark Johnston 	 * Read as many entries as possible; i.e., until we reach the end of the
173790f580b9SMark Johnston 	 * directory or we exhaust uio space.
173890f580b9SMark Johnston 	 */
1739d1fa59e9SXin LI 	do {
1740d1fa59e9SXin LI 		struct dirent d;
1741d1fa59e9SXin LI 
174290f580b9SMark Johnston 		/*
174390f580b9SMark Johnston 		 * Create a dirent structure representing the current tmpfs_node
174490f580b9SMark Johnston 		 * and fill it.
174590f580b9SMark Johnston 		 */
174699d57a6bSEd Schouten 		if (de->td_node == NULL) {
174799d57a6bSEd Schouten 			d.d_fileno = 1;
174899d57a6bSEd Schouten 			d.d_type = DT_WHT;
174999d57a6bSEd Schouten 		} else {
1750d1fa59e9SXin LI 			d.d_fileno = de->td_node->tn_id;
1751d1fa59e9SXin LI 			switch (de->td_node->tn_type) {
1752d1fa59e9SXin LI 			case VBLK:
1753d1fa59e9SXin LI 				d.d_type = DT_BLK;
1754d1fa59e9SXin LI 				break;
1755d1fa59e9SXin LI 
1756d1fa59e9SXin LI 			case VCHR:
1757d1fa59e9SXin LI 				d.d_type = DT_CHR;
1758d1fa59e9SXin LI 				break;
1759d1fa59e9SXin LI 
1760d1fa59e9SXin LI 			case VDIR:
1761d1fa59e9SXin LI 				d.d_type = DT_DIR;
1762d1fa59e9SXin LI 				break;
1763d1fa59e9SXin LI 
1764d1fa59e9SXin LI 			case VFIFO:
1765d1fa59e9SXin LI 				d.d_type = DT_FIFO;
1766d1fa59e9SXin LI 				break;
1767d1fa59e9SXin LI 
1768d1fa59e9SXin LI 			case VLNK:
1769d1fa59e9SXin LI 				d.d_type = DT_LNK;
1770d1fa59e9SXin LI 				break;
1771d1fa59e9SXin LI 
1772d1fa59e9SXin LI 			case VREG:
1773d1fa59e9SXin LI 				d.d_type = DT_REG;
1774d1fa59e9SXin LI 				break;
1775d1fa59e9SXin LI 
1776d1fa59e9SXin LI 			case VSOCK:
1777d1fa59e9SXin LI 				d.d_type = DT_SOCK;
1778d1fa59e9SXin LI 				break;
1779d1fa59e9SXin LI 
1780d1fa59e9SXin LI 			default:
17811fa8f5f0SXin LI 				panic("tmpfs_dir_getdents: type %p %d",
17821fa8f5f0SXin LI 				    de->td_node, (int)de->td_node->tn_type);
1783d1fa59e9SXin LI 			}
178499d57a6bSEd Schouten 		}
1785d1fa59e9SXin LI 		d.d_namlen = de->td_namelen;
1786d1fa59e9SXin LI 		MPASS(de->td_namelen < sizeof(d.d_name));
17874fd5efe7SGleb Kurtsou 		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
1788d1fa59e9SXin LI 		d.d_reclen = GENERIC_DIRSIZ(&d);
1789d1fa59e9SXin LI 
179090f580b9SMark Johnston 		/*
179190f580b9SMark Johnston 		 * Stop reading if the directory entry we are processing is bigger
179290f580b9SMark Johnston 		 * than the amount of data that can be returned.
179390f580b9SMark Johnston 		 */
1794d1fa59e9SXin LI 		if (d.d_reclen > uio->uio_resid) {
17954fd5efe7SGleb Kurtsou 			error = EJUSTRETURN;
1796d1fa59e9SXin LI 			break;
1797d1fa59e9SXin LI 		}
1798d1fa59e9SXin LI 
179990f580b9SMark Johnston 		nde = tmpfs_dir_next(node, &dc);
180090f580b9SMark Johnston 		d.d_off = tmpfs_dirent_cookie(nde);
180190f580b9SMark Johnston 		dirent_terminate(&d);
180290f580b9SMark Johnston 
180390f580b9SMark Johnston 		/*
180490f580b9SMark Johnston 		 * Copy the new dirent structure into the output buffer and
180590f580b9SMark Johnston 		 * advance pointers.
180690f580b9SMark Johnston 		 */
1807d1fa59e9SXin LI 		error = uiomove(&d, d.d_reclen, uio);
18089fb9c623SKonstantin Belousov 		if (error == 0) {
180990f580b9SMark Johnston 			de = nde;
1810ac09d109SBryan Drewery 			if (cookies != NULL) {
18114fd5efe7SGleb Kurtsou 				off = tmpfs_dirent_cookie(de);
1812ac09d109SBryan Drewery 				MPASS(*ncookies < maxcookies);
18134fd5efe7SGleb Kurtsou 				cookies[(*ncookies)++] = off;
18144fd5efe7SGleb Kurtsou 			}
18159fb9c623SKonstantin Belousov 		}
1816d1fa59e9SXin LI 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
1817d1fa59e9SXin LI 
1818ac09d109SBryan Drewery 	/* Skip setting off when using cookies as it is already done above. */
1819ac09d109SBryan Drewery 	if (cookies == NULL)
18204fd5efe7SGleb Kurtsou 		off = tmpfs_dirent_cookie(de);
1821d1fa59e9SXin LI 
1822ac09d109SBryan Drewery 	/* Update the offset and cache. */
18234fd5efe7SGleb Kurtsou 	uio->uio_offset = off;
18244fd5efe7SGleb Kurtsou 	node->tn_dir.tn_readdir_lastn = off;
18254fd5efe7SGleb Kurtsou 	node->tn_dir.tn_readdir_lastp = de;
18264fd5efe7SGleb Kurtsou 
1827016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1828016b7c7eSKonstantin Belousov 	return (error);
1829d1fa59e9SXin LI }
1830d1fa59e9SXin LI 
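/*
 * Creates a whiteout entry (a directory entry without a target node) named
 * after cnp in the directory dvp and charges it against the directory's
 * whiteout size accounting.
 */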
183199d57a6bSEd Schouten int
183299d57a6bSEd Schouten tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
183399d57a6bSEd Schouten {
183499d57a6bSEd Schouten 	struct tmpfs_dirent *de;
1835*8fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
183699d57a6bSEd Schouten 	int error;
183799d57a6bSEd Schouten 
183899d57a6bSEd Schouten 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
183999d57a6bSEd Schouten 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
184099d57a6bSEd Schouten 	if (error != 0)
184199d57a6bSEd Schouten 		return (error);
1842*8fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
184399d57a6bSEd Schouten 	tmpfs_dir_attach(dvp, de);
1844*8fa5e0f2SJason A. Harmening 	dnode->tn_dir.tn_wht_size += sizeof(*de);
184599d57a6bSEd Schouten 	return (0);
184699d57a6bSEd Schouten }
184799d57a6bSEd Schouten 
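/*
 * Removes the whiteout entry named after cnp from the directory dvp,
 * adjusting the directory's whiteout size accounting, and frees it.
 */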
184899d57a6bSEd Schouten void
184999d57a6bSEd Schouten tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
185099d57a6bSEd Schouten {
185199d57a6bSEd Schouten 	struct tmpfs_dirent *de;
1852*8fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
185399d57a6bSEd Schouten 
1854*8fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
1855*8fa5e0f2SJason A. Harmening 	de = tmpfs_dir_lookup(dnode, NULL, cnp);
185699d57a6bSEd Schouten 	MPASS(de != NULL && de->td_node == NULL);
1857*8fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de));
1858*8fa5e0f2SJason A. Harmening 	dnode->tn_dir.tn_wht_size -= sizeof(*de);
185999d57a6bSEd Schouten 	tmpfs_dir_detach(dvp, de);
18604fd5efe7SGleb Kurtsou 	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
186199d57a6bSEd Schouten }
186299d57a6bSEd Schouten 
1863d1fa59e9SXin LI /*
1864*8fa5e0f2SJason A. Harmening  * Frees any dirents still associated with the directory represented
1865*8fa5e0f2SJason A. Harmening  * by dvp in preparation for the removal of the directory.  This is
1866*8fa5e0f2SJason A. Harmening  * required when removing a directory which contains only whiteout
1867*8fa5e0f2SJason A. Harmening  * entries.
1868*8fa5e0f2SJason A. Harmening  */
1869*8fa5e0f2SJason A. Harmening void
1870*8fa5e0f2SJason A. Harmening tmpfs_dir_clear_whiteouts(struct vnode *dvp)
1871*8fa5e0f2SJason A. Harmening {
1872*8fa5e0f2SJason A. Harmening 	struct tmpfs_dir_cursor dc;
1873*8fa5e0f2SJason A. Harmening 	struct tmpfs_dirent *de;
1874*8fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
1875*8fa5e0f2SJason A. Harmening 
1876*8fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
1877*8fa5e0f2SJason A. Harmening 
1878*8fa5e0f2SJason A. Harmening 	while ((de = tmpfs_dir_first(dnode, &dc)) != NULL) {
1879*8fa5e0f2SJason A. Harmening 		KASSERT(de->td_node == NULL, ("%s: non-whiteout dirent %p",
1880*8fa5e0f2SJason A. Harmening 		    __func__, de));
1881*8fa5e0f2SJason A. Harmening 		dnode->tn_dir.tn_wht_size -= sizeof(*de);
1882*8fa5e0f2SJason A. Harmening 		tmpfs_dir_detach(dvp, de);
1883*8fa5e0f2SJason A. Harmening 		tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
1884*8fa5e0f2SJason A. Harmening 	}
1885*8fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_size == 0);
1886*8fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_dir.tn_wht_size == 0);
1887*8fa5e0f2SJason A. Harmening }
1888*8fa5e0f2SJason A. Harmening 
1889*8fa5e0f2SJason A. Harmening /*
18904673c751SAlan Cox  * Resizes the aobj associated with the regular file pointed to by 'vp' to the
18914673c751SAlan Cox  * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
18924673c751SAlan Cox  * 'newsize' must not be negative.
1893d1fa59e9SXin LI  *
1894d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
1895d1fa59e9SXin LI  */
1896d1fa59e9SXin LI int
18970b05cac3SAlan Cox tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
1898d1fa59e9SXin LI {
1899d1fa59e9SXin LI 	struct tmpfs_node *node;
1900b10d1d5dSAlan Cox 	vm_object_t uobj;
19012971897dSAlan Cox 	vm_pindex_t idx, newpages, oldpages;
1902d1fa59e9SXin LI 	off_t oldsize;
1903399be910SKa Ho Ng 	int base, error;
1904d1fa59e9SXin LI 
1905d1fa59e9SXin LI 	MPASS(vp->v_type == VREG);
1906d1fa59e9SXin LI 	MPASS(newsize >= 0);
1907d1fa59e9SXin LI 
1908d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
1909b10d1d5dSAlan Cox 	uobj = node->tn_reg.tn_aobj;
1910d1fa59e9SXin LI 
19114673c751SAlan Cox 	/*
19124673c751SAlan Cox 	 * Convert the old and new sizes to the number of pages needed to
1913d1fa59e9SXin LI 	 * store them.  It may happen that we do not need to do anything
1914d1fa59e9SXin LI 	 * because the last allocated page can accommodate the change on
19154673c751SAlan Cox 	 * its own.
19164673c751SAlan Cox 	 */
1917d1fa59e9SXin LI 	oldsize = node->tn_size;
1918b10d1d5dSAlan Cox 	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
1919b10d1d5dSAlan Cox 	MPASS(oldpages == uobj->size);
1920b10d1d5dSAlan Cox 	newpages = OFF_TO_IDX(newsize + PAGE_MASK);
1921e3e10c39SMateusz Guzik 
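	/*
	 * Common case: the file grows within its last allocated page, so
	 * the backing object needs neither resizing nor page removal.
	 */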
1922e3e10c39SMateusz Guzik 	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
1923e3e10c39SMateusz Guzik 		node->tn_size = newsize;
1924e3e10c39SMateusz Guzik 		return (0);
1925e3e10c39SMateusz Guzik 	}
1926e3e10c39SMateusz Guzik 
192789f6b863SAttilio Rao 	VM_OBJECT_WLOCK(uobj);
1928d1fa59e9SXin LI 	if (newsize < oldsize) {
1929d1fa59e9SXin LI 		/*
19302971897dSAlan Cox 		 * Zero the truncated part of the last page.
19312971897dSAlan Cox 		 */
19322971897dSAlan Cox 		base = newsize & PAGE_MASK;
19332971897dSAlan Cox 		if (base != 0) {
19342971897dSAlan Cox 			idx = OFF_TO_IDX(newsize);
1935399be910SKa Ho Ng 			error = tmpfs_partial_page_invalidate(uobj, idx, base,
1936399be910SKa Ho Ng 			    PAGE_SIZE, ignerr);
1937399be910SKa Ho Ng 			if (error != 0) {
1938d6e13f3bSJeff Roberson 				VM_OBJECT_WUNLOCK(uobj);
1939399be910SKa Ho Ng 				return (error);
19402971897dSAlan Cox 			}
19412971897dSAlan Cox 		}
19422971897dSAlan Cox 
19432971897dSAlan Cox 		/*
19444673c751SAlan Cox 		 * Release any swap space and free any whole pages.
1945d1fa59e9SXin LI 		 */
194684242cf6SMark Johnston 		if (newpages < oldpages)
19476bbee8e2SAlan Cox 			vm_object_page_remove(uobj, newpages, 0, 0);
1948d1fa59e9SXin LI 	}
1949b10d1d5dSAlan Cox 	uobj->size = newpages;
195089f6b863SAttilio Rao 	VM_OBJECT_WUNLOCK(uobj);
19512971897dSAlan Cox 
19522971897dSAlan Cox 	node->tn_size = newsize;
19534673c751SAlan Cox 	return (0);
1954d1fa59e9SXin LI }
1955d1fa59e9SXin LI 
19568d7cd10bSKa Ho Ng /*
19578d7cd10bSKa Ho Ng  * Punch hole in the aobj associated with the regular file pointed to by 'vp'.
19588d7cd10bSKa Ho Ng  * Requests completely beyond the end-of-file are converted to a no-op.
19598d7cd10bSKa Ho Ng  *
19608d7cd10bSKa Ho Ng  * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on
19618d7cd10bSKa Ho Ng  * failure.
19628d7cd10bSKa Ho Ng  */
19638d7cd10bSKa Ho Ng int
19648d7cd10bSKa Ho Ng tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length)
19658d7cd10bSKa Ho Ng {
19668d7cd10bSKa Ho Ng 	struct tmpfs_node *node;
19678d7cd10bSKa Ho Ng 	vm_object_t object;
19688d7cd10bSKa Ho Ng 	vm_pindex_t pistart, pi, piend;
19698d7cd10bSKa Ho Ng 	int startofs, endofs, end;
19708d7cd10bSKa Ho Ng 	off_t off, len;
19718d7cd10bSKa Ho Ng 	int error;
19728d7cd10bSKa Ho Ng 
19738d7cd10bSKa Ho Ng 	KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows",
19748d7cd10bSKa Ho Ng 	    __func__));
19758d7cd10bSKa Ho Ng 	node = VP_TO_TMPFS_NODE(vp);
19768d7cd10bSKa Ho Ng 	KASSERT(node->tn_type == VREG, ("%s: node is not regular file",
19778d7cd10bSKa Ho Ng 	    __func__));
19788d7cd10bSKa Ho Ng 	object = node->tn_reg.tn_aobj;
19798d7cd10bSKa Ho Ng 	off = *offset;
19808d7cd10bSKa Ho Ng 	len = omin(node->tn_size - off, *length);
19818d7cd10bSKa Ho Ng 	startofs = off & PAGE_MASK;
19828d7cd10bSKa Ho Ng 	endofs = (off + len) & PAGE_MASK;
19838d7cd10bSKa Ho Ng 	pistart = OFF_TO_IDX(off);
19848d7cd10bSKa Ho Ng 	piend = OFF_TO_IDX(off + len);
19858d7cd10bSKa Ho Ng 	pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK);
19868d7cd10bSKa Ho Ng 	error = 0;
19878d7cd10bSKa Ho Ng 
19888d7cd10bSKa Ho Ng 	/* Handle the case when offset is on or beyond file size. */
19898d7cd10bSKa Ho Ng 	if (len <= 0) {
19908d7cd10bSKa Ho Ng 		*length = 0;
19918d7cd10bSKa Ho Ng 		return (0);
19928d7cd10bSKa Ho Ng 	}
19938d7cd10bSKa Ho Ng 
19948d7cd10bSKa Ho Ng 	VM_OBJECT_WLOCK(object);
19958d7cd10bSKa Ho Ng 
19968d7cd10bSKa Ho Ng 	/*
19978d7cd10bSKa Ho Ng 	 * If there is a partial page at the beginning of the hole-punching
19988d7cd10bSKa Ho Ng 	 * request, fill the partial page with zeroes.
19998d7cd10bSKa Ho Ng 	 */
20008d7cd10bSKa Ho Ng 	if (startofs != 0) {
20018d7cd10bSKa Ho Ng 		end = pistart != piend ? PAGE_SIZE : endofs;
20028d7cd10bSKa Ho Ng 		error = tmpfs_partial_page_invalidate(object, pistart, startofs,
20038d7cd10bSKa Ho Ng 		    end, FALSE);
20048d7cd10bSKa Ho Ng 		if (error != 0)
20058d7cd10bSKa Ho Ng 			goto out;
20068d7cd10bSKa Ho Ng 		off += end - startofs;
20078d7cd10bSKa Ho Ng 		len -= end - startofs;
20088d7cd10bSKa Ho Ng 	}
20098d7cd10bSKa Ho Ng 
20108d7cd10bSKa Ho Ng 	/*
20118d7cd10bSKa Ho Ng 	 * Toss away the full pages in the affected area.
20128d7cd10bSKa Ho Ng 	 */
20138d7cd10bSKa Ho Ng 	if (pi < piend) {
20148d7cd10bSKa Ho Ng 		vm_object_page_remove(object, pi, piend, 0);
20158d7cd10bSKa Ho Ng 		off += IDX_TO_OFF(piend - pi);
20168d7cd10bSKa Ho Ng 		len -= IDX_TO_OFF(piend - pi);
20178d7cd10bSKa Ho Ng 	}
20188d7cd10bSKa Ho Ng 
20198d7cd10bSKa Ho Ng 	/*
20208d7cd10bSKa Ho Ng 	 * If there is a partial page at the end of the hole-punching request,
20218d7cd10bSKa Ho Ng 	 * fill the partial page with zeroes.
20228d7cd10bSKa Ho Ng 	 */
20238d7cd10bSKa Ho Ng 	if (endofs != 0 && pistart != piend) {
20248d7cd10bSKa Ho Ng 		error = tmpfs_partial_page_invalidate(object, piend, 0, endofs,
20258d7cd10bSKa Ho Ng 		    FALSE);
20268d7cd10bSKa Ho Ng 		if (error != 0)
20278d7cd10bSKa Ho Ng 			goto out;
20288d7cd10bSKa Ho Ng 		off += endofs;
20298d7cd10bSKa Ho Ng 		len -= endofs;
20308d7cd10bSKa Ho Ng 	}
20318d7cd10bSKa Ho Ng 
20328d7cd10bSKa Ho Ng out:
20338d7cd10bSKa Ho Ng 	VM_OBJECT_WUNLOCK(object);
20348d7cd10bSKa Ho Ng 	*offset = off;
20358d7cd10bSKa Ho Ng 	*length = len;
20368d7cd10bSKa Ho Ng 	return (error);
20378d7cd10bSKa Ho Ng }
20388d7cd10bSKa Ho Ng 
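/*
 * If the contents of the regular file pointed to by vp were modified
 * through its backing VM object (e.g. via a writeable mapping), mark the
 * node as modified and changed so that its times are updated.
 */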
2039f40cb1c6SKonstantin Belousov void
2040f40cb1c6SKonstantin Belousov tmpfs_check_mtime(struct vnode *vp)
2041f40cb1c6SKonstantin Belousov {
2042f40cb1c6SKonstantin Belousov 	struct tmpfs_node *node;
2043f40cb1c6SKonstantin Belousov 	struct vm_object *obj;
2044f40cb1c6SKonstantin Belousov 
2045f40cb1c6SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "check_mtime");
2046f40cb1c6SKonstantin Belousov 	if (vp->v_type != VREG)
2047f40cb1c6SKonstantin Belousov 		return;
2048f40cb1c6SKonstantin Belousov 	obj = vp->v_object;
204928bc23abSKonstantin Belousov 	KASSERT(obj->type == tmpfs_pager_type &&
20504b8365d7SKonstantin Belousov 	    (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) ==
20514b8365d7SKonstantin Belousov 	    (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj"));
2052f40cb1c6SKonstantin Belousov 	/* unlocked read */
205367d0e293SJeff Roberson 	if (obj->generation != obj->cleangeneration) {
2054f40cb1c6SKonstantin Belousov 		VM_OBJECT_WLOCK(obj);
205567d0e293SJeff Roberson 		if (obj->generation != obj->cleangeneration) {
205667d0e293SJeff Roberson 			obj->cleangeneration = obj->generation;
2057f40cb1c6SKonstantin Belousov 			node = VP_TO_TMPFS_NODE(vp);
2058311d39f2SKonstantin Belousov 			node->tn_status |= TMPFS_NODE_MODIFIED |
2059311d39f2SKonstantin Belousov 			    TMPFS_NODE_CHANGED;
2060f40cb1c6SKonstantin Belousov 		}
2061f40cb1c6SKonstantin Belousov 		VM_OBJECT_WUNLOCK(obj);
2062f40cb1c6SKonstantin Belousov 	}
2063f40cb1c6SKonstantin Belousov }
2064f40cb1c6SKonstantin Belousov 
2065d1fa59e9SXin LI /*
2066d1fa59e9SXin LI  * Change flags of the given vnode.
2067d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2068d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2069d1fa59e9SXin LI  */
2070d1fa59e9SXin LI int
2071b4b2596bSPawel Jakub Dawidek tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
20720f01fb01SKonstantin Belousov     struct thread *td)
2073d1fa59e9SXin LI {
2074d1fa59e9SXin LI 	int error;
2075d1fa59e9SXin LI 	struct tmpfs_node *node;
2076d1fa59e9SXin LI 
2077305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chflags");
2078d1fa59e9SXin LI 
2079d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2080d1fa59e9SXin LI 
20813b5f179dSKenneth D. Merry 	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
20823b5f179dSKenneth D. Merry 	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
20833b5f179dSKenneth D. Merry 	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
20843b5f179dSKenneth D. Merry 	    UF_SPARSE | UF_SYSTEM)) != 0)
2085587fdb53SJaakko Heinonen 		return (EOPNOTSUPP);
2086587fdb53SJaakko Heinonen 
2087d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2088d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
208923f90714SKonstantin Belousov 		return (EROFS);
2090d1fa59e9SXin LI 
2091d1fa59e9SXin LI 	/*
2092d1fa59e9SXin LI 	 * Callers may only modify the file flags on objects they
2093d1fa59e9SXin LI 	 * have VADMIN rights for.
2094d1fa59e9SXin LI 	 */
20950f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2096d1fa59e9SXin LI 		return (error);
2097d1fa59e9SXin LI 	/*
2098d1fa59e9SXin LI 	 * Unprivileged processes are not permitted to unset system
2099d1fa59e9SXin LI 	 * flags, or modify flags if any system flags are set.
2100d1fa59e9SXin LI 	 */
2101cc426dd3SMateusz Guzik 	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
2102587fdb53SJaakko Heinonen 		if (node->tn_flags &
2103587fdb53SJaakko Heinonen 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
2104d1fa59e9SXin LI 			error = securelevel_gt(cred, 0);
2105d1fa59e9SXin LI 			if (error)
2106d1fa59e9SXin LI 				return (error);
2107d1fa59e9SXin LI 		}
2108d1fa59e9SXin LI 	} else {
2109587fdb53SJaakko Heinonen 		if (node->tn_flags &
2110587fdb53SJaakko Heinonen 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
2111587fdb53SJaakko Heinonen 		    ((flags ^ node->tn_flags) & SF_SETTABLE))
2112d1fa59e9SXin LI 			return (EPERM);
2113d1fa59e9SXin LI 	}
2114587fdb53SJaakko Heinonen 	node->tn_flags = flags;
2115d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2116d1fa59e9SXin LI 
2117305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chflags2");
2118d1fa59e9SXin LI 
2119305b4229SKonstantin Belousov 	return (0);
2120d1fa59e9SXin LI }
2121d1fa59e9SXin LI 
2122d1fa59e9SXin LI /*
2123d1fa59e9SXin LI  * Change access mode on the given vnode.
2124d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2125d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2126d1fa59e9SXin LI  */
2127d1fa59e9SXin LI int
21280f01fb01SKonstantin Belousov tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred,
21290f01fb01SKonstantin Belousov     struct thread *td)
2130d1fa59e9SXin LI {
2131d1fa59e9SXin LI 	int error;
2132d1fa59e9SXin LI 	struct tmpfs_node *node;
2133172ffe70SMateusz Guzik 	mode_t newmode;
2134d1fa59e9SXin LI 
2135305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chmod");
2136172ffe70SMateusz Guzik 	ASSERT_VOP_IN_SEQC(vp);
2137d1fa59e9SXin LI 
2138d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2139d1fa59e9SXin LI 
2140d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2141d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2142c12118f6SKa Ho Ng 		return (EROFS);
2143d1fa59e9SXin LI 
2144d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2145d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
2146c12118f6SKa Ho Ng 		return (EPERM);
2147d1fa59e9SXin LI 
2148d1fa59e9SXin LI 	/*
2149d1fa59e9SXin LI 	 * To modify the permissions on a file, must possess VADMIN
2150d1fa59e9SXin LI 	 * for that file.
2151d1fa59e9SXin LI 	 */
21520f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2153d1fa59e9SXin LI 		return (error);
2154d1fa59e9SXin LI 
2155d1fa59e9SXin LI 	/*
2156d1fa59e9SXin LI 	 * Privileged processes may set the sticky bit on non-directories,
2157d1fa59e9SXin LI 	 * as well as set the setgid bit on a file with a group that the
2158d1fa59e9SXin LI 	 * process is not a member of.
2159d1fa59e9SXin LI 	 */
2160d1fa59e9SXin LI 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
2161cc426dd3SMateusz Guzik 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
2162d1fa59e9SXin LI 			return (EFTYPE);
2163d1fa59e9SXin LI 	}
2164d1fa59e9SXin LI 	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
2165cc426dd3SMateusz Guzik 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
2166d1fa59e9SXin LI 		if (error)
2167d1fa59e9SXin LI 			return (error);
2168d1fa59e9SXin LI 	}
2169d1fa59e9SXin LI 
2170172ffe70SMateusz Guzik 	newmode = node->tn_mode & ~ALLPERMS;
2171172ffe70SMateusz Guzik 	newmode |= mode & ALLPERMS;
2172172ffe70SMateusz Guzik 	atomic_store_short(&node->tn_mode, newmode);
2173d1fa59e9SXin LI 
2174d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2175d1fa59e9SXin LI 
2176305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chmod2");
2177d1fa59e9SXin LI 
2178305b4229SKonstantin Belousov 	return (0);
2179d1fa59e9SXin LI }
2180d1fa59e9SXin LI 
2181d1fa59e9SXin LI /*
2182d1fa59e9SXin LI  * Change ownership of the given vnode.  At least one of uid or gid must
2183d1fa59e9SXin LI  * be different than VNOVAL.  If one is set to that value, the attribute
2184d1fa59e9SXin LI  * is unchanged.
2185d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2186d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2187d1fa59e9SXin LI  */
2188d1fa59e9SXin LI int
2189d1fa59e9SXin LI tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
21900f01fb01SKonstantin Belousov     struct thread *td)
2191d1fa59e9SXin LI {
2192d1fa59e9SXin LI 	int error;
2193d1fa59e9SXin LI 	struct tmpfs_node *node;
2194d1fa59e9SXin LI 	uid_t ouid;
2195d1fa59e9SXin LI 	gid_t ogid;
2196172ffe70SMateusz Guzik 	mode_t newmode;
2197d1fa59e9SXin LI 
2198305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chown");
2199172ffe70SMateusz Guzik 	ASSERT_VOP_IN_SEQC(vp);
2200d1fa59e9SXin LI 
2201d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2202d1fa59e9SXin LI 
2203d1fa59e9SXin LI 	/* Assign default values if they are unknown. */
2204d1fa59e9SXin LI 	MPASS(uid != VNOVAL || gid != VNOVAL);
2205d1fa59e9SXin LI 	if (uid == VNOVAL)
2206d1fa59e9SXin LI 		uid = node->tn_uid;
2207d1fa59e9SXin LI 	if (gid == VNOVAL)
2208d1fa59e9SXin LI 		gid = node->tn_gid;
2209d1fa59e9SXin LI 	MPASS(uid != VNOVAL && gid != VNOVAL);
2210d1fa59e9SXin LI 
2211d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2212d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
221323f90714SKonstantin Belousov 		return (EROFS);
2214d1fa59e9SXin LI 
2215d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2216d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
221723f90714SKonstantin Belousov 		return (EPERM);
2218d1fa59e9SXin LI 
2219d1fa59e9SXin LI 	/*
2220d1fa59e9SXin LI 	 * To modify the ownership of a file, the caller must possess
2221d1fa59e9SXin LI 	 * VADMIN for that file.
2222d1fa59e9SXin LI 	 */
22230f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2224d1fa59e9SXin LI 		return (error);
2225d1fa59e9SXin LI 
2226d1fa59e9SXin LI 	/*
2227d1fa59e9SXin LI 	 * To change the owner of a file, or change the group of a file to a
2228d1fa59e9SXin LI 	 * group of which we are not a member, the caller must have
2229d1fa59e9SXin LI 	 * privilege.
2230d1fa59e9SXin LI 	 */
2231d1fa59e9SXin LI 	if ((uid != node->tn_uid ||
2232d1fa59e9SXin LI 	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
2233cc426dd3SMateusz Guzik 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
2234d1fa59e9SXin LI 		return (error);
2235d1fa59e9SXin LI 
2236d1fa59e9SXin LI 	ogid = node->tn_gid;
2237d1fa59e9SXin LI 	ouid = node->tn_uid;
2238d1fa59e9SXin LI 
2239d1fa59e9SXin LI 	node->tn_uid = uid;
2240d1fa59e9SXin LI 	node->tn_gid = gid;
2241d1fa59e9SXin LI 
2242d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2243d1fa59e9SXin LI 
22440f01fb01SKonstantin Belousov 	if ((node->tn_mode & (S_ISUID | S_ISGID)) != 0 &&
22450f01fb01SKonstantin Belousov 	    (ouid != uid || ogid != gid)) {
2246172ffe70SMateusz Guzik 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
2247172ffe70SMateusz Guzik 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
2248172ffe70SMateusz Guzik 			atomic_store_short(&node->tn_mode, newmode);
2249172ffe70SMateusz Guzik 		}
2250d1fa59e9SXin LI 	}
2251d1fa59e9SXin LI 
2252305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chown2");
2253d1fa59e9SXin LI 
2254305b4229SKonstantin Belousov 	return (0);
2255d1fa59e9SXin LI }
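
/*
 * Usage sketch (added for illustration, not part of the original source):
 * the VNOVAL convention above lets a caller change only one of the two
 * ids.  The example_* name below is hypothetical; it changes the group
 * while keeping the owner, then flushes timestamps as the function
 * comment asks.
 */
#if 0	/* illustrative only */
static int
example_chown_group_only(struct vnode *vp, gid_t newgid, struct ucred *cred,
    struct thread *td)
{
	int error;

	/* Keep the owner by passing VNOVAL for the uid. */
	error = tmpfs_chown(vp, (uid_t)VNOVAL, newgid, cred, td);
	if (error == 0)
		tmpfs_update(vp);
	return (error);
}
#endif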
2256d1fa59e9SXin LI 
2257d1fa59e9SXin LI /*
2258d1fa59e9SXin LI  * Change size of the given vnode.
2259d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2260d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2261d1fa59e9SXin LI  */
2262d1fa59e9SXin LI int
2263d1fa59e9SXin LI tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
22640f01fb01SKonstantin Belousov     struct thread *td)
2265d1fa59e9SXin LI {
2266d1fa59e9SXin LI 	int error;
2267d1fa59e9SXin LI 	struct tmpfs_node *node;
2268d1fa59e9SXin LI 
2269305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chsize");
2270d1fa59e9SXin LI 
2271d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2272d1fa59e9SXin LI 
2273d1fa59e9SXin LI 	/* Decide whether this is a valid operation based on the file type. */
2274d1fa59e9SXin LI 	error = 0;
2275d1fa59e9SXin LI 	switch (vp->v_type) {
2276d1fa59e9SXin LI 	case VDIR:
227723f90714SKonstantin Belousov 		return (EISDIR);
2278d1fa59e9SXin LI 
2279d1fa59e9SXin LI 	case VREG:
2280d1fa59e9SXin LI 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
228123f90714SKonstantin Belousov 			return (EROFS);
2282d1fa59e9SXin LI 		break;
2283d1fa59e9SXin LI 
2284d1fa59e9SXin LI 	case VBLK:
2285d1fa59e9SXin LI 		/* FALLTHROUGH */
2286d1fa59e9SXin LI 	case VCHR:
2287d1fa59e9SXin LI 		/* FALLTHROUGH */
2288d1fa59e9SXin LI 	case VFIFO:
228923f90714SKonstantin Belousov 		/*
229023f90714SKonstantin Belousov 		 * Allow modifications of special files even if the file
2291d1fa59e9SXin LI 		 * system is mounted read-only (we are not modifying the
229223f90714SKonstantin Belousov 		 * files themselves, but the objects they represent).
229323f90714SKonstantin Belousov 		 */
229423f90714SKonstantin Belousov 		return (0);
2295d1fa59e9SXin LI 
2296d1fa59e9SXin LI 	default:
2297d1fa59e9SXin LI 		/* Anything else is unsupported. */
229823f90714SKonstantin Belousov 		return (EOPNOTSUPP);
2299d1fa59e9SXin LI 	}
2300d1fa59e9SXin LI 
2301d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2302d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
230323f90714SKonstantin Belousov 		return (EPERM);
2304d1fa59e9SXin LI 
2305b5b16659SKonstantin Belousov 	error = vn_rlimit_trunc(size, td);
2306b5b16659SKonstantin Belousov 	if (error != 0)
2307b5b16659SKonstantin Belousov 		return (error);
2308b5b16659SKonstantin Belousov 
2309d1fa59e9SXin LI 	error = tmpfs_truncate(vp, size);
231023f90714SKonstantin Belousov 	/*
231123f90714SKonstantin Belousov 	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
231223f90714SKonstantin Belousov 	 * for us, as well as update tn_status; no need to do that here.
231323f90714SKonstantin Belousov 	 */
2314d1fa59e9SXin LI 
2315305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chsize2");
2316d1fa59e9SXin LI 
2317305b4229SKonstantin Belousov 	return (error);
2318d1fa59e9SXin LI }
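
/*
 * Usage sketch (added for illustration, not part of the original source):
 * a setattr-style caller is assumed to invoke tmpfs_chsize() only when a
 * new size was actually requested, and to run tmpfs_update() afterwards
 * as the function comment asks.  The example_* name is hypothetical.
 */
#if 0	/* illustrative only */
static int
example_setattr_size(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	int error;

	error = 0;
	if (vap->va_size != (u_quad_t)VNOVAL) {
		error = tmpfs_chsize(vp, vap->va_size, cred, td);
		if (error == 0)
			tmpfs_update(vp);
	}
	return (error);
}
#endif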
2319d1fa59e9SXin LI 
2320d1fa59e9SXin LI /*
2321d1fa59e9SXin LI  * Change access and modification times of the given vnode.
2322d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2323d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2324d1fa59e9SXin LI  */
2325d1fa59e9SXin LI int
23267b81a399SKonstantin Belousov tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
23270f01fb01SKonstantin Belousov     struct ucred *cred, struct thread *td)
2328d1fa59e9SXin LI {
2329d1fa59e9SXin LI 	int error;
2330d1fa59e9SXin LI 	struct tmpfs_node *node;
2331d1fa59e9SXin LI 
2332305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chtimes");
2333d1fa59e9SXin LI 
2334d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2335d1fa59e9SXin LI 
2336d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2337d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
233823f90714SKonstantin Belousov 		return (EROFS);
2339d1fa59e9SXin LI 
2340d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2341d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
234223f90714SKonstantin Belousov 		return (EPERM);
2343d1fa59e9SXin LI 
23440f01fb01SKonstantin Belousov 	error = vn_utimes_perm(vp, vap, cred, td);
23457b81a399SKonstantin Belousov 	if (error != 0)
23469b258fcaSXin LI 		return (error);
2347d1fa59e9SXin LI 
2348382353e2SChristian Brueffer 	if (vap->va_atime.tv_sec != VNOVAL)
2349016b7c7eSKonstantin Belousov 		node->tn_accessed = true;
2350382353e2SChristian Brueffer 	if (vap->va_mtime.tv_sec != VNOVAL)
2351d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_MODIFIED;
2352382353e2SChristian Brueffer 	if (vap->va_birthtime.tv_sec != VNOVAL)
2353d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_MODIFIED;
23547b81a399SKonstantin Belousov 	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
2355382353e2SChristian Brueffer 	if (vap->va_birthtime.tv_sec != VNOVAL)
23567b81a399SKonstantin Belousov 		node->tn_birthtime = vap->va_birthtime;
2357305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chtimes2");
2358d1fa59e9SXin LI 
2359305b4229SKonstantin Belousov 	return (0);
2360d1fa59e9SXin LI }
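
/*
 * Usage sketch (added for illustration, not part of the original source):
 * a caller that wants to set only the modification time to "now" can
 * clear a struct vattr to VNOVAL and fill in just va_mtime.  The
 * example_* name is hypothetical; this is not the actual tmpfs setattr
 * path.
 */
#if 0	/* illustrative only */
static int
example_touch_mtime(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct vattr va;
	int error;

	VATTR_NULL(&va);		/* every field starts as VNOVAL */
	vfs_timestamp(&va.va_mtime);	/* request mtime = now */
	error = tmpfs_chtimes(vp, &va, cred, td);
	if (error == 0)
		tmpfs_update(vp);
	return (error);
}
#endif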
2361d1fa59e9SXin LI 
2362d1fa59e9SXin LI void
2363e1cdc30fSKonstantin Belousov tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
23645dc11286SKonstantin Belousov {
23655dc11286SKonstantin Belousov 
2366e1cdc30fSKonstantin Belousov 	if ((node->tn_status & status) == status || tm->tm_ronly)
23675dc11286SKonstantin Belousov 		return;
23685dc11286SKonstantin Belousov 	TMPFS_NODE_LOCK(node);
23695dc11286SKonstantin Belousov 	node->tn_status |= status;
23705dc11286SKonstantin Belousov 	TMPFS_NODE_UNLOCK(node);
23715dc11286SKonstantin Belousov }
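
/*
 * Usage sketch (added for illustration, not part of the original source):
 * write paths typically defer the timestamp work and only record which
 * timestamps are dirty, e.g. after a successful write (vp is a
 * placeholder local here):
 */
#if 0	/* illustrative only */
	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), VP_TO_TMPFS_NODE(vp),
	    TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
#endif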
23725dc11286SKonstantin Belousov 
2373016b7c7eSKonstantin Belousov void
2374016b7c7eSKonstantin Belousov tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node)
2375016b7c7eSKonstantin Belousov {
2376016b7c7eSKonstantin Belousov 	if (node->tn_accessed || tm->tm_ronly)
2377016b7c7eSKonstantin Belousov 		return;
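	/*
	 * A plain atomic store is sufficient here: concurrent callers only
	 * ever store "true", so the race is benign and the node lock taken
	 * by tmpfs_set_status() above can be avoided on hot access paths.
	 */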
2378016b7c7eSKonstantin Belousov 	atomic_store_8(&node->tn_accessed, true);
2379016b7c7eSKonstantin Belousov }
2380016b7c7eSKonstantin Belousov 
23815dc11286SKonstantin Belousov /* Sync timestamps */
23823b622fc8SMateusz Guzik void
23833b622fc8SMateusz Guzik tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
2384d1fa59e9SXin LI     const struct timespec *mod)
2385d1fa59e9SXin LI {
23863b622fc8SMateusz Guzik 	struct tmpfs_node *node;
2387d1fa59e9SXin LI 	struct timespec now;
2388d1fa59e9SXin LI 
23893b622fc8SMateusz Guzik 	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
23903b622fc8SMateusz Guzik 	node = VP_TO_TMPFS_NODE(vp);
2391d1fa59e9SXin LI 
2392016b7c7eSKonstantin Belousov 	if (!node->tn_accessed &&
2393016b7c7eSKonstantin Belousov 	    (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0)
2394d1fa59e9SXin LI 		return;
2395d1fa59e9SXin LI 
2396b746bf08SXin LI 	vfs_timestamp(&now);
23973b622fc8SMateusz Guzik 	TMPFS_NODE_LOCK(node);
2398016b7c7eSKonstantin Belousov 	if (node->tn_accessed) {
2399d1fa59e9SXin LI 		if (acc == NULL)
2400d1fa59e9SXin LI 			acc = &now;
2401d1fa59e9SXin LI 		node->tn_atime = *acc;
2402d1fa59e9SXin LI 	}
2403d1fa59e9SXin LI 	if (node->tn_status & TMPFS_NODE_MODIFIED) {
2404d1fa59e9SXin LI 		if (mod == NULL)
2405d1fa59e9SXin LI 			mod = &now;
2406d1fa59e9SXin LI 		node->tn_mtime = *mod;
2407d1fa59e9SXin LI 	}
24085dc11286SKonstantin Belousov 	if (node->tn_status & TMPFS_NODE_CHANGED)
2409d1fa59e9SXin LI 		node->tn_ctime = now;
2410016b7c7eSKonstantin Belousov 	node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
2411016b7c7eSKonstantin Belousov 	node->tn_accessed = false;
24125dc11286SKonstantin Belousov 	TMPFS_NODE_UNLOCK(node);
24135dc11286SKonstantin Belousov 
2414d1b06863SMark Murray 	/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
241519fa89e9SMark Murray 	random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
2416d1fa59e9SXin LI }
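
/*
 * Usage sketch (added for illustration, not part of the original source):
 * most callers only want the pending timestamps flushed with the current
 * time, which is done by passing NULL for both the access and
 * modification time arguments (this is what the tmpfs_update() wrapper
 * used elsewhere in this file is assumed to reduce to).
 */
#if 0	/* illustrative only; vp is a placeholder local */
	tmpfs_itimes(vp, NULL, NULL);
#endif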
2417d1fa59e9SXin LI 
2418d1fa59e9SXin LI int
2419d1fa59e9SXin LI tmpfs_truncate(struct vnode *vp, off_t length)
2420d1fa59e9SXin LI {
2421d1fa59e9SXin LI 	struct tmpfs_node *node;
2422860399ebSKonstantin Belousov 	int error;
2423d1fa59e9SXin LI 
2424860399ebSKonstantin Belousov 	if (length < 0)
2425860399ebSKonstantin Belousov 		return (EINVAL);
2426d1fa59e9SXin LI 	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
2427d1fa59e9SXin LI 		return (EFBIG);
2428d1fa59e9SXin LI 
2429860399ebSKonstantin Belousov 	node = VP_TO_TMPFS_NODE(vp);
2430860399ebSKonstantin Belousov 	error = node->tn_size == length ? 0 : tmpfs_reg_resize(vp, length,
2431860399ebSKonstantin Belousov 	    FALSE);
24325dc11286SKonstantin Belousov 	if (error == 0)
2433d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
2434d1fa59e9SXin LI 	tmpfs_update(vp);
2435d1fa59e9SXin LI 
24365dc11286SKonstantin Belousov 	return (error);
2437d1fa59e9SXin LI }
24384fd5efe7SGleb Kurtsou 
24394fd5efe7SGleb Kurtsou static __inline int
24404fd5efe7SGleb Kurtsou tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
24414fd5efe7SGleb Kurtsou {
24424fd5efe7SGleb Kurtsou 	if (a->td_hash > b->td_hash)
24434fd5efe7SGleb Kurtsou 		return (1);
24444fd5efe7SGleb Kurtsou 	else if (a->td_hash < b->td_hash)
24454fd5efe7SGleb Kurtsou 		return (-1);
24464fd5efe7SGleb Kurtsou 	return (0);
24474fd5efe7SGleb Kurtsou }
24484fd5efe7SGleb Kurtsou 
24494fd5efe7SGleb Kurtsou RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
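
/*
 * Lookup sketch (added for illustration, not part of the original source):
 * directory entries are kept in a red-black tree ordered by the FNV hash
 * of the component name, so a hash lookup fills a key dirent with the
 * wanted hash and asks RB_FIND for the first matching entry; names that
 * collide on the same hash need further disambiguation by the caller.
 * The example_* name and the tn_dir.tn_dirhead field name are assumed
 * here for illustration.
 */
#if 0	/* illustrative only */
static struct tmpfs_dirent *
example_dir_lookup_hash(struct tmpfs_node *dnode, uint32_t hash)
{
	struct tmpfs_dirent find;

	find.td_hash = hash;
	return (RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &find));
}
#endif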
2450