xref: /openbsd-src/sys/tmpfs/tmpfs_subr.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: tmpfs_subr.c,v 1.23 2019/10/17 11:23:49 millert Exp $	*/
2 /*	$NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $	*/
3 
4 /*
5  * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
6  * Copyright (c) 2013 Pedro Martelletto
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program, and by Mindaugas Rasiukevicius.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Efficient memory file system: interfaces for inode and directory entry
37  * construction, destruction and manipulation.
38  *
39  * Reference counting
40  *
41  *	The link count of inode (tmpfs_node_t::tn_links) is used as a
42  *	reference counter.  However, it has slightly different semantics.
43  *
44  *	For directories - link count represents directory entries, which
45  *	refer to the directories.  In other words, it represents the count
46  *	of sub-directories.  It also takes into account the virtual '.'
47  *	entry (which has no real entry in the list).  For files - link count
48  *	represents the hard links.  Since only empty directories can be
49  *	removed - link count aligns the reference counting requirements
50  *	enough.  Note: to check whether directory is not empty, the inode
51  *	size (tmpfs_node_t::tn_size) can be used.
52  *
53  *	The inode itself, as an object, gathers its first reference when
54  *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
55  *	after regular tmpfs_create(), a file would have a link count of 1,
56  *	while directory after tmpfs_mkdir() would have 2 (due to '.').
57  *
58  * Reclamation
59  *
60  *	It should be noted that tmpfs inodes rely on a combination of vnode
61  *	reference counting and link counting.  That is, an inode can only be
62  *	destroyed if its associated vnode is inactive.  The destruction is
63  *	done on vnode reclamation i.e. tmpfs_reclaim().  It should be noted
64  *	that tmpfs_node_t::tn_links being 0 is a destruction criterion.
65  *
66  *	If an inode has references within the file system (tn_links > 0) and
67  *	its inactive vnode gets reclaimed/recycled - then the association is
68  *	broken in tmpfs_reclaim().  In such case, an inode will always pass
69  *	tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
70  *
71  * Lock order
72  *
73  *	tmpfs_node_t::tn_nlock ->
74  *		struct vnode::v_vlock ->
75  *			struct vnode::v_interlock
76  */
77 
78 #include <sys/param.h>
79 #include <sys/dirent.h>
80 #include <sys/event.h>
81 #include <sys/mount.h>
82 #include <sys/namei.h>
83 #include <sys/time.h>
84 #include <sys/proc.h>
85 #include <sys/stat.h>
86 #include <sys/systm.h>
87 #include <sys/vnode.h>
88 
89 #include <uvm/uvm_aobj.h>
90 
91 #include <tmpfs/tmpfs.h>
92 #include <tmpfs/tmpfs_vnops.h>
93 
94 
95 /* Local functions. */
96 void	tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *);
97 int	tmpfs_dir_getdotents(tmpfs_node_t *, struct dirent *, struct uio *);
98 
99 /*
100  * tmpfs_alloc_node: allocate a new inode of a specified type and
101  * insert it into the list of specified mount point.
102  */
103 int
104 tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
105     mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
106 {
107 	tmpfs_node_t *nnode;
108 	struct uvm_object *uobj;
109 
110 	nnode = tmpfs_node_get(tmp);
111 	if (nnode == NULL) {
112 		return ENOSPC;
113 	}
114 
115 	/* Initially, no references and no associations. */
116 	nnode->tn_links = 0;
117 	nnode->tn_vnode = NULL;
118 	nnode->tn_dirent_hint = NULL;
119 
120 	rw_enter_write(&tmp->tm_acc_lock);
121 	nnode->tn_id = ++tmp->tm_highest_inode;
122 	if (nnode->tn_id == 0) {
123 		--tmp->tm_highest_inode;
124 		rw_exit_write(&tmp->tm_acc_lock);
125 		tmpfs_node_put(tmp, nnode);
126 		return ENOSPC;
127 	}
128 	 rw_exit_write(&tmp->tm_acc_lock);
129 
130 	/* Generic initialization. */
131 	nnode->tn_type = type;
132 	nnode->tn_size = 0;
133 	nnode->tn_flags = 0;
134 	nnode->tn_lockf = NULL;
135 	nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();
136 
137 	nanotime(&nnode->tn_atime);
138 	nnode->tn_birthtime = nnode->tn_atime;
139 	nnode->tn_ctime = nnode->tn_atime;
140 	nnode->tn_mtime = nnode->tn_atime;
141 
142 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
143 
144 	nnode->tn_uid = uid;
145 	nnode->tn_gid = gid;
146 	nnode->tn_mode = mode;
147 
148 	/* Type-specific initialization. */
149 	switch (nnode->tn_type) {
150 	case VBLK:
151 	case VCHR:
152 		/* Character/block special device. */
153 		KASSERT(rdev != VNOVAL);
154 		nnode->tn_spec.tn_dev.tn_rdev = rdev;
155 		break;
156 	case VDIR:
157 		/* Directory. */
158 		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
159 		nnode->tn_spec.tn_dir.tn_parent = NULL;
160 		nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
161 		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
162 
163 		/* Extra link count for the virtual '.' entry. */
164 		nnode->tn_links++;
165 		break;
166 	case VFIFO:
167 	case VSOCK:
168 		break;
169 	case VLNK:
170 		/* Symbolic link.  Target specifies the file name. */
171 		KASSERT(target && strlen(target) < MAXPATHLEN);
172 
173 		nnode->tn_size = strlen(target);
174 		if (nnode->tn_size == 0) {
175 			nnode->tn_spec.tn_lnk.tn_link = NULL;
176 			break;
177 		}
178 		nnode->tn_spec.tn_lnk.tn_link =
179 		    tmpfs_strname_alloc(tmp, nnode->tn_size);
180 		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
181 			tmpfs_node_put(tmp, nnode);
182 			return ENOSPC;
183 		}
184 		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
185 		break;
186 	case VREG:
187 		/* Regular file.  Create an underlying UVM object. */
188 		uobj = uao_create(0, UAO_FLAG_CANFAIL);
189 		if (uobj == NULL) {
190 			tmpfs_node_put(tmp, nnode);
191 			return ENOSPC;
192 		}
193 		nnode->tn_spec.tn_reg.tn_aobj = uobj;
194 		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
195 		nnode->tn_spec.tn_reg.tn_aobj_pgptr = (vaddr_t)NULL;
196 		nnode->tn_spec.tn_reg.tn_aobj_pgnum = (voff_t)-1;
197 		break;
198 	default:
199 		KASSERT(0);
200 	}
201 
202 	rw_init(&nnode->tn_nlock, "tvlk");
203 
204 	rw_enter_write(&tmp->tm_lock);
205 	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
206 	rw_exit_write(&tmp->tm_lock);
207 
208 	*node = nnode;
209 	return 0;
210 }
211 
212 /*
213  * tmpfs_free_node: remove the inode from a list in the mount point and
214  * destroy the inode structures.
215  */
216 void
217 tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
218 {
219 	size_t objsz;
220 
221 	rw_enter_write(&tmp->tm_lock);
222 	LIST_REMOVE(node, tn_entries);
223 	rw_exit_write(&tmp->tm_lock);
224 
225 	switch (node->tn_type) {
226 	case VLNK:
227 		if (node->tn_size > 0) {
228 			KASSERT(node->tn_size <= SIZE_MAX);
229 			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
230 			    node->tn_size);
231 		}
232 		break;
233 	case VREG:
234 		/*
235 		 * Calculate the size of inode data, decrease the used-memory
236 		 * counter, and destroy the underlying UVM object (if any).
237 		 */
238 		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
239 		if (objsz != 0) {
240 			tmpfs_mem_decr(tmp, objsz);
241 		}
242 		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
243 			uao_detach(node->tn_spec.tn_reg.tn_aobj);
244 			node->tn_spec.tn_reg.tn_aobj = NULL;
245 		}
246 		break;
247 	case VDIR:
248 		KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
249 		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
250 		    node == tmp->tm_root);
251 		break;
252 	default:
253 		break;
254 	}
255 
256 	rw_enter_write(&tmp->tm_acc_lock);
257 	if (node->tn_id == tmp->tm_highest_inode)
258 		--tmp->tm_highest_inode;
259 	rw_exit_write(&tmp->tm_acc_lock);
260 
261 	/* mutex_destroy(&node->tn_nlock); */
262 	tmpfs_node_put(tmp, node);
263 }
264 
/*
 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
 *
 * => Must be called with tmpfs_node_t::tn_nlock held.
 * => Returns vnode (*vpp) locked.
 */
int
tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, struct vnode **vpp)
{
	struct vnode *vp, *nvp;
	/* kmutex_t *slock; */
	int error;
again:
	/* If there is already a vnode, try to reclaim it. */
	if ((vp = node->tn_vnode) != NULL) {
		/* atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); */
		node->tn_gen |= TMPFS_RECLAIMING_BIT;
		/* vget() may sleep - cannot hold tn_nlock across it. */
		rw_exit_write(&node->tn_nlock);
		error = vget(vp, LK_EXCLUSIVE);
		if (error == ENOENT) {
			/* Vnode was recycled underneath us; retry. */
			rw_enter_write(&node->tn_nlock);
			goto again;
		}
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
		*vpp = vp;
		return error;
	}
	/* No vnode anymore - clear any stale reclaiming mark. */
	if (TMPFS_NODE_RECLAIMING(node)) {
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
	}

	/*
	 * Get a new vnode and associate it with our inode.  Share the
	 * lock with underlying UVM object, if there is one (VREG case).
	 */
#if 0
	if (node->tn_type == VREG) {
		struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
		slock = uobj->vmobjlock;
	} else {
		slock = NULL;
	}
#endif
	error = getnewvnode(VT_TMPFS, mp, &tmpfs_vops, &vp);
	if (error) {
		rw_exit_write(&node->tn_nlock);
		return error;
	}

	rrw_init_flags(&node->tn_vlock, "tnode", RWL_DUPOK | RWL_IS_VNODE);
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		/*
		 * Device node: use the special vnode ops and, if an
		 * alias for this device exists, merge into it and
		 * dispose of the freshly allocated vnode.
		 */
		vp->v_op = &tmpfs_specvops;
		if ((nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp))) {
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;
			vp->v_op = &spec_vops;
			vrele(vp);
			vgone(vp);
			vp = nvp;
			node->tn_vnode = vp;
		}
		break;
	case VDIR:
		/* Mark the file system root (its own parent). */
		vp->v_flag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VROOT : 0;
		break;
#ifdef FIFO
	case VFIFO:
		vp->v_op = &tmpfs_fifovops;
		break;
#endif
	case VLNK:
	case VREG:
	case VSOCK:
		break;
	default:
		KASSERT(0);
	}

	/* Publish the association, lock the vnode, then drop tn_nlock. */
	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	rw_exit_write(&node->tn_nlock);

	KASSERT(VOP_ISLOCKED(vp));
	*vpp = vp;
	return 0;
}
361 
/*
 * tmpfs_alloc_file: allocate a new file of specified type and adds it
 * into the parent directory.
 *
 * => Credentials of the caller are used.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	*vpp = NULL;

	/* Check for the maximum number of links limit. */
	if (vap->va_type == VDIR) {
		/* Check for maximum links limit. */
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}
		KASSERT(dnode->tn_links < LINK_MAX);
	}

	/* No sequence numbers left for a new directory entry. */
	if (TMPFS_DIRSEQ_FULL(dnode)) {
		error = ENOSPC;
		goto out;
	}

	/* Parent directory has been removed (tn_links == 0). */
	if (dnode->tn_links == 0) {
		error = ENOENT;
		goto out;
	}

	/*
	 * Allocate a node that represents the new file.  Note: group
	 * is inherited from the parent directory, not from vap.
	 */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node);
	if (error)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Get a vnode for the new file. */
	rw_enter_write(&node->tn_nlock);
	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
	if (error) {
		/* Unwind in reverse order of allocation. */
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Associate inode and attach the entry into the directory. */
	tmpfs_dir_attach(dnode, de, node);

out:
	/* On success, release the namei buffer unless caller keeps it. */
	if (error == 0 && (cnp->cn_flags & SAVESTART) == 0)
		pool_put(&namei_pool, cnp->cn_pnbuf);
	return error;
}
430 
431 /*
432  * tmpfs_alloc_dirent: allocates a new directory entry for the inode.
433  * The directory entry contains a path name component.
434  */
435 int
436 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
437     tmpfs_dirent_t **de)
438 {
439 	tmpfs_dirent_t *nde;
440 
441 	nde = tmpfs_dirent_get(tmp);
442 	if (nde == NULL)
443 		return ENOSPC;
444 
445 	nde->td_name = tmpfs_strname_alloc(tmp, len);
446 	if (nde->td_name == NULL) {
447 		tmpfs_dirent_put(tmp, nde);
448 		return ENOSPC;
449 	}
450 	nde->td_namelen = len;
451 	memcpy(nde->td_name, name, len);
452 	nde->td_seq = TMPFS_DIRSEQ_NONE;
453 
454 	*de = nde;
455 	return 0;
456 }
457 
458 /*
459  * tmpfs_free_dirent: free a directory entry.
460  */
461 void
462 tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
463 {
464 
465 	KASSERT(de->td_node == NULL);
466 	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
467 	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
468 	tmpfs_dirent_put(tmp, de);
469 }
470 
/*
 * tmpfs_dir_attach: associate directory entry with a specified inode,
 * and attach the entry into the directory, specified by vnode.
 *
 * => Increases link count on the associated node.
 * => Increases link count on directory node, if our node is VDIR.
 *    It is caller's responsibility to check for the LINK_MAX limit.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
	struct vnode *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp != NULL);
	KASSERT(VOP_ISLOCKED(dvp));

	/* Get a new sequence number. */
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	de->td_seq = tmpfs_dir_getseq(dnode, de);

	/* Associate directory entry and the inode. */
	de->td_node = node;
	KASSERT(node->tn_links < LINK_MAX);
	node->tn_links++;

	/* Save the hint (might overwrite). */
	node->tn_dirent_hint = de;

	/*
	 * Insert the entry to the directory (parent of inode).  The
	 * directory size counts entries: one tmpfs_dirent_t each.
	 */
	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_STATUSALL);
	uvm_vnp_setsize(dvp, dnode->tn_size);

	if (node->tn_type == VDIR) {
		/* Set parent. */
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
		node->tn_spec.tn_dir.tn_parent = dnode;

		/* Increase the link count of parent (child's '..'). */
		KASSERT(dnode->tn_links < LINK_MAX);
		dnode->tn_links++;
		events |= NOTE_LINK;

		TMPFS_VALIDATE_DIR(node);
	}
	/* Notify kqueue listeners on the parent directory. */
	VN_KNOTE(dvp, events);
}
521 
/*
 * tmpfs_dir_detach: disassociate directory entry and its inode,
 * and detach the entry from the directory, specified by vnode.
 *
 * => Decreases link count on the associated node.
 * => Decreases the link count on directory node, if our node is VDIR.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	tmpfs_node_t *node = de->td_node;
	struct vnode *vp, *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp == NULL || VOP_ISLOCKED(dvp));

	/* Deassociate the inode and entry. */
	de->td_node = NULL;
	node->tn_dirent_hint = NULL;

	KASSERT(node->tn_links > 0);
	node->tn_links--;
	/* Last link gone means the file itself is being deleted. */
	if ((vp = node->tn_vnode) != NULL) {
		KASSERT(VOP_ISLOCKED(vp));
		VN_KNOTE(vp, node->tn_links ?  NOTE_LINK : NOTE_DELETE);
	}

	/* If directory - decrease the link count of parent. */
	if (node->tn_type == VDIR) {
		KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
		node->tn_spec.tn_dir.tn_parent = NULL;

		KASSERT(dnode->tn_links > 0);
		dnode->tn_links--;
		events |= NOTE_LINK;
	}

	/*
	 * Remove the entry from the directory.  Invalidate the readdir
	 * cache first, since it may point at this entry.
	 */
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}
	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);

	dnode->tn_size -= sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
	/* Return the sequence number after the size update (may reset). */
	tmpfs_dir_putseq(dnode, de);
	if (dvp) {
		tmpfs_update(dnode, 0);
		uvm_vnp_setsize(dvp, dnode->tn_size);
		VN_KNOTE(dvp, events);
	}
}
575 
576 /*
577  * tmpfs_dir_lookup: find a directory entry in the specified inode.
578  *
579  * Note that the . and .. components are not allowed as they do not
580  * physically exist within directories.
581  */
582 tmpfs_dirent_t *
583 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
584 {
585 	const char *name = cnp->cn_nameptr;
586 	const uint16_t nlen = cnp->cn_namelen;
587 	tmpfs_dirent_t *de;
588 
589 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
590 	KASSERT(nlen != 1 || !(name[0] == '.'));
591 	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
592 	TMPFS_VALIDATE_DIR(node);
593 
594 	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
595 		if (de->td_namelen != nlen)
596 			continue;
597 		if (memcmp(de->td_name, name, nlen) != 0)
598 			continue;
599 		break;
600 	}
601 	tmpfs_update(node, TMPFS_NODE_ACCESSED);
602 	return de;
603 }
604 
605 /*
606  * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
607  * avoid unnecessary tmpfs_dir_lookup().
608  *
609  * => The vnode must be locked.
610  */
611 tmpfs_dirent_t *
612 tmpfs_dir_cached(tmpfs_node_t *node)
613 {
614 	tmpfs_dirent_t *de = node->tn_dirent_hint;
615 
616 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
617 
618 	if (de == NULL) {
619 		return NULL;
620 	}
621 	KASSERT(de->td_node == node);
622 
623 	/*
624 	 * Directories always have a valid hint.  For files, check if there
625 	 * are any hard links.  If there are - hint might be invalid.
626 	 */
627 	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
628 }
629 
630 /*
631  * tmpfs_dir_getseq: get a per-directory sequence number for the entry.
632  */
633 uint64_t
634 tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
635 {
636 	uint64_t seq = de->td_seq;
637 
638 	TMPFS_VALIDATE_DIR(dnode);
639 
640 	if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) {
641 		/* Already set. */
642 		KASSERT(seq >= TMPFS_DIRSEQ_START);
643 		return seq;
644 	}
645 
646 	/*
647 	 * The "." and ".." and the end-of-directory have reserved numbers.
648 	 * The other sequence numbers are allocated incrementally.
649 	 */
650 
651 	seq = dnode->tn_spec.tn_dir.tn_next_seq;
652 	KASSERT(seq >= TMPFS_DIRSEQ_START);
653 	KASSERT(seq < TMPFS_DIRSEQ_END);
654 	dnode->tn_spec.tn_dir.tn_next_seq++;
655 	return seq;
656 }
657 
658 void
659 tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
660 {
661 	uint64_t seq = de->td_seq;
662 
663 	TMPFS_VALIDATE_DIR(dnode);
664 	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq >= TMPFS_DIRSEQ_START);
665 	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END);
666 
667 	de->td_seq = TMPFS_DIRSEQ_NONE;
668 
669 	/* Empty?  We can reset. */
670 	if (dnode->tn_size == 0) {
671 		dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
672 	} else if (seq != TMPFS_DIRSEQ_NONE &&
673 		seq == dnode->tn_spec.tn_dir.tn_next_seq - 1) {
674 		dnode->tn_spec.tn_dir.tn_next_seq--;
675 	}
676 }
677 
678 /*
679  * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number.
680  */
681 tmpfs_dirent_t *
682 tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq)
683 {
684 	tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp;
685 
686 	TMPFS_VALIDATE_DIR(node);
687 
688 	/*
689 	 * First, check the cache.  If does not match - perform a lookup.
690 	 */
691 	if (de && de->td_seq == seq) {
692 		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
693 		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
694 		return de;
695 	}
696 
697 	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
698 		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
699 		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
700 		if (de->td_seq == seq)
701 			return de;
702 	}
703 	return NULL;
704 }
705 
706 /*
707  * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
708  * dot meta entries, that is, "." or "..".  Copy it to the UIO space.
709  */
710 int
711 tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
712 {
713 	tmpfs_dirent_t *de;
714 	off_t next = 0;
715 	int error;
716 
717 	switch (uio->uio_offset) {
718 		case TMPFS_DIRSEQ_DOT:
719 			dp->d_fileno = node->tn_id;
720 			strlcpy(dp->d_name, ".", sizeof(dp->d_name));
721 			next = TMPFS_DIRSEQ_DOTDOT;
722 			break;
723 		case TMPFS_DIRSEQ_DOTDOT:
724 			dp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
725 			strlcpy(dp->d_name, "..", sizeof(dp->d_name));
726 			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
727 			next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
728 			break;
729 		default:
730 			KASSERT(false);
731 	}
732 	dp->d_type = DT_DIR;
733 	dp->d_namlen = strlen(dp->d_name);
734 	dp->d_reclen = DIRENT_SIZE(dp);
735 	dp->d_off = next;
736 
737 	if (dp->d_reclen > uio->uio_resid) {
738 		return EJUSTRETURN;
739 	}
740 
741 	if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
742 		return error;
743 	}
744 
745 	uio->uio_offset = next;
746 	return error;
747 }
748 
/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as much directory entries as can fit in the uio space.
 * => The read starts at uio->uio_offset.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio)
{
	tmpfs_dirent_t *de, *next_de;
	struct dirent dent;
	int error = 0;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);
	memset(&dent, 0, sizeof(dent));

	/* Emit the virtual "." and ".." entries first, if requested. */
	if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	/* Done if we reached the end. */
	if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
		goto done;
	}

	/* Locate the directory entry given by the given sequence number. */
	de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
	if (de == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust UIO space.
	 */
	do {
		/* Translate the inode type into a dirent type. */
		dent.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			dent.d_type = DT_BLK;
			break;
		case VCHR:
			dent.d_type = DT_CHR;
			break;
		case VDIR:
			dent.d_type = DT_DIR;
			break;
		case VFIFO:
			dent.d_type = DT_FIFO;
			break;
		case VLNK:
			dent.d_type = DT_LNK;
			break;
		case VREG:
			dent.d_type = DT_REG;
			break;
		case VSOCK:
			dent.d_type = DT_SOCK;
			break;
		default:
			KASSERT(0);
		}
		dent.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dent.d_name));
		memcpy(dent.d_name, de->td_name, de->td_namelen);
		dent.d_name[de->td_namelen] = '\0';
		dent.d_reclen = DIRENT_SIZE(&dent);

		/* d_off is the seek position of the NEXT entry. */
		next_de = TAILQ_NEXT(de, td_entries);
		if (next_de == NULL)
			dent.d_off = TMPFS_DIRSEQ_EOF;
		else
			dent.d_off = tmpfs_dir_getseq(node, next_de);

		if (dent.d_reclen > uio->uio_resid) {
			/* Exhausted UIO space. */
			error = EJUSTRETURN;
			break;
		}

		/* Copy out the directory entry and continue. */
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error) {
			break;
		}
		de = TAILQ_NEXT(de, td_entries);

	} while (uio->uio_resid > 0 && de);

	/* Cache the last entry or clear and mark EOF. */
	uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
	node->tn_spec.tn_dir.tn_readdir_lastp = de;
done:
	tmpfs_update(node, TMPFS_NODE_ACCESSED);

	if (error == EJUSTRETURN) {
		/* Exhausted UIO space - just return. */
		error = 0;
	}
	KASSERT(error >= 0);
	return error;
}
859 
/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
 */

int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
	size_t newpages, oldpages, bytes;
	off_t oldsize;
	vaddr_t pgoff;
	int error;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		bytes = (newpages - oldpages) << PAGE_SHIFT;
		if (tmpfs_mem_incr(tmp, bytes) == 0)
			return ENOSPC;
		/* Grow the aobj; on failure, undo the memory charge. */
		if (uao_grow(uobj, newpages) != 0) {
			tmpfs_mem_decr(tmp, bytes);
			return ENOSPC;
		}
	}

	/* Commit the new size before touching the backing store. */
	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);
	uvm_vnp_uncache(vp);

	/*
	 * Free "backing store".
	 */
	if (newpages < oldpages) {
		/* Drop the page cached for uiomove before shrinking. */
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		if (uao_shrink(uobj, newpages))
			panic("shrink failed");
		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	if (newsize > oldsize) {
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		pgoff = oldsize & PAGE_MASK;
		if (pgoff != 0) {
			/*
			 * Growing from an offset which is not at a page
			 * boundary; zero out unused bytes in current page.
			 */
			error = tmpfs_zeropg(node, trunc_page(oldsize), pgoff);
			if (error)
				panic("tmpfs_zeropg: error %d", error);
		}
		VN_KNOTE(vp, NOTE_EXTEND);
	}
	return 0;
}
928 
929 /*
930  * tmpfs_chflags: change flags of the given vnode.
931  *
932  */
933 int
934 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
935 {
936 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
937 	int error;
938 
939 	KASSERT(VOP_ISLOCKED(vp));
940 
941 	/* Disallow this operation if the file system is mounted read-only. */
942 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
943 		return EROFS;
944 
945 	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
946 		return error;
947 
948 	if (cred->cr_uid == 0) {
949 		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) &&
950 		    securelevel > 0)
951 			return EPERM;
952 		node->tn_flags = flags;
953 	} else {
954 		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) ||
955 		    (flags & UF_SETTABLE) != flags)
956 			return EPERM;
957 		node->tn_flags &= SF_SETTABLE;
958 		node->tn_flags |= (flags & UF_SETTABLE);
959 	}
960 
961 	tmpfs_update(node, TMPFS_NODE_CHANGED);
962 	VN_KNOTE(vp, NOTE_ATTRIB);
963 	return 0;
964 }
965 
966 /*
967  * tmpfs_chmod: change access mode on the given vnode.
968  *
969  */
970 int
971 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
972 {
973 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
974 	int error;
975 
976 	KASSERT(VOP_ISLOCKED(vp));
977 
978 	/* Disallow this operation if the file system is mounted read-only. */
979 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
980 		return EROFS;
981 
982 	/* Immutable or append-only files cannot be modified, either. */
983 	if (node->tn_flags & (IMMUTABLE | APPEND))
984 		return EPERM;
985 
986 	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
987 		return error;
988 	if (cred->cr_uid != 0) {
989 		if (vp->v_type != VDIR && (mode & S_ISTXT))
990 			return EFTYPE;
991 		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
992 			return EPERM;
993 	}
994 
995 	node->tn_mode = (mode & ALLPERMS);
996 	tmpfs_update(node, TMPFS_NODE_CHANGED);
997 	if ((vp->v_flag & VTEXT) && (node->tn_mode & S_ISTXT) == 0)
998 		uvm_vnp_uncache(vp);
999 	VN_KNOTE(vp, NOTE_ATTRIB);
1000 	return 0;
1001 }
1002 
1003 /*
1004  * tmpfs_chown: change ownership of the given vnode.
1005  *
1006  * => At least one of uid or gid must be different than VNOVAL.
1007  * => Attribute is unchanged for VNOVAL case.
1008  */
1009 int
1010 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct proc *p)
1011 {
1012 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1013 	int error;
1014 
1015 	KASSERT(VOP_ISLOCKED(vp));
1016 
1017 	/* Assign default values if they are unknown. */
1018 	KASSERT(uid != VNOVAL || gid != VNOVAL);
1019 	if (uid == VNOVAL) {
1020 		uid = node->tn_uid;
1021 	}
1022 	if (gid == VNOVAL) {
1023 		gid = node->tn_gid;
1024 	}
1025 
1026 	/* Disallow this operation if the file system is mounted read-only. */
1027 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1028 		return EROFS;
1029 
1030 	/* Immutable or append-only files cannot be modified, either. */
1031 	if (node->tn_flags & (IMMUTABLE | APPEND))
1032 		return EPERM;
1033 
1034 	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
1035 	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
1036 	    (error = suser_ucred(cred)))
1037 	    	return error;
1038 
1039 	node->tn_uid = uid;
1040 	node->tn_gid = gid;
1041 	tmpfs_update(node, TMPFS_NODE_CHANGED);
1042 	VN_KNOTE(vp, NOTE_ATTRIB);
1043 	return 0;
1044 }
1045 
1046 /*
1047  * tmpfs_chsize: change size of the given vnode.
1048  */
1049 int
1050 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, struct proc *p)
1051 {
1052 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1053 
1054 	KASSERT(VOP_ISLOCKED(vp));
1055 
1056 	/* Decide whether this is a valid operation based on the file type. */
1057 	switch (vp->v_type) {
1058 	case VDIR:
1059 		return EISDIR;
1060 	case VREG:
1061 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
1062 			return EROFS;
1063 		}
1064 		break;
1065 	case VBLK:
1066 	case VCHR:
1067 	case VFIFO:
1068 		/*
1069 		 * Allow modifications of special files even if in the file
1070 		 * system is mounted read-only (we are not modifying the
1071 		 * files themselves, but the objects they represent).
1072 		 */
1073 		return 0;
1074 	default:
1075 		return EOPNOTSUPP;
1076 	}
1077 
1078 	/* Immutable or append-only files cannot be modified, either. */
1079 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
1080 		return EPERM;
1081 	}
1082 
1083 	/* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
1084 	return tmpfs_truncate(vp, size);
1085 }
1086 
1087 /*
1088  * tmpfs_chtimes: change access and modification times for vnode.
1089  */
int
tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
    const struct timespec *mtime, int vaflags, struct ucred *cred,
    struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * The owner and root may always set the times.  Otherwise, if the
	 * caller passed a NULL timeval to utimes(2) (VA_UTIMES_NULL),
	 * write access to the file is sufficient; explicit times require
	 * ownership or root.  Note the short-circuit: suser_ucred() and
	 * VOP_ACCESS() each set `error' only when the prior check failed.
	 */
	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
	    	return error;

	/* VNOVAL in tv_nsec marks a "leave unchanged" timestamp. */
	if (atime->tv_nsec != VNOVAL)
		node->tn_atime = *atime;

	if (mtime->tv_nsec != VNOVAL)
		node->tn_mtime = *mtime;

	/* Bump ctime when mtime was set or the caller asked for it. */
	if (mtime->tv_nsec != VNOVAL || (vaflags & VA_UTIMES_CHANGE))
		tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_CHANGED);

	VN_KNOTE(vp, NOTE_ATTRIB);

	return 0;
}
1126 
1127 /*
1128  * tmpfs_update: update timestamps, et al.
1129  */
1130 void
1131 tmpfs_update(tmpfs_node_t *node, int flags)
1132 {
1133 	struct timespec nowtm;
1134 
1135 	nanotime(&nowtm);
1136 
1137 	if (flags & TMPFS_NODE_ACCESSED) {
1138 		node->tn_atime = nowtm;
1139  	}
1140 	if (flags & TMPFS_NODE_MODIFIED) {
1141 		node->tn_mtime = nowtm;
1142  	}
1143 	if (flags & TMPFS_NODE_CHANGED) {
1144  		node->tn_ctime = nowtm;
1145  	}
1146 }
1147 
1148 int
1149 tmpfs_truncate(struct vnode *vp, off_t length)
1150 {
1151 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1152 	int error;
1153 
1154 	if (length < 0) {
1155 		error = EINVAL;
1156 		goto out;
1157 	}
1158 	if (node->tn_size == length) {
1159 		error = 0;
1160 		goto out;
1161 	}
1162 	error = tmpfs_reg_resize(vp, length);
1163 	if (error == 0) {
1164 		tmpfs_update(node, TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED);
1165 	}
1166 out:
1167 	return error;
1168 }
1169 
1170 int
1171 tmpfs_uio_cached(tmpfs_node_t *node)
1172 {
1173 	int pgnum_valid = (node->tn_pgnum != (voff_t)-1);
1174 	int pgptr_valid = (node->tn_pgptr != (vaddr_t)NULL);
1175 	KASSERT(pgnum_valid == pgptr_valid);
1176 	return pgnum_valid && pgptr_valid;
1177 }
1178 
1179 vaddr_t
1180 tmpfs_uio_lookup(tmpfs_node_t *node, voff_t pgnum)
1181 {
1182 	if (tmpfs_uio_cached(node) == 1 && node->tn_pgnum == pgnum)
1183 		return node->tn_pgptr;
1184 
1185 	return (vaddr_t)NULL;
1186 }
1187 
1188 void
1189 tmpfs_uio_uncache(tmpfs_node_t *node)
1190 {
1191 	KASSERT(node->tn_pgnum != (voff_t)-1);
1192 	KASSERT(node->tn_pgptr != (vaddr_t)NULL);
1193 	uvm_unmap(kernel_map, node->tn_pgptr, node->tn_pgptr + PAGE_SIZE);
1194 	node->tn_pgnum = (voff_t)-1;
1195 	node->tn_pgptr = (vaddr_t)NULL;
1196 }
1197 
1198 void
1199 tmpfs_uio_cache(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgptr)
1200 {
1201 	KASSERT(node->tn_pgnum == (voff_t)-1);
1202 	KASSERT(node->tn_pgptr == (vaddr_t)NULL);
1203 	node->tn_pgnum = pgnum;
1204 	node->tn_pgptr = pgptr;
1205 }
1206 
1207 /*
1208  * Be gentle to kernel_map, don't allow more than 4MB in a single transaction.
1209  */
1210 #define TMPFS_UIO_MAXBYTES	((1 << 22) - PAGE_SIZE)
1211 
int
tmpfs_uiomove(tmpfs_node_t *node, struct uio *uio, vsize_t len)
{
	vaddr_t va, pgoff;
	int error, adv;
	voff_t pgnum;
	vsize_t sz;

	/* Split the transfer offset into page number and in-page offset. */
	pgnum = trunc_page(uio->uio_offset);
	pgoff = uio->uio_offset & PAGE_MASK;

	/*
	 * Fast path: a transfer contained in a single page may be able to
	 * reuse the per-node cached kernel mapping of that page.
	 */
	if (pgoff + len < PAGE_SIZE) {
		va = tmpfs_uio_lookup(node, pgnum);
		if (va != (vaddr_t)NULL)
			return uiomove((void *)va + pgoff, len, uio);
	}

	/*
	 * Clamp the mapping size to TMPFS_UIO_MAXBYTES to be gentle to
	 * kernel_map.  NOTE(review): the madvise selection (MADV_NORMAL
	 * for large transfers, MADV_SEQUENTIAL for small ones) looks
	 * inverted at first glance — confirm the intent before changing.
	 */
	if (len >= TMPFS_UIO_MAXBYTES) {
		sz = TMPFS_UIO_MAXBYTES;
		adv = MADV_NORMAL;
	} else {
		sz = len;
		adv = MADV_SEQUENTIAL;
	}

	/* Only one cached mapping per node; drop any stale one first. */
	if (tmpfs_uio_cached(node))
		tmpfs_uio_uncache(node);

	/*
	 * Take a reference for the mapping; it is dropped explicitly via
	 * uao_detach() on failure, or when the mapping is torn down by
	 * uvm_unmap() (below or later in tmpfs_uio_uncache()).
	 */
	uao_reference(node->tn_uobj);

	error = uvm_map(kernel_map, &va, round_page(pgoff + sz), node->tn_uobj,
	    trunc_page(uio->uio_offset), 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, adv, 0));
	if (error) {
		uao_detach(node->tn_uobj); /* Drop reference. */
		return error;
	}

	/*
	 * Copy the data.  Keep the mapping cached when it covers only a
	 * single page and the copy succeeded; otherwise unmap it now.
	 */
	error = uiomove((void *)va + pgoff, sz, uio);
	if (error == 0 && pgoff + sz < PAGE_SIZE)
		tmpfs_uio_cache(node, pgnum, va);
	else
		uvm_unmap(kernel_map, va, va + round_page(pgoff + sz));

	return error;
}
1258 
1259 int
1260 tmpfs_zeropg(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgoff)
1261 {
1262 	vaddr_t va;
1263 	int error;
1264 
1265 	KASSERT(tmpfs_uio_cached(node) == 0);
1266 
1267 	uao_reference(node->tn_uobj);
1268 
1269 	error = uvm_map(kernel_map, &va, PAGE_SIZE, node->tn_uobj, pgnum, 0,
1270 	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
1271 	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
1272 	if (error) {
1273 		uao_detach(node->tn_uobj); /* Drop reference. */
1274 		return error;
1275 	}
1276 
1277 	bzero((void *)va + pgoff, PAGE_SIZE - pgoff);
1278 	uvm_unmap(kernel_map, va, va + PAGE_SIZE);
1279 
1280 	return 0;
1281 }
1282 
1283