1 /*	$NetBSD: tmpfs_subr.c,v 1.17 2005/12/11 12:24:29 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *        This product includes software developed by the NetBSD
22  *        Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Efficient memory file system supporting functions.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.17 2005/12/11 12:24:29 christos Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/dirent.h>
49 #include <sys/event.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/namei.h>
53 #include <sys/time.h>
54 #include <sys/stat.h>
55 #include <sys/systm.h>
56 #include <sys/swap.h>
57 #include <sys/vnode.h>
58 
59 #include <uvm/uvm.h>
60 
61 #include <miscfs/specfs/specdev.h>
62 #include <fs/tmpfs/tmpfs.h>
63 #include <fs/tmpfs/tmpfs_fifoops.h>
64 #include <fs/tmpfs/tmpfs_specops.h>
65 #include <fs/tmpfs/tmpfs_vnops.h>
66 
67 /* --------------------------------------------------------------------- */
68 
69 /*
70  * Allocates a new node of type 'type' inside the 'tmp' mount point, with
71  * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
72  * using the credentials of the process 'p'.
73  *
74  * If the node type is set to 'VDIR', then the parent parameter must point
75  * to the parent directory of the node being created.  It may only be NULL
76  * while allocating the root node.
77  *
78  * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
79  * specifies the device the node represents.
80  *
81  * If the node type is set to 'VLNK', then the parameter target specifies
82  * the file name of the target file for the symbolic link that is being
83  * created.
84  *
85  * Note that new nodes are retrieved from the available list if it has
86  * items or, if it is empty, from the node pool as long as there is enough
87  * space to create them.
88  *
89  * Returns zero on success or an appropriate error code on failure.
90  */
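/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * source): a mkdir-style operation that already holds the mount 'tmp',
 * the parent directory node 'dnode', the caller's credentials and the
 * requested 'mode' could create the backing node roughly like this;
 * compare tmpfs_alloc_file() below, which wraps this very pattern:
 *
 *	struct tmpfs_node *node;
 *	int error;
 *
 *	error = tmpfs_alloc_node(tmp, VDIR, cred->cr_uid, dnode->tn_gid,
 *	    mode, dnode, NULL, VNOVAL, p, &node);
 *	if (error != 0)
 *		return error;
 */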
91 int
92 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
93     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
94     char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
95 {
96 	struct tmpfs_node *nnode;
97 
98 	/* If the root directory of the 'tmp' file system is not yet
99 	 * allocated, this must be the request to do it. */
100 	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
101 
102 	KASSERT(IFF(type == VLNK, target != NULL));
103 	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
104 
105 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
106 
107 	nnode = NULL;
108 	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
109 		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
110 		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
111 			return ENOSPC;
112 
113 		nnode =
114 		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
115 		if (nnode == NULL)
116 			return ENOSPC;
117 		nnode->tn_id = tmp->tm_nodes_last++;
118 		nnode->tn_gen = 0;
119 	} else {
120 		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
121 		LIST_REMOVE(nnode, tn_entries);
122 		nnode->tn_gen++;
123 	}
124 	KASSERT(nnode != NULL);
125 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
126 
127 	/* Generic initialization. */
128 	nnode->tn_type = type;
129 	nnode->tn_size = 0;
130 	nnode->tn_status = 0;
131 	nnode->tn_flags = 0;
132 	nnode->tn_links = 0;
133 	(void)nanotime(&nnode->tn_atime);
134 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
135 	    nnode->tn_atime;
136 	nnode->tn_uid = uid;
137 	nnode->tn_gid = gid;
138 	nnode->tn_mode = mode;
139 	nnode->tn_lockf = NULL;
140 	nnode->tn_vnode = NULL;
141 
142 	/* Type-specific initialization. */
143 	switch (nnode->tn_type) {
144 	case VBLK:
145 	case VCHR:
146 		nnode->tn_rdev = rdev;
147 		break;
148 
149 	case VDIR:
150 		TAILQ_INIT(&nnode->tn_dir);
151 		nnode->tn_parent = (parent == NULL) ? nnode : parent;
152 		nnode->tn_readdir_lastn = 0;
153 		nnode->tn_readdir_lastp = NULL;
154 		nnode->tn_links++;
155 		nnode->tn_parent->tn_links++;
156 		break;
157 
158 	case VFIFO:
159 		/* FALLTHROUGH */
160 	case VSOCK:
161 		break;
162 
163 	case VLNK:
164 		KASSERT(strlen(target) < MAXPATHLEN);
165 		nnode->tn_size = strlen(target);
166 		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
167 		    nnode->tn_size, 0);
168 		if (nnode->tn_link == NULL) {
169 			nnode->tn_type = VNON;
170 			tmpfs_free_node(tmp, nnode);
171 			return ENOSPC;
172 		}
173 		memcpy(nnode->tn_link, target, nnode->tn_size);
174 		break;
175 
176 	case VREG:
177 		nnode->tn_aobj = uao_create(INT32_MAX - PAGE_SIZE, 0);
178 		nnode->tn_aobj_pages = 0;
179 		break;
180 
181 	default:
182 		KASSERT(0);
183 	}
184 
185 	*node = nnode;
186 	return 0;
187 }
188 
189 /* --------------------------------------------------------------------- */
190 
191 /*
192  * Destroys the node pointed to by 'node' from the file system 'tmp'.
193  * If the node does not belong to the given mount point, the results are
194  * unpredictable.
195  *
196  * If the node references a directory, it must contain no entries, because
197  * removing them would require a recursive algorithm, something forbidden
198  * in kernel space.  Furthermore, there is no need to provide such
199  * functionality (recursive removal) because the only primitives offered
200  * to the user are the removal of empty directories and the deletion of
201  * individual files.
202  *
203  * Note that nodes are not really deleted; in fact, once a node has been
204  * allocated, it cannot be deleted during the whole life of the file
205  * system.  Instead, it is moved to the available list and remains there
206  * until reused.
207  */
208 void
209 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
210 {
211 	ino_t id;
212 	unsigned long gen;
213 	size_t pages;
214 
215 	switch (node->tn_type) {
216 	case VNON:
217 		/* Do not do anything.  VNON is provided to let the
218 		 * allocation routine clean up after itself easily,
219 		 * avoiding duplicated code. */
220 		/* FALLTHROUGH */
221 	case VBLK:
222 		/* FALLTHROUGH */
223 	case VCHR:
224 		/* FALLTHROUGH */
225 	case VDIR:
226 		/* FALLTHROUGH */
227 	case VFIFO:
228 		/* FALLTHROUGH */
229 	case VSOCK:
230 		pages = 0;
231 		break;
232 
233 	case VLNK:
234 		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
235 		    node->tn_size);
236 		pages = 0;
237 		break;
238 
239 	case VREG:
240 		if (node->tn_aobj != NULL)
241 			uao_detach(node->tn_aobj);
242 		pages = node->tn_aobj_pages;
243 		break;
244 
245 	default:
246 		KASSERT(0);
247 		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
248 		break;
249 	}
250 
251 	tmp->tm_pages_used -= pages;
252 
253 	LIST_REMOVE(node, tn_entries);
254 	id = node->tn_id;
255 	gen = node->tn_gen;
256 	memset(node, 0, sizeof(struct tmpfs_node));
257 	node->tn_id = id;
258 	node->tn_type = VNON;
259 	node->tn_gen = gen;
260 	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
261 }
262 
263 /* --------------------------------------------------------------------- */
264 
265 /*
266  * Allocates a new directory entry for the node 'node' with the name 'name'.
267  * The new directory entry is returned in *de.
268  *
269  * The link count of node is increased by one to reflect the new object
270  * referencing it.
271  *
272  * Returns zero on success or an appropriate error code on failure.
273  */
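/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * source): a directory entry is normally allocated here and then hooked
 * into the directory with tmpfs_dir_attach(), e.g.:
 *
 *	struct tmpfs_dirent *de;
 *
 *	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr,
 *	    cnp->cn_namelen, &de);
 *	if (error != 0)
 *		return error;
 *	tmpfs_dir_attach(dvp, de);
 *
 * tmpfs_alloc_file() below follows exactly this sequence.
 */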
274 int
275 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
276     const char *name, uint16_t len, struct tmpfs_dirent **de)
277 {
278 	struct tmpfs_dirent *nde;
279 
280 	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
281 	if (nde == NULL)
282 		return ENOSPC;
283 
284 	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
285 	if (nde->td_name == NULL) {
286 		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
287 		return ENOSPC;
288 	}
289 	nde->td_namelen = len;
290 	memcpy(nde->td_name, name, len);
291 	nde->td_node = node;
292 
293 	node->tn_links++;
294 	*de = nde;
295 
296 	return 0;
297 }
298 
299 /* --------------------------------------------------------------------- */
300 
301 /*
302  * Frees a directory entry.  It is the caller's responsibility to destroy
303  * the node referenced by it if needed.
304  *
305  * The link count of node is decreased by one to reflect the removal of an
306  * object that referenced it.  This only happens if 'node_exists' is true;
307  * otherwise the function will not access the node referred to by the
308  * directory entry, as it may already have been released from the outside.
309  */
310 void
311 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
312     boolean_t node_exists)
313 {
314 	if (node_exists) {
315 		struct tmpfs_node *node;
316 
317 		node = de->td_node;
318 
319 		KASSERT(node->tn_links > 0);
320 		node->tn_links--;
321 	}
322 
323 	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
324 	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
325 }
326 
327 /* --------------------------------------------------------------------- */
328 
329 /*
330  * Allocates a new vnode for the node 'node' or returns a new reference to
331  * an existing one if the node already has a vnode referencing it.  The
332  * resulting locked vnode is returned in *vpp.
333  *
334  * Returns zero on success or an appropriate error code on failure.
335  */
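/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * source): a lookup-style operation that has found the directory entry
 * 'de' matching a name would typically obtain the corresponding vnode
 * with:
 *
 *	error = tmpfs_alloc_vp(dvp->v_mount, de->td_node, vpp);
 *
 * On success *vpp is returned locked; the caller is expected to release
 * it (e.g. with vput()) once it is done with it.
 */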
336 int
337 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
338 {
339 	int error;
340 	struct vnode *nvp;
341 	struct vnode *vp;
342 
343 	vp = NULL;
344 
345 	if (node->tn_vnode != NULL) {
346 		vp = node->tn_vnode;
347 		vget(vp, LK_EXCLUSIVE | LK_RETRY);
348 		error = 0;
349 		goto out;
350 	}
351 
352 	/* Get a new vnode and associate it with our node. */
353 	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
354 	if (error != 0)
355 		goto out;
356 	KASSERT(vp != NULL);
357 
358 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
359 	if (error != 0) {
360 		vp->v_data = NULL;
361 		ungetnewvnode(vp);
362 		vp = NULL;
363 		goto out;
364 	}
365 
366 	vp->v_data = node;
367 	vp->v_type = node->tn_type;
368 
369 	/* Type-specific initialization. */
370 	switch (node->tn_type) {
371 	case VBLK:
372 		/* FALLTHROUGH */
373 	case VCHR:
374 		vp->v_op = tmpfs_specop_p;
375 		nvp = checkalias(vp, node->tn_rdev, mp);
376 		if (nvp != NULL) {
377 			/* Discard unneeded vnode, but save its inode. */
378 			nvp->v_data = vp->v_data;
379 			vp->v_data = NULL;
380 
381 			/* XXX spec_vnodeops has no locking, so we have to
382 			 * do it explicitly. */
383 			VOP_UNLOCK(vp, 0);
384 			vp->v_op = spec_vnodeop_p;
385 			vp->v_flag &= ~VLOCKSWORK;
386 			vrele(vp);
387 			vgone(vp);
388 
389 			/* Reinitialize aliased node. */
390 			vp = nvp;
391 			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
392 			if (error != 0) {
393 				vp->v_data = NULL;
394 				vp = NULL;
395 				goto out;
396 			}
397 		}
398 		break;
399 
400 	case VDIR:
401 		vp->v_flag = node->tn_parent == node ? VROOT : 0;
402 		break;
403 
404 	case VFIFO:
405 		vp->v_op = tmpfs_fifoop_p;
406 		break;
407 
408 	case VLNK:
409 		/* FALLTHROUGH */
410 	case VREG:
411 		/* FALLTHROUGH */
412 	case VSOCK:
413 		break;
414 
415 	default:
416 		KASSERT(0);
417 	}
418 
419 	uvm_vnp_setsize(vp, node->tn_size);
420 
421 	error = 0;
422 
423 out:
424 	*vpp = node->tn_vnode = vp;
425 
426 	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
427 	KASSERT(*vpp == node->tn_vnode);
428 
429 	return error;
430 }
431 
432 /* --------------------------------------------------------------------- */
433 
434 /*
435  * Destroys the association between the vnode vp and the node it
436  * references.
437  */
438 void
439 tmpfs_free_vp(struct vnode *vp)
440 {
441 	struct tmpfs_node *node;
442 
443 	node = VP_TO_TMPFS_NODE(vp);
444 
445 	node->tn_vnode = NULL;
446 	vp->v_data = NULL;
447 }
448 
449 /* --------------------------------------------------------------------- */
450 
451 /*
452  * Allocates a new file of the type given in 'vap' and adds it to the parent
453  * directory 'dvp'; this addition is done using the component name in 'cnp'.
454  * The ownership of the new file is automatically assigned based on the
455  * credentials of the caller (through 'cnp'), the group is set based on
456  * the parent directory and the mode is determined from the 'vap' argument.
457  * If successful, *vpp holds a vnode to the newly created file and zero
458  * is returned.  Otherwise *vpp is NULL and the function returns an
459  * appropriate error code.
460  */
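/*
 * Illustrative sketch (not part of the original source): the create-style
 * vnode operations are expected to be thin wrappers around this helper;
 * for example, a VOP_MKDIR implementation can boil down to roughly
 *
 *	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 *
 * while a VOP_SYMLINK implementation passes the link target as the last
 * argument.
 */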
461 int
462 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
463     struct componentname *cnp, char *target)
464 {
465 	int error;
466 	struct tmpfs_dirent *de;
467 	struct tmpfs_mount *tmp;
468 	struct tmpfs_node *dnode;
469 	struct tmpfs_node *node;
470 	struct tmpfs_node *parent;
471 
472 	KASSERT(VOP_ISLOCKED(dvp));
473 	KASSERT(cnp->cn_flags & HASBUF);
474 
475 	tmp = VFS_TO_TMPFS(dvp->v_mount);
476 	dnode = VP_TO_TMPFS_DIR(dvp);
477 	*vpp = NULL;
478 
479 	/* If the entry we are creating is a directory, we must not overflow
480 	 * the link count of its parent, because the parent will gain a new
481 	 * link. */
482 	if (vap->va_type == VDIR) {
483 		/* Ensure that we do not overflow the maximum number of links
484 		 * imposed by the system. */
485 		KASSERT(dnode->tn_links <= LINK_MAX);
486 		if (dnode->tn_links == LINK_MAX) {
487 			error = EMLINK;
488 			goto out;
489 		}
490 
491 		parent = dnode;
492 	} else
493 		parent = NULL;
494 
495 	/* Allocate a node that represents the new file. */
496 	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
497 	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
498 	    cnp->cn_lwp->l_proc, &node);
499 	if (error != 0)
500 		goto out;
501 
502 	/* Allocate a directory entry that points to the new file. */
503 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
504 	    &de);
505 	if (error != 0) {
506 		tmpfs_free_node(tmp, node);
507 		goto out;
508 	}
509 
510 	/* Allocate a vnode for the new file. */
511 	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
512 	if (error != 0) {
513 		tmpfs_free_dirent(tmp, de, TRUE);
514 		tmpfs_free_node(tmp, node);
515 		goto out;
516 	}
517 
518 	/* Now that all required items are allocated, we can proceed to
519 	 * insert the new node into the directory, an operation that
520 	 * cannot fail. */
521 	tmpfs_dir_attach(dvp, de);
522 	VN_KNOTE(dvp, NOTE_WRITE);
523 
524 out:
525 	if (error != 0 || !(cnp->cn_flags & SAVESTART))
526 		PNBUF_PUT(cnp->cn_pnbuf);
527 	vput(dvp);
528 
529 	KASSERT(!VOP_ISLOCKED(dvp));
530 	KASSERT(IFF(error == 0, *vpp != NULL));
531 
532 	return error;
533 }
534 
535 /* --------------------------------------------------------------------- */
536 
537 /*
538  * Attaches the directory entry de to the directory represented by vp.
539  * Note that this does not change the link count of the node pointed to by
540  * the directory entry, as this is done by tmpfs_alloc_dirent.
541  */
542 void
543 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
544 {
545 	struct tmpfs_node *dnode;
546 
547 	dnode = VP_TO_TMPFS_DIR(vp);
548 
549 	TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
550 	dnode->tn_size += sizeof(struct tmpfs_dirent);
551 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
552 	    TMPFS_NODE_MODIFIED;
553 	uvm_vnp_setsize(vp, dnode->tn_size);
554 }
555 
556 /* --------------------------------------------------------------------- */
557 
558 /*
559  * Detaches the directory entry de from the directory represented by vp.
560  * Note that this does not change the link count of the node pointed to by
561  * the directory entry, as this is done by tmpfs_free_dirent.
562  */
563 void
564 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
565 {
566 	struct tmpfs_node *dnode;
567 
568 	KASSERT(VOP_ISLOCKED(vp));
569 
570 	dnode = VP_TO_TMPFS_DIR(vp);
571 
572 	if (dnode->tn_readdir_lastp == de) {
573 		dnode->tn_readdir_lastn = 0;
574 		dnode->tn_readdir_lastp = NULL;
575 	}
576 
577 	TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
578 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
579 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
580 	    TMPFS_NODE_MODIFIED;
581 	uvm_vnp_setsize(vp, dnode->tn_size);
582 }
583 
584 /* --------------------------------------------------------------------- */
585 
586 /*
587  * Looks for a directory entry in the directory represented by 'node'.
588  * 'cnp' describes the name of the entry to look for.  Note that the .
589  * and .. components are not allowed as they do not physically exist
590  * within directories.
591  *
592  * Returns a pointer to the entry when found, otherwise NULL.
593  */
594 struct tmpfs_dirent *
595 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
596 {
597 	boolean_t found;
598 	struct tmpfs_dirent *de;
599 
600 	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
601 	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
602 	    cnp->cn_nameptr[1] == '.')));
603 	TMPFS_VALIDATE_DIR(node);
604 
605 	node->tn_status |= TMPFS_NODE_ACCESSED;
606 
607 	found = 0;
608 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
609 		KASSERT(cnp->cn_namelen < 0xffff);
610 		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
611 		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
612 			found = 1;
613 			break;
614 		}
615 	}
616 
617 	return found ? de : NULL;
618 }
619 
620 /* --------------------------------------------------------------------- */
621 
622 /*
623  * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
624  * directory and returns it in the uio space.  The function returns 0
625  * on success, -1 if there was not enough space in the uio structure to
626  * hold the directory entry or an appropriate error code if another
627  * error happens.
628  */
629 int
630 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
631 {
632 	int error;
633 	struct dirent dent;
634 
635 	TMPFS_VALIDATE_DIR(node);
636 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
637 
638 	dent.d_fileno = node->tn_id;
639 	dent.d_type = DT_DIR;
640 	dent.d_namlen = 1;
641 	dent.d_name[0] = '.';
642 	dent.d_name[1] = '\0';
643 	dent.d_reclen = _DIRENT_SIZE(&dent);
644 
645 	if (dent.d_reclen > uio->uio_resid)
646 		error = -1;
647 	else {
648 		error = uiomove(&dent, dent.d_reclen, uio);
649 		if (error == 0)
650 			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
651 	}
652 
653 	node->tn_status |= TMPFS_NODE_ACCESSED;
654 
655 	return error;
656 }
657 
658 /* --------------------------------------------------------------------- */
659 
660 /*
661  * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
662  * directory and returns it in the uio space.  The function returns 0
663  * on success, -1 if there was not enough space in the uio structure to
664  * hold the directory entry or an appropriate error code if another
665  * error happens.
666  */
667 int
668 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
669 {
670 	int error;
671 	struct dirent dent;
672 
673 	TMPFS_VALIDATE_DIR(node);
674 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
675 
676 	dent.d_fileno = node->tn_parent->tn_id;
677 	dent.d_type = DT_DIR;
678 	dent.d_namlen = 2;
679 	dent.d_name[0] = '.';
680 	dent.d_name[1] = '.';
681 	dent.d_name[2] = '\0';
682 	dent.d_reclen = _DIRENT_SIZE(&dent);
683 
684 	if (dent.d_reclen > uio->uio_resid)
685 		error = -1;
686 	else {
687 		error = uiomove(&dent, dent.d_reclen, uio);
688 		if (error == 0) {
689 			struct tmpfs_dirent *de;
690 
691 			de = TAILQ_FIRST(&node->tn_dir);
692 			if (de == NULL)
693 				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
694 			else
695 				uio->uio_offset = TMPFS_DIRCOOKIE(de);
696 		}
697 	}
698 
699 	node->tn_status |= TMPFS_NODE_ACCESSED;
700 
701 	return error;
702 }
703 
704 /* --------------------------------------------------------------------- */
705 
706 /*
707  * Looks up a directory entry by its associated cookie.
708  */
709 struct tmpfs_dirent *
710 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
711 {
712 	struct tmpfs_dirent *de;
713 
714 	if (cookie == node->tn_readdir_lastn &&
715 	    node->tn_readdir_lastp != NULL) {
716 		return node->tn_readdir_lastp;
717 	}
718 
719 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
720 		if (TMPFS_DIRCOOKIE(de) == cookie) {
721 			break;
722 		}
723 	}
724 
725 	return de;
726 }
727 
728 /* --------------------------------------------------------------------- */
729 
730 /*
731  * Helper function for tmpfs_readdir.  Returns as many directory entries
732  * as can fit in the uio space.  The read starts at uio->uio_offset.
733  * The function returns 0 on success, -1 if there was not enough space
734  * in the uio structure to hold the directory entry or an appropriate
735  * error code if another error happens.
736  */
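/*
 * Illustrative sketch (not part of the original source): readdir-style
 * code is expected to chain the three helpers according to the cookie
 * stored in uio_offset and to turn the special -1 "out of uio space"
 * result back into success, roughly as follows (with 'error' starting at
 * zero and 'cnt' an off_t counting the entries returned):
 *
 *	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT)
 *		error = tmpfs_dir_getdotdent(node, uio);
 *	if (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT)
 *		error = tmpfs_dir_getdotdotdent(node, uio);
 *	if (error == 0)
 *		error = tmpfs_dir_getdents(node, uio, &cnt);
 *	if (error == -1)
 *		error = 0;
 */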
737 int
738 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
739 {
740 	int error;
741 	off_t startcookie;
742 	struct tmpfs_dirent *de;
743 
744 	TMPFS_VALIDATE_DIR(node);
745 
746 	/* Locate the first directory entry we have to return.  We have cached
747 	 * the last readdir in the node, so use those values if appropriate.
748 	 * Otherwise do a linear scan to find the requested entry. */
749 	startcookie = uio->uio_offset;
750 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
751 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
752 	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
753 		return 0;
754 	} else {
755 		de = tmpfs_dir_lookupbycookie(node, startcookie);
756 	}
757 	if (de == NULL) {
758 		return EINVAL;
759 	}
760 
761 	/* Read as many entries as possible; i.e., until we reach the end of
762 	 * the directory or we exhaust uio space. */
763 	do {
764 		struct dirent d;
765 
766 		/* Create a dirent structure representing the current
767 		 * tmpfs_node and fill it. */
768 		d.d_fileno = de->td_node->tn_id;
769 		switch (de->td_node->tn_type) {
770 		case VBLK:
771 			d.d_type = DT_BLK;
772 			break;
773 
774 		case VCHR:
775 			d.d_type = DT_CHR;
776 			break;
777 
778 		case VDIR:
779 			d.d_type = DT_DIR;
780 			break;
781 
782 		case VFIFO:
783 			d.d_type = DT_FIFO;
784 			break;
785 
786 		case VLNK:
787 			d.d_type = DT_LNK;
788 			break;
789 
790 		case VREG:
791 			d.d_type = DT_REG;
792 			break;
793 
794 		case VSOCK:
795 			d.d_type = DT_SOCK;
796 			break;
797 
798 		default:
799 			KASSERT(0);
800 		}
801 		d.d_namlen = de->td_namelen;
802 		KASSERT(de->td_namelen < sizeof(d.d_name));
803 		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
804 		d.d_name[de->td_namelen] = '\0';
805 		d.d_reclen = _DIRENT_SIZE(&d);
806 
807 		/* Stop reading if the directory entry we are processing is
808 		 * bigger than the amount of data that can be returned. */
809 		if (d.d_reclen > uio->uio_resid) {
810 			error = -1;
811 			break;
812 		}
813 
814 		/* Copy the new dirent structure into the output buffer and
815 		 * advance pointers. */
816 		error = uiomove(&d, d.d_reclen, uio);
817 
818 		(*cntp)++;
819 		de = TAILQ_NEXT(de, td_entries);
820 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
821 
822 	/* Update the offset and cache. */
823 	if (de == NULL) {
824 		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
825 		node->tn_readdir_lastn = 0;
826 		node->tn_readdir_lastp = NULL;
827 	} else {
828 		node->tn_readdir_lastn = uio->uio_offset = TMPFS_DIRCOOKIE(de);
829 		node->tn_readdir_lastp = de;
830 	}
831 
832 	node->tn_status |= TMPFS_NODE_ACCESSED;
833 
834 	return error;
835 }
836 
837 /* --------------------------------------------------------------------- */
838 
839 /*
840  * Resizes the aobj associated with the regular file pointed to by vp to
841  * the size newsize.  'vp' must point to a vnode that represents a regular
842  * file.  'newsize' must not be negative.
843  *
844  * Returns zero on success or an appropriate error code on failure.
845  */
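/*
 * Worked example (illustrative only): on a machine with 4096-byte pages,
 * growing a file from 5000 to 9000 bytes gives oldpages =
 * round_page(5000) / PAGE_SIZE = 2 and newpages = round_page(9000) /
 * PAGE_SIZE = 3, so one extra page is charged against
 * TMPFS_PAGES_AVAIL(tmp); growing from 5000 to 6000 bytes keeps both
 * counts at 2 and costs nothing.  Shrinking releases the now-unneeded
 * backing store and zeroes the truncated tail of the last page.
 */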
846 int
847 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
848 {
849 	int error;
850 	size_t newpages, oldpages;
851 	struct tmpfs_mount *tmp;
852 	struct tmpfs_node *node;
853 	off_t oldsize;
854 
855 	KASSERT(vp->v_type == VREG);
856 	KASSERT(newsize >= 0);
857 
858 	node = VP_TO_TMPFS_NODE(vp);
859 	tmp = VFS_TO_TMPFS(vp->v_mount);
860 
861 	/* Convert the old and new sizes to the number of pages needed to
862 	 * store them.  It may happen that we do not need to do anything
863 	 * because the last allocated page can accommodate the change on
864 	 * its own. */
865 	oldsize = node->tn_size;
866 	oldpages = round_page(oldsize) / PAGE_SIZE;
867 	KASSERT(oldpages == node->tn_aobj_pages);
868 	newpages = round_page(newsize) / PAGE_SIZE;
869 
870 	if (newpages > oldpages &&
871 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
872 		error = ENOSPC;
873 		goto out;
874 	}
875 
876 	node->tn_aobj_pages = newpages;
877 
878 	tmp->tm_pages_used += (newpages - oldpages);
879 	node->tn_size = newsize;
880 	uvm_vnp_setsize(vp, newsize);
881 	if (newsize < oldsize) {
882 		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;
883 
884 		/*
885 		 * free "backing store"
886 		 */
887 
888 		if (newpages < oldpages) {
889 			struct uvm_object *uobj = node->tn_aobj;
890 
891 			simple_lock(&uobj->vmobjlock);
892 			uao_dropswap_range(uobj, newpages, oldpages);
893 			simple_unlock(&uobj->vmobjlock);
894 		}
895 
896 		/*
897 		 * zero out the truncated part of the last page.
898 		 */
899 
900 		uvm_vnp_zerorange(vp, newsize, zerolen);
901 	}
902 
903 	error = 0;
904 
905 out:
906 	return error;
907 }
908 
909 /* --------------------------------------------------------------------- */
910 
911 /*
912  * Returns information about the number of available memory pages,
913  * including physical and virtual ones.
914  *
915  * If 'total' is TRUE, the value returned is the total number of memory
916  * pages configured for the system (either in use or free).
917  * If it is FALSE, the value returned is the number of free memory pages.
918  *
919  * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
920  * excessive memory usage.
921  *
922  */
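/*
 * Illustrative example (hypothetical figures): with 1000 swap pages
 * available of which 200 are swapped-only, 3000 free pages, 500 file
 * cache pages and 800 wired pages, tmpfs_mem_info(FALSE) yields
 * (1000 - 200) + 3000 + 500 - 800 = 3500 pages, while
 * tmpfs_mem_info(TRUE) does not subtract the swapped-only pages and
 * yields 3700.
 */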
923 size_t
924 tmpfs_mem_info(boolean_t total)
925 {
926 	size_t size;
927 
928 	size = 0;
929 	size += uvmexp.swpgavail;
930 	if (!total) {
931 		size -= uvmexp.swpgonly;
932 	}
933 	size += uvmexp.free;
934 	size += uvmexp.filepages;
935 	if (size > uvmexp.wired) {
936 		size -= uvmexp.wired;
937 	} else {
938 		size = 0;
939 	}
940 
941 	return size;
942 }
943 
944 /* --------------------------------------------------------------------- */
945 
946 /*
947  * Change flags of the given vnode.
948  * Caller should execute tmpfs_update on vp after a successful execution.
949  * The vnode must be locked on entry and remain locked on exit.
950  */
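/*
 * Illustrative example (not from the original source): a regular user who
 * owns the file may set or clear user flags such as UF_NODUMP, but any
 * attempt to touch the super-user flags (SF_IMMUTABLE, SF_APPEND, ...),
 * or to change a file that already carries one of them, fails with EPERM.
 * The super-user may additionally manipulate the SF_* flags, subject to
 * the securelevel check below.
 */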
951 int
952 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
953 {
954 	int error;
955 	struct tmpfs_node *node;
956 
957 	KASSERT(VOP_ISLOCKED(vp));
958 
959 	node = VP_TO_TMPFS_NODE(vp);
960 
961 	/* Disallow this operation if the file system is mounted read-only. */
962 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
963 		return EROFS;
964 
965 	/* XXX: The following comes from UFS code, and can be found in
966 	 * several other file systems.  Shouldn't this be centralized
967 	 * somewhere? */
968 	if (cred->cr_uid != node->tn_uid &&
969 	    (error = suser(cred, &p->p_acflag)))
970 		return error;
971 	if (cred->cr_uid == 0) {
972 		/* The super-user may change the flags of a protected file
973 		 * only while the securelevel is not above zero. */
974 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
975 		    securelevel > 0)
976 			return EPERM;
977 		node->tn_flags = flags;
978 	} else {
979 		/* Regular users can change flags provided they only want to
980 		 * change user-specific ones, not those reserved for the
981 		 * super-user. */
982 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
983 		    (flags & UF_SETTABLE) != flags)
984 			return EPERM;
985 		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
986 			return EPERM;
987 		node->tn_flags &= SF_SETTABLE;
988 		node->tn_flags |= (flags & UF_SETTABLE);
989 	}
990 
991 	node->tn_status |= TMPFS_NODE_CHANGED;
992 	VN_KNOTE(vp, NOTE_ATTRIB);
993 
994 	KASSERT(VOP_ISLOCKED(vp));
995 
996 	return 0;
997 }
998 
999 /* --------------------------------------------------------------------- */
1000 
1001 /*
1002  * Change access mode on the given vnode.
1003  * Caller should execute tmpfs_update on vp after a successful execution.
1004  * The vnode must be locked on entry and remain locked on exit.
1005  */
1006 int
1007 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
1008 {
1009 	int error;
1010 	struct tmpfs_node *node;
1011 
1012 	KASSERT(VOP_ISLOCKED(vp));
1013 
1014 	node = VP_TO_TMPFS_NODE(vp);
1015 
1016 	/* Disallow this operation if the file system is mounted read-only. */
1017 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1018 		return EROFS;
1019 
1020 	/* Immutable or append-only files cannot be modified, either. */
1021 	if (node->tn_flags & (IMMUTABLE | APPEND))
1022 		return EPERM;
1023 
1024 	/* XXX: The following comes from UFS code, and can be found in
1025 	 * several other file systems.  Shouldn't this be centralized
1026 	 * somewhere? */
1027 	if (cred->cr_uid != node->tn_uid &&
1028 	    (error = suser(cred, &p->p_acflag)))
1029 		return error;
1030 	if (cred->cr_uid != 0) {
1031 		if (vp->v_type != VDIR && (mode & S_ISTXT))
1032 			return EFTYPE;
1033 
1034 		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
1035 			return EPERM;
1036 	}
1037 
1038 	node->tn_mode = (mode & ALLPERMS);
1039 
1040 	node->tn_status |= TMPFS_NODE_CHANGED;
1041 	VN_KNOTE(vp, NOTE_ATTRIB);
1042 
1043 	KASSERT(VOP_ISLOCKED(vp));
1044 
1045 	return 0;
1046 }
1047 
1048 /* --------------------------------------------------------------------- */
1049 
1050 /*
1051  * Change ownership of the given vnode.  At least one of uid or gid must
1052  * be different from VNOVAL.  If one of them is set to that value, the
1053  * corresponding attribute is left unchanged.
1054  * Caller should execute tmpfs_update on vp after a successful execution.
1055  * The vnode must be locked on entry and remain locked on exit.
1056  */
1057 int
1058 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
1059     struct proc *p)
1060 {
1061 	int error;
1062 	struct tmpfs_node *node;
1063 
1064 	KASSERT(VOP_ISLOCKED(vp));
1065 
1066 	node = VP_TO_TMPFS_NODE(vp);
1067 
1068 	/* Assign default values if they are unknown. */
1069 	KASSERT(uid != VNOVAL || gid != VNOVAL);
1070 	if (uid == VNOVAL)
1071 		uid = node->tn_uid;
1072 	if (gid == VNOVAL)
1073 		gid = node->tn_gid;
1074 	KASSERT(uid != VNOVAL && gid != VNOVAL);
1075 
1076 	/* Disallow this operation if the file system is mounted read-only. */
1077 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1078 		return EROFS;
1079 
1080 	/* Immutable or append-only files cannot be modified, either. */
1081 	if (node->tn_flags & (IMMUTABLE | APPEND))
1082 		return EPERM;
1083 
1084 	/* XXX: The following comes from UFS code, and can be found in
1085 	 * several other file systems.  Shouldn't this be centralized
1086 	 * somewhere? */
1087 	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
1088 	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
1089 	     groupmember(gid, cred)))) &&
1090 	    ((error = suser(cred, &p->p_acflag)) != 0))
1091 		return error;
1092 
1093 	node->tn_uid = uid;
1094 	node->tn_gid = gid;
1095 
1096 	node->tn_status |= TMPFS_NODE_CHANGED;
1097 	VN_KNOTE(vp, NOTE_ATTRIB);
1098 
1099 	KASSERT(VOP_ISLOCKED(vp));
1100 
1101 	return 0;
1102 }
1103 
1104 /* --------------------------------------------------------------------- */
1105 
1106 /*
1107  * Change size of the given vnode.
1108  * Caller should execute tmpfs_update on vp after a successful execution.
1109  * The vnode must be locked on entry and remain locked on exit.
1110  */
1111 int
1112 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
1113     struct proc *p)
1114 {
1115 	int error;
1116 	struct tmpfs_node *node;
1117 
1118 	KASSERT(VOP_ISLOCKED(vp));
1119 
1120 	node = VP_TO_TMPFS_NODE(vp);
1121 
1122 	/* Decide whether this is a valid operation based on the file type. */
1123 	error = 0;
1124 	switch (vp->v_type) {
1125 	case VDIR:
1126 		return EISDIR;
1127 
1128 	case VREG:
1129 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
1130 			return EROFS;
1131 		break;
1132 
1133 	case VBLK:
1134 		/* FALLTHROUGH */
1135 	case VCHR:
1136 		/* FALLTHROUGH */
1137 	case VFIFO:
1138 		/* Allow modifications of special files even if the file
1139 		 * system is mounted read-only (we are not modifying the
1140 		 * files themselves, but the objects they represent). */
1141 		return 0;
1142 
1143 	default:
1144 		/* Anything else is unsupported. */
1145 		return EOPNOTSUPP;
1146 	}
1147 
1148 	/* Immutable or append-only files cannot be modified, either. */
1149 	if (node->tn_flags & (IMMUTABLE | APPEND))
1150 		return EPERM;
1151 
1152 	error = tmpfs_truncate(vp, size);
1153 	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
1154 	 * for us, as well as update tn_status; no need to do that here. */
1155 
1156 	KASSERT(VOP_ISLOCKED(vp));
1157 
1158 	return error;
1159 }
1160 
1161 /* --------------------------------------------------------------------- */
1162 
1163 /*
1164  * Change access and modification times of the given vnode.
1165  * Caller should execute tmpfs_update on vp after a successful execution.
1166  * The vnode must be locked on entry and remain locked on exit.
1167  */
1168 int
1169 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
1170     int vaflags, struct ucred *cred, struct lwp *l)
1171 {
1172 	int error;
1173 	struct tmpfs_node *node;
1174 
1175 	KASSERT(VOP_ISLOCKED(vp));
1176 
1177 	node = VP_TO_TMPFS_NODE(vp);
1178 
1179 	/* Disallow this operation if the file system is mounted read-only. */
1180 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1181 		return EROFS;
1182 
1183 	/* Immutable or append-only files cannot be modified, either. */
1184 	if (node->tn_flags & (IMMUTABLE | APPEND))
1185 		return EPERM;
1186 
1187 	/* XXX: The following comes from UFS code, and can be found in
1188 	 * several other file systems.  Shouldn't this be centralized
1189 	 * somewhere? */
1190 	if (cred->cr_uid != node->tn_uid &&
1191 	    (error = suser(cred, &l->l_proc->p_acflag)) &&
1192 	    ((vaflags & VA_UTIMES_NULL) == 0 ||
1193 	    (error = VOP_ACCESS(vp, VWRITE, cred, l))))
1194 		return error;
1195 
1196 	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
1197 		node->tn_status |= TMPFS_NODE_ACCESSED;
1198 
1199 	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
1200 		node->tn_status |= TMPFS_NODE_MODIFIED;
1201 
1202 	tmpfs_update(vp, atime, mtime, 0);
1203 
1204 	KASSERT(VOP_ISLOCKED(vp));
1205 
1206 	return 0;
1207 }
1208 
1209 /* --------------------------------------------------------------------- */
1210 
1211 /* Sync timestamps */
1212 void
1213 tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
1214     const struct timespec *mod)
1215 {
1216 	struct tmpfs_node *node;
1217 	const struct timespec *ts = NULL;
1218 	struct timespec tsb;
1219 
1220 	node = VP_TO_TMPFS_NODE(vp);
1221 
1222 	if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
1223 	    TMPFS_NODE_CHANGED)) == 0)
1224 		return;
1225 
1226 	if (node->tn_status & TMPFS_NODE_ACCESSED) {
1227 		if (acc == NULL)
1228 			acc = ts == NULL ? (ts = nanotime(&tsb)) : ts;
1229 		node->tn_atime = *acc;
1230 	}
1231 	if (node->tn_status & TMPFS_NODE_MODIFIED) {
1232 		if (mod == NULL)
1233 			mod = ts == NULL ? (ts = nanotime(&tsb)) : ts;
1234 		node->tn_mtime = *mod;
1235 	}
1236 	if (node->tn_status & TMPFS_NODE_CHANGED) {
1237 		if (ts == NULL)
1238 			ts = nanotime(&tsb);
1239 		node->tn_ctime = *ts;
1240 	}
1241 	node->tn_status &=
1242 	    ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
1243 }
1244 
1245 /* --------------------------------------------------------------------- */
1246 
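/*
 * Updates the timestamps of the node referenced by vp according to the
 * TMPFS_NODE_* bits accumulated in its tn_status field, by means of
 * tmpfs_itimes.  'acc' and 'mod', when not NULL, provide the values to
 * use for the access and modification times instead of the current time.
 * 'flags' may carry UPDATE_CLOSE, which currently receives no special
 * treatment.
 * The vnode must be locked on entry and remain locked on exit.
 */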
1247 void
1248 tmpfs_update(struct vnode *vp, const struct timespec *acc,
1249     const struct timespec *mod, int flags)
1250 {
1251 
1252 	struct tmpfs_node *node;
1253 
1254 	KASSERT(VOP_ISLOCKED(vp));
1255 
1256 	node = VP_TO_TMPFS_NODE(vp);
1257 
1258 	if (flags & UPDATE_CLOSE)
1259 		; /* XXX Need to do anything special? */
1260 
1261 	tmpfs_itimes(vp, acc, mod);
1262 
1263 	KASSERT(VOP_ISLOCKED(vp));
1264 }
1265 
1266 /* --------------------------------------------------------------------- */
1267 
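/*
 * Truncates (or extends) the file referenced by vp to the size 'length'.
 * A negative length yields EINVAL and a length equal to the current size
 * is a no-op.  On a successful resize, the NOTE_ATTRIB (and, when
 * growing, NOTE_EXTEND) kevents are raised and the node is marked as
 * changed and modified.  The timestamps are updated before returning.
 *
 * Returns zero on success or an appropriate error code on failure.
 */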
1268 int
1269 tmpfs_truncate(struct vnode *vp, off_t length)
1270 {
1271 	boolean_t extended;
1272 	int error;
1273 	struct tmpfs_node *node;
1274 
1275 	node = VP_TO_TMPFS_NODE(vp);
1276 	extended = length > node->tn_size;
1277 
1278 	if (length < 0) {
1279 		error = EINVAL;
1280 		goto out;
1281 	}
1282 
1283 	if (node->tn_size == length) {
1284 		error = 0;
1285 		goto out;
1286 	}
1287 
1288 	error = tmpfs_reg_resize(vp, length);
1289 	if (error == 0) {
1290 		VN_KNOTE(vp, NOTE_ATTRIB | (extended ? NOTE_EXTEND : 0));
1291 		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1292 	}
1293 
1294 out:
1295 	tmpfs_update(vp, NULL, NULL, 0);
1296 
1297 	return error;
1298 }
1299