xref: /netbsd-src/sys/fs/tmpfs/tmpfs_subr.c (revision 5b84b3983f71fd20a534cfa5d1556623a8aaa717)
1 /*	$NetBSD: tmpfs_subr.c,v 1.3 2005/09/12 16:55:01 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Efficient memory file system supporting functions.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.3 2005/09/12 16:55:01 christos Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/dirent.h>
48 #include <sys/event.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/namei.h>
52 #include <sys/time.h>
53 #include <sys/stat.h>
54 #include <sys/systm.h>
55 #include <sys/swap.h>
56 #include <sys/vnode.h>
57 
58 #include <uvm/uvm.h>
59 
60 #include <miscfs/specfs/specdev.h>
61 #include <fs/tmpfs/tmpfs.h>
62 #include <fs/tmpfs/tmpfs_fifoops.h>
63 #include <fs/tmpfs/tmpfs_specops.h>
64 #include <fs/tmpfs/tmpfs_vnops.h>
65 
66 /* --------------------------------------------------------------------- */
67 
68 int
69 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
70     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
71     char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
72 {
73 	struct tmpfs_node *nnode;
74 
75 	/* If the root directory of the 'tmp' file system is not yet
76 	 * allocated, this must be the request to do it. */
77 	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
78 
79 	KASSERT(IFF(type == VLNK, target != NULL));
80 	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
81 
82 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
83 
84 	nnode = NULL;
85 	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
86 		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
87 		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
88 			return ENOSPC;
89 
90 		nnode =
91 		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
92 		if (nnode == NULL)
93 			return ENOSPC;
94 		nnode->tn_id = tmp->tm_nodes_last++;
95 		nnode->tn_gen = 0;
96 	} else {
97 		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
98 		LIST_REMOVE(nnode, tn_entries);
99 		nnode->tn_gen++;
100 	}
101 	KASSERT(nnode != NULL);
102 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
103 
104 	/* Generic initialization. */
105 	nnode->tn_type = type;
106 	nnode->tn_size = 0;
107 	nnode->tn_status = 0;
108 	nnode->tn_flags = 0;
109 	nnode->tn_links = 0;
110 	(void)nanotime(&nnode->tn_atime);
111 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
112 	    nnode->tn_atime;
113 	nnode->tn_uid = uid;
114 	nnode->tn_gid = gid;
115 	nnode->tn_mode = mode;
116 	nnode->tn_vnode = NULL;
117 
118 	/* Type-specific initialization. */
119 	switch (nnode->tn_type) {
120 	case VBLK:
121 	case VCHR:
122 		nnode->tn_rdev = rdev;
123 		break;
124 
125 	case VDIR:
126 		TAILQ_INIT(&nnode->tn_dir);
127 		nnode->tn_parent = (parent == NULL) ? nnode : parent;
128 		nnode->tn_readdir_lastn = 0;
129 		nnode->tn_readdir_lastp = NULL;
130 		nnode->tn_links++;
131 		nnode->tn_parent->tn_links++;
132 		break;
133 
134 	case VFIFO:
135 		/* FALLTHROUGH */
136 	case VSOCK:
137 		break;
138 
139 	case VLNK:
140 		KASSERT(strlen(target) < MAXPATHLEN);
141 		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
142 		    strlen(target), 0);
143 		if (nnode->tn_link == NULL) {
144 			nnode->tn_type = VNON;
145 			tmpfs_free_node(tmp, nnode);
146 			return ENOSPC;
147 		}
148 		strcpy(nnode->tn_link, target);
149 		nnode->tn_size = strlen(target);
150 		break;
151 
152 	case VREG:
153 		nnode->tn_aobj = NULL;
154 		nnode->tn_aobj_pages = 0;
155 		nnode->tn_va = 0;
156 		break;
157 
158 	default:
159 		KASSERT(0);
160 	}
161 
162 	*node = nnode;
163 	return 0;
164 }
165 
166 /* --------------------------------------------------------------------- */
167 
168 void
169 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
170 {
171 	ino_t id;
172 	unsigned long gen;
173 	size_t pages;
174 
175 	switch (node->tn_type) {
176 	case VNON:
177 		/* Do not do anything.  VNON is provided to let the
178 		 * allocation routine clean itself easily by avoiding
179 		 * duplicating code in it. */
180 		/* FALLTHROUGH */
181 	case VBLK:
182 		/* FALLTHROUGH */
183 	case VCHR:
184 		/* FALLTHROUGH */
185 	case VDIR:
186 		/* FALLTHROUGH */
187 	case VFIFO:
188 		/* FALLTHROUGH */
189 	case VSOCK:
190 		pages = 0;
191 		break;
192 
193 	case VLNK:
194 		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
195 		    strlen(node->tn_link));
196 		pages = 0;
197 		break;
198 
199 	case VREG:
200 		if (node->tn_aobj != NULL)
201 			uao_detach(node->tn_aobj);
202 		pages = node->tn_aobj_pages;
203 		break;
204 
205 	default:
206 		KASSERT(0);
207 		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
208 		break;
209 	}
210 
211 	tmp->tm_pages_used -= pages;
212 
213 	LIST_REMOVE(node, tn_entries);
214 	id = node->tn_id;
215 	gen = node->tn_gen;
216 	memset(node, 0, sizeof(struct tmpfs_node));
217 	node->tn_id = id;
218 	node->tn_type = VNON;
219 	node->tn_gen = gen;
220 	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
221 }
222 
223 /* --------------------------------------------------------------------- */
224 
225 int
226 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
227     const char *name, uint16_t len, struct tmpfs_dirent **de)
228 {
229 	struct tmpfs_dirent *nde;
230 
231 	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
232 	if (nde == NULL)
233 		return ENOSPC;
234 
235 	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
236 	if (nde->td_name == NULL) {
237 		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
238 		return ENOSPC;
239 	}
240 	nde->td_namelen = len;
241 	memcpy(nde->td_name, name, len);
242 	nde->td_node = node;
243 
244 	node->tn_links++;
245 	*de = nde;
246 
247 	return 0;
248 }
249 
250 /* --------------------------------------------------------------------- */
251 
252 void
253 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
254     boolean_t node_exists)
255 {
256 	if (node_exists) {
257 		struct tmpfs_node *node;
258 
259 		node = de->td_node;
260 
261 		KASSERT(node->tn_links > 0);
262 		node->tn_links--;
263 	}
264 
265 	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
266 	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
267 }
268 
269 /* --------------------------------------------------------------------- */
270 
/*
 * Allocates a vnode for the node 'node', living in the mount point 'mp'.
 * If the node already has a vnode associated, that one is referenced and
 * locked again instead of allocating a new one.  On success, *vpp holds
 * the locked vnode and zero is returned; on failure *vpp is NULL and an
 * error code is returned.  In both cases node->tn_vnode is updated to
 * match *vpp.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *nvp;
	struct vnode *vp;

	vp = NULL;

	if (node->tn_vnode != NULL) {
		/* Reuse the cached vnode: take a new reference and lock. */
		vp = node->tn_vnode;
		vget(vp, LK_EXCLUSIVE | LK_RETRY);
		error = 0;
		goto out;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0)
		goto out;
	KASSERT(vp != NULL);

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		/* Could not lock the fresh vnode; give it back unused. */
		vp->v_data = NULL;
		ungetnewvnode(vp);
		vp = NULL;
		goto out;
	}

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		/* checkalias() may return an existing vnode for the same
		 * device; if so, migrate our data to it and drop ours. */
		nvp = checkalias(vp, node->tn_rdev, mp);
		if (nvp != NULL) {
			/* Discard unneeded vnode, but save its inode. */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;

			/* XXX spec_vnodeops has no locking, so we have to
			 * do it explicitly. */
			VOP_UNLOCK(vp, 0);
			vp->v_op = spec_vnodeop_p;
			vp->v_flag &= ~VLOCKSWORK;
			vrele(vp);
			vgone(vp);

			/* Reinitialize aliased node. */
			vp = nvp;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			if (error != 0) {
				vp->v_data = NULL;
				vp = NULL;
				goto out;
			}
		}
		break;

	case VDIR:
		/* Only the file system root (its own parent) gets VROOT. */
		vp->v_flag = node->tn_parent == node ? VROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);

	error = 0;

out:
	/* Publish the result; on error vp is NULL, which also clears the
	 * node's cached vnode pointer. */
	*vpp = node->tn_vnode = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}
366 
367 /* --------------------------------------------------------------------- */
368 
369 void
370 tmpfs_free_vp(struct vnode *vp)
371 {
372 	struct tmpfs_node *node;
373 
374 	node = VP_TO_TMPFS_NODE(vp);
375 
376 	node->tn_vnode = NULL;
377 	vp->v_data = NULL;
378 }
379 
380 /* --------------------------------------------------------------------- */
381 
/* Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.  In both cases 'dvp' is unlocked and released
 * before returning. */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
	    cnp->cn_proc, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		/* Undo the node allocation from above. */
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		/* Undo both the dirent and node allocations from above. */
		tmpfs_free_dirent(tmp, de, TRUE);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);
	VN_KNOTE(dvp, NOTE_WRITE);

out:
	/* Release the path name buffer unless the caller asked to keep it
	 * (SAVESTART) and the operation succeeded. */
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);

	KASSERT(!VOP_ISLOCKED(dvp));
	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}
463 
464 /* --------------------------------------------------------------------- */
465 
466 void
467 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
468 {
469 	struct tmpfs_node *dnode;
470 
471 	dnode = VP_TO_TMPFS_DIR(vp);
472 
473 	TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
474 	dnode->tn_size += sizeof(struct tmpfs_dirent);
475 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
476 	    TMPFS_NODE_MODIFIED;
477 	uvm_vnp_setsize(vp, dnode->tn_size);
478 }
479 
480 /* --------------------------------------------------------------------- */
481 
482 void
483 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
484 {
485 	struct tmpfs_node *dnode;
486 
487 	dnode = VP_TO_TMPFS_DIR(vp);
488 
489 	TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
490 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
491 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
492 	    TMPFS_NODE_MODIFIED;
493 	uvm_vnp_setsize(vp, dnode->tn_size);
494 }
495 
496 /* --------------------------------------------------------------------- */
497 
498 struct tmpfs_dirent *
499 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
500 {
501 	boolean_t found;
502 	struct tmpfs_dirent *de;
503 
504 	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
505 	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
506 	    cnp->cn_nameptr[1] == '.')));
507 	TMPFS_VALIDATE_DIR(node);
508 
509 	node->tn_status |= TMPFS_NODE_ACCESSED;
510 
511 	found = 0;
512 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
513 		KASSERT(cnp->cn_namelen < 0xffff);
514 		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
515 		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
516 			found = 1;
517 			break;
518 		}
519 	}
520 
521 	return found ? de : NULL;
522 }
523 
524 /* --------------------------------------------------------------------- */
525 
526 /* Helper function for tmpfs_readdir.  Creates a '.' entry for the given
527  * directory and returns it in the uio space.  The function returns 0
528  * on success, -1 if there was not enough space in the uio structure to
529  * hold the directory entry or an appropriate error code if another
530  * error happens. */
531 int
532 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
533 {
534 	int error;
535 	struct dirent dent;
536 
537 	TMPFS_VALIDATE_DIR(node);
538 	KASSERT(uio->uio_offset == 0);
539 
540 	dent.d_fileno = node->tn_id;
541 	dent.d_type = DT_DIR;
542 	dent.d_namlen = 1;
543 	dent.d_name[0] = '.';
544 	dent.d_name[1] = '\0';
545 	dent.d_reclen = _DIRENT_SIZE(&dent);
546 
547 	if (dent.d_reclen > uio->uio_resid)
548 		error = -1;
549 	else {
550 		error = uiomove(&dent, dent.d_reclen, uio);
551 		if (error == 0)
552 			uio->uio_offset += sizeof(struct tmpfs_dirent) - \
553 			    dent.d_reclen;
554 	}
555 
556 	node->tn_status |= TMPFS_NODE_ACCESSED;
557 
558 	return error;
559 }
560 
561 /* --------------------------------------------------------------------- */
562 
563 /* Helper function for tmpfs_readdir.  Creates a '..' entry for the given
564  * directory and returns it in the uio space.  The function returns 0
565  * on success, -1 if there was not enough space in the uio structure to
566  * hold the directory entry or an appropriate error code if another
567  * error happens. */
568 int
569 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
570 {
571 	int error;
572 	struct dirent dent;
573 
574 	TMPFS_VALIDATE_DIR(node);
575 	KASSERT(uio->uio_offset == sizeof(struct tmpfs_dirent));
576 
577 	dent.d_fileno = node->tn_id;
578 	dent.d_type = DT_DIR;
579 	dent.d_namlen = 2;
580 	dent.d_name[0] = '.';
581 	dent.d_name[1] = '.';
582 	dent.d_name[2] = '\0';
583 	dent.d_reclen = _DIRENT_SIZE(&dent);
584 
585 	if (dent.d_reclen > uio->uio_resid)
586 		error = -1;
587 	else {
588 		error = uiomove(&dent, dent.d_reclen, uio);
589 		if (error == 0)
590 			uio->uio_offset += sizeof(struct tmpfs_dirent) - \
591 			    dent.d_reclen;
592 	}
593 
594 	node->tn_status |= TMPFS_NODE_ACCESSED;
595 
596 	return error;
597 }
598 
599 /* --------------------------------------------------------------------- */
600 
/* Helper function for tmpfs_readdir.  Returns as much directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset,
 * which must be a multiple of sizeof(struct tmpfs_dirent) past the two
 * slots reserved for '.' and '..'.  The function returns 0 on success,
 * -1 if there was not enough space in the uio structure to hold the
 * directory entry or an appropriate error code if another error
 * happens. */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	long cnt, startcnt;
	struct tmpfs_dirent *de;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset % sizeof(struct tmpfs_dirent) == 0);
	KASSERT(uio->uio_offset >= sizeof(struct tmpfs_dirent) * 2);
	KASSERT(uio->uio_offset < node->tn_size +
	    sizeof(struct tmpfs_dirent) * 2);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry. */
	de = NULL;
	/* Subtract 2 to skip the virtual '.' and '..' slots. */
	startcnt = uio->uio_offset / sizeof(struct tmpfs_dirent) - 2;
	if (startcnt == node->tn_readdir_lastn && \
	    node->tn_readdir_lastp != NULL) {
		cnt = node->tn_readdir_lastn;
		de = node->tn_readdir_lastp;
	} else {
		cnt = 0;
		de = TAILQ_FIRST(&node->tn_dir);
		while (cnt < startcnt) {
			cnt++;
			de = TAILQ_NEXT(de, td_entries);

			/* Ensure that if we have not found the desired item,
			 * there are more entries in the directory to continue
			 * the search. */
			KASSERT(IMPLIES(de == TAILQ_LAST(&node->tn_dir,
			    tmpfs_dir), cnt == startcnt));
		}
	}
	KASSERT(cnt == startcnt);
	KASSERT(de != NULL);

	/* Read as much entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		d.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			d.d_type = DT_BLK;
			break;

		case VCHR:
			d.d_type = DT_CHR;
			break;

		case VDIR:
			d.d_type = DT_DIR;
			break;

		case VFIFO:
			d.d_type = DT_FIFO;
			break;

		case VLNK:
			d.d_type = DT_LNK;
			break;

		case VREG:
			d.d_type = DT_REG;
			break;

		case VSOCK:
			d.d_type = DT_SOCK;
			break;

		default:
			/* Unreachable: every live node has a known type. */
			KASSERT(0);
		}
		d.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
		d.d_name[de->td_namelen] = '\0';
		d.d_reclen = _DIRENT_SIZE(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (d.d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);

		cnt++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset in the uio structure to be correctly aligned
	 * with tmpfs_dirent structures.  Otherwise, the offset is the
	 * size of the returned dirent structures, which is useless for us. */
	uio->uio_offset = (cnt + 2) * sizeof(struct tmpfs_dirent);

	/* Cache the current status. */
	if (de == NULL) {
		/* End of directory reached: invalidate the cache. */
		KASSERT(cnt == node->tn_size / sizeof(struct tmpfs_dirent));
		node->tn_readdir_lastn = 0;
		node->tn_readdir_lastp = NULL;
	} else {
		/* Remember where to resume the next readdir. */
		node->tn_readdir_lastn = cnt;
		node->tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}
725 
726 /* --------------------------------------------------------------------- */
727 
728 int
729 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
730 {
731 	int error;
732 	size_t newpages, oldpages;
733 	struct tmpfs_mount *tmp;
734 	struct tmpfs_node *node;
735 
736 	KASSERT(vp->v_type == VREG);
737 	KASSERT(newsize >= 0);
738 	KASSERT(newsize != vp->v_size);
739 
740 	node = VP_TO_TMPFS_NODE(vp);
741 	tmp = VFS_TO_TMPFS(vp->v_mount);
742 
743 	/* Convert the old and new sizes to the number of pages needed to
744 	 * store them.  It may happen that we do not need to do anything
745 	 * because the last allocated page can accommodate the change on
746 	 * its own. */
747 	oldpages = round_page(node->tn_size) / PAGE_SIZE;
748 	KASSERT(oldpages == node->tn_aobj_pages);
749 	newpages = round_page(newsize) / PAGE_SIZE;
750 
751 	if (newpages > oldpages &&
752 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
753 		error = ENOSPC;
754 		goto out;
755 	}
756 
757 	if (newpages == 0) {
758 		uao_detach(node->tn_aobj);
759 		node->tn_aobj = NULL;
760 		node->tn_aobj_pages = 0;
761 		node->tn_va = 0;
762 	} else if (newpages > oldpages) {
763 		vaddr_t va;
764 		struct uvm_object *aobj;
765 
766 		aobj = uao_create(newpages * PAGE_SIZE, 0);
767 		va = vm_map_min(kernel_map);
768 		error = uvm_map(kernel_map, &va, newpages * PAGE_SIZE,
769 		    aobj, 0, 0,
770 		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
771 		    UVM_ADV_RANDOM, 0));
772 		if (error != 0) {
773 			uao_detach(aobj);
774 			error = ENOSPC;
775 			goto out;
776 		}
777 
778 		/* XXX This is really expensive.  Is it possible to do a
779 		 * map entry passing? */
780 		if (node->tn_size > 0) {
781 			KASSERT(node->tn_aobj != NULL);
782 			(void)memcpy((void *)va, (void *)node->tn_va,
783 			    node->tn_size);
784 			uao_detach(node->tn_aobj);
785 		}
786 
787 		node->tn_aobj = aobj;
788 		node->tn_aobj_pages = newpages;
789 		node->tn_va = va;
790 	} else if (newpages < oldpages) {
791 		/* XXX Do we need to shrink the aobj or is the unmap enough? */
792 		uvm_unmap(kernel_map, node->tn_va + (vaddr_t)newpages,
793 		    (vaddr_t)node->tn_aobj_pages * PAGE_SIZE);
794 		node->tn_aobj_pages = newpages;
795 	}
796 
797 	tmp->tm_pages_used += (newpages - oldpages);
798 	node->tn_size = newsize;
799 	uvm_vnp_setsize(vp, newsize);
800 
801 	error = 0;
802 
803 out:
804 	return error;
805 }
806 
807 /* --------------------------------------------------------------------- */
808 
809 /* Returns information about the number of available memory pages,
810  * including physical and virtual ones.
811  *
812  * If 'total' is TRUE, the value returned is the total amount of memory
813  * pages configured for the system (either in use or free).
814  * If it is FALSE, the value returned is the amount of free memory pages.
815  *
816  * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
817  * excessive memory usage.
818  *
819  * XXX: This function is used every time TMPFS_PAGES_MAX is called to gather
820  * the amount of free memory, something that happens during _each_
821  * object allocation.  The time it takes to run this function so many
822  * times is not negligible, so this value should be stored as an
823  * aggregate somewhere, possibly within UVM (we cannot do it ourselves
824  * because we can't get notifications on memory usage changes). */
825 size_t
826 tmpfs_mem_info(boolean_t total)
827 {
828 	int i, sec;
829 	register_t retval;
830 	size_t size;
831 	struct swapent *sep;
832 
833 	sec = uvmexp.nswapdev;
834 	sep = (struct swapent *)malloc(sizeof(struct swapent) * sec, M_TEMP,
835 	    M_WAITOK);
836 	KASSERT(sep != NULL);
837 	uvm_swap_stats(SWAP_STATS, sep, sec, &retval);
838 	KASSERT(retval == sec);
839 
840 	size = 0;
841 	if (total) {
842 		for (i = 0; i < sec; i++)
843 			size += dbtob(sep[i].se_nblks) / PAGE_SIZE;
844 	} else {
845 		for (i = 0; i < sec; i++)
846 			size += dbtob(sep[i].se_nblks - sep[i].se_inuse) /
847 			    PAGE_SIZE;
848 	}
849 	size += uvmexp.free;
850 
851 	free(sep, M_TEMP);
852 
853 	return size;
854 }
855 
856 /* --------------------------------------------------------------------- */
857 
/* Change flags of the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Non-owners need super-user privileges to change flags at all. */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)))
		return error;
	if (cred->cr_uid == 0) {
		/* The super-user is only allowed to change flags if the file
		 * wasn't protected before and the securelevel is zero. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
		    securelevel > 0)
			return EPERM;
		node->tn_flags = flags;
	} else {
		/* Regular users can change flags provided they only want to
		 * change user-specific ones, not those reserved for the
		 * super-user. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
			return EPERM;
		/* Keep the super-user flags untouched and replace only the
		 * user-settable ones. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
908 
909 /* --------------------------------------------------------------------- */
910 
911 /* Change access mode on the given vnode.
912  * Caller should execute VOP_UPDATE on vp after a successful execution.
913  * The vnode must be locked on entry and remain locked on exit. */
914 int
915 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
916 {
917 	int error;
918 	struct tmpfs_node *node;
919 
920 	KASSERT(VOP_ISLOCKED(vp));
921 
922 	node = VP_TO_TMPFS_NODE(vp);
923 
924 	/* Disallow this operation if the file system is mounted read-only. */
925 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
926 		return EROFS;
927 
928 	/* Immutable or append-only files cannot be modified, either. */
929 	if (node->tn_flags & (IMMUTABLE | APPEND))
930 		return EPERM;
931 
932 	/* XXX: The following comes from UFS code, and can be found in
933 	 * several other file systems.  Shouldn't this be centralized
934 	 * somewhere? */
935 	if (cred->cr_uid != node->tn_uid &&
936 	    (error = suser(cred, &p->p_acflag)))
937 		return error;
938 	if (cred->cr_uid != 0) {
939 		if (vp->v_type != VDIR && (mode & S_ISTXT))
940 			return EFTYPE;
941 
942 		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
943 			return EPERM;
944 	}
945 
946 	node->tn_mode = (mode & ALLPERMS);
947 
948 	node->tn_status |= TMPFS_NODE_CHANGED;
949 	VN_KNOTE(vp, NOTE_ATTRIB);
950 
951 	KASSERT(VOP_ISLOCKED(vp));
952 
953 	return 0;
954 }
955 
956 /* --------------------------------------------------------------------- */
957 
/* Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* The caller needs super-user privileges unless it owns the file,
	 * is not changing the owner, and is only changing the group to one
	 * it is a member of (or the file's current group). */
	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
	     groupmember(gid, cred)))) &&
	    ((error = suser(cred, &p->p_acflag)) != 0))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
1009 
1010 /* --------------------------------------------------------------------- */
1011 
1012 /* Change size of the given vnode.
1013  * Caller should execute VOP_UPDATE on vp after a successful execution.
1014  * The vnode must be locked on entry and remain locked on exit. */
1015 int
1016 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
1017     struct proc *p)
1018 {
1019 	int error;
1020 	struct tmpfs_node *node;
1021 
1022 	KASSERT(VOP_ISLOCKED(vp));
1023 
1024 	node = VP_TO_TMPFS_NODE(vp);
1025 
1026 	/* Decide whether this is a valid operation based on the file type. */
1027 	error = 0;
1028 	switch (vp->v_type) {
1029 	case VDIR:
1030 		return EISDIR;
1031 
1032 	case VLNK:
1033 		/* FALLTHROUGH */
1034 	case VREG:
1035 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
1036 			return EROFS;
1037 		break;
1038 
1039 	case VBLK:
1040 		/* FALLTHROUGH */
1041 	case VCHR:
1042 		/* FALLTHROUGH */
1043 	case VSOCK:
1044 		/* FALLTHROUGH */
1045 	case VFIFO:
1046 		/* Allow modifications of special files even if in the file
1047 		 * system is mounted read-only (we are not modifying the
1048 		 * files themselves, but the objects they represent). */
1049 		break;
1050 
1051 	default:
1052 		/* Anything else is unsupported. */
1053 		return EINVAL;
1054 	}
1055 
1056 	/* Immutable or append-only files cannot be modified, either. */
1057 	if (node->tn_flags & (IMMUTABLE | APPEND))
1058 		return EPERM;
1059 
1060 	error = VOP_TRUNCATE(vp, size, 0, cred, p);
1061 	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
1062 	 * for us, as will update tn_status; no need to do that here. */
1063 
1064 	KASSERT(VOP_ISLOCKED(vp));
1065 
1066 	return error;
1067 }
1068 
1069 /* --------------------------------------------------------------------- */
1070 
/* Change access and modification times of the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
    int vaflags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* A non-owner without super-user privileges may still set the
	 * times when VA_UTIMES_NULL is given and write access is granted;
	 * note that a successful VOP_ACCESS resets 'error' to zero. */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
		return error;

	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	/* Let VOP_UPDATE fold the pending status bits into the stored
	 * timestamps. */
	error = VOP_UPDATE(vp, atime, mtime, 0);

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}
1113 }
1114