xref: /dflybsd-src/sys/vfs/tmpfs/tmpfs_vfsops.c (revision 71c97a3cee892aedd0f1dc81caba4c8fbdf7fc00)
1 /*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Efficient memory file system.
35  *
36  * tmpfs is a file system that uses virtual memory to store file data and
37  * metadata efficiently. It does not follow the structure of an on-disk
38  * file system because it simply does not need to. Instead, it uses
39  * memory-specific data structures and algorithms to automatically
40  * allocate and release resources.
41  */
42 
43 #include <sys/conf.h>
44 #include <sys/param.h>
45 #include <sys/limits.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/kernel.h>
49 #include <sys/stat.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/objcache.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_param.h>
57 
58 #if 0
59 #include <vfs/tmpfs/tmpfs.h>
60 #endif
61 #include "tmpfs.h"
62 #include <vfs/tmpfs/tmpfs_vnops.h>
63 #include <vfs/tmpfs/tmpfs_mount.h>
64 
65 /*
66  * Default permission for root node
67  */
68 #define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
69 
70 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
71 
72 /* --------------------------------------------------------------------- */
73 
74 static int	tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
75 static int	tmpfs_unmount(struct mount *, int);
76 static int	tmpfs_root(struct mount *, struct vnode **);
77 static int	tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
78 static int	tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
79 
80 /* --------------------------------------------------------------------- */
81 boolean_t
82 tmpfs_node_ctor(void *obj, void *privdata, int flags)
83 {
84 	struct tmpfs_node *node = obj;
85 
86 	node->tn_gen++;
87 	node->tn_size = 0;
88 	node->tn_status = 0;
89 	node->tn_flags = 0;
90 	node->tn_links = 0;
91 	node->tn_vnode = NULL;
92 	node->tn_vpstate = 0;
93 	bzero(&node->tn_spec, sizeof(node->tn_spec));
94 
95 	return (TRUE);
96 }
97 
98 static void
99 tmpfs_node_dtor(void *obj, void *privdata)
100 {
101 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
102 	node->tn_type = VNON;
103 	node->tn_vpstate = TMPFS_VNODE_DOOMED;
104 }
105 
106 static void *
107 tmpfs_node_init(void *args, int flags)
108 {
109 	struct tmpfs_node *node;
110 
111 	node = objcache_malloc_alloc(args, flags);
112 	if (node == NULL)
113 		return (NULL);
114 	node->tn_id = 0;
115 	node->tn_blksize = PAGE_SIZE;	/* start small */
116 
117 	lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
118 	node->tn_gen = karc4random();
119 
120 	return node;
121 }
122 
123 static void
124 tmpfs_node_fini(void *obj, void *args)
125 {
126 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
127 	lockuninit(&node->tn_interlock);
128 	objcache_malloc_free(obj, args);
129 }
130 
131 static int
132 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
133 {
134 	struct tmpfs_mount *tmp;
135 	struct tmpfs_node *root;
136 	struct tmpfs_mount_info args;
137 	vm_pindex_t pages;
138 	vm_pindex_t pages_limit;
139 	ino_t nodes;
140 	u_int64_t	maxfsize;
141 	int error;
142 	/* Size counters. */
143 	ino_t	nodes_max;
144 	off_t	size_max;
145 	size_t	maxfsize_max;
146 	size_t	size;
147 
148 	/* Root node attributes. */
149 	uid_t	root_uid = cred->cr_uid;
150 	gid_t	root_gid = cred->cr_gid;
151 	mode_t	root_mode = (VREAD | VWRITE);
152 
153 	if (mp->mnt_flag & MNT_UPDATE) {
154 		/* XXX: There is no support yet to update file system
155 		 * settings.  Should be added. */
156 
157 		return EOPNOTSUPP;
158 	}
159 
160 	/*
161 	 * mount info
162 	 */
163 	bzero(&args, sizeof(args));
164 	size_max  = 0;
165 	nodes_max = 0;
166 	maxfsize_max = 0;
167 
168 	if (path) {
169 		if (data) {
170 			error = copyin(data, &args, sizeof(args));
171 			if (error)
172 				return (error);
173 		}
174 		size_max = args.ta_size_max;
175 		nodes_max = args.ta_nodes_max;
176 		maxfsize_max = args.ta_maxfsize_max;
177 		root_uid = args.ta_root_uid;
178 		root_gid = args.ta_root_gid;
179 		root_mode = args.ta_root_mode;
180 	}
181 
182 	/*
183 	 * If mount by non-root, then verify that user has necessary
184 	 * permissions on the device.
185 	 */
186 	if (cred->cr_uid != 0) {
187 		root_mode = VREAD;
188 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
189 			root_mode |= VWRITE;
190 	}
191 
192 	pages_limit = vm_swap_max + vmstats.v_page_count / 2;
193 
194 	if (size_max == 0) {
195 		pages = pages_limit / 2;
196 	} else if (size_max < PAGE_SIZE) {
197 		pages = 1;
198 	} else if (OFF_TO_IDX(size_max) > pages_limit) {
199 		/*
200 		 * do not force pages = pages_limit for this case, otherwise
201 		 * we might not honor tmpfs size requests from /etc/fstab
202 		 * during boot because they are mounted prior to swap being
203 		 * turned on.
204 		 */
205 		pages = OFF_TO_IDX(size_max);
206 	} else {
207 		pages = OFF_TO_IDX(size_max);
208 	}
209 
210 	if (nodes_max == 0)
211 		nodes = 3 + pages * PAGE_SIZE / 1024;
212 	else if (nodes_max < 3)
213 		nodes = 3;
214 	else if (nodes_max > pages)
215 		nodes = pages;
216 	else
217 		nodes = nodes_max;
218 
219 	maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE;
220 	if (maxfsize_max != 0 && maxfsize > maxfsize_max)
221 		maxfsize = maxfsize_max;
222 
223 	/* Allocate the tmpfs mount structure and fill it. */
224 	tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
225 
226 	tmp->tm_mount = mp;
227 	tmp->tm_nodes_max = nodes;
228 	tmp->tm_nodes_inuse = 0;
229 	tmp->tm_maxfilesize = maxfsize;
230 	LIST_INIT(&tmp->tm_nodes_used);
231 
232 	tmp->tm_pages_max = pages;
233 	tmp->tm_pages_used = 0;
234 
235 	kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
236 	kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
237 	kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
238 
239 	kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
240 			    tmp->tm_nodes_max);
241 
242 	tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
243 	tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
244 
245 	tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
246 	tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
247 
248 	tmp->tm_dirent_pool =  objcache_create( "tmpfs dirent cache",
249 	    0, 0,
250 	    NULL, NULL, NULL,
251 	    objcache_malloc_alloc, objcache_malloc_free,
252 	    &tmp->tm_dirent_zone_malloc_args);
253 	tmp->tm_node_pool = objcache_create( "tmpfs node cache",
254 	    0, 0,
255 	    tmpfs_node_ctor, tmpfs_node_dtor, NULL,
256 	    tmpfs_node_init, tmpfs_node_fini,
257 	    &tmp->tm_node_zone_malloc_args);
258 
259 	tmp->tm_ino = TMPFS_ROOTINO;
260 
261 	/* Allocate the root node. */
262 	error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
263 				 root_mode & ALLPERMS, NULL,
264 				 VNOVAL, VNOVAL, &root);
265 
266 	/*
267 	 * We are backed by swap, set snocache chflags flag so we
268 	 * don't trip over swapcache.
269 	 */
270 	root->tn_flags = SF_NOCACHE;
271 
272 	if (error != 0 || root == NULL) {
273 	    objcache_destroy(tmp->tm_node_pool);
274 	    objcache_destroy(tmp->tm_dirent_pool);
275 	    kfree(tmp, M_TMPFSMNT);
276 	    return error;
277 	}
278 	KASSERT(root->tn_id == TMPFS_ROOTINO,
279 		("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));
280 
281 	atomic_add_int(&root->tn_links, 1);	/* keep around */
282 	tmp->tm_root = root;
283 
284 	mp->mnt_flag |= MNT_LOCAL;
285 	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
286 	mp->mnt_kern_flag |= MNTK_NOMSYNC;
287 	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */
288 	mp->mnt_kern_flag |= MNTK_QUICKHALT;	/* no teardown needed on halt */
289 	mp->mnt_data = (qaddr_t)tmp;
290 	mp->mnt_iosize_max = MAXBSIZE;
291 	vfs_getnewfsid(mp);
292 
293 	vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
294 	vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
295 
296 	copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
297 	bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
298 	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
299 	copyinstr(path, mp->mnt_stat.f_mntonname,
300 		  sizeof(mp->mnt_stat.f_mntonname) -1,
301 		  &size);
302 
303 	tmpfs_statfs(mp, &mp->mnt_stat, cred);
304 
305 	return 0;
306 }
307 
308 /* --------------------------------------------------------------------- */
309 
/* ARGSUSED2 */
/*
 * VFS_UNMOUNT() for tmpfs.
 *
 * Truncates every regular file (throwing the data away rather than
 * flushing it), vflush()es all vnodes, tears down the directory
 * topology, frees every node, and finally destroys the per-mount
 * objcache pools and kmalloc zones.
 *
 * Returns 0 on success, or the vflush() errno if vnodes could not be
 * flushed (in which case the mount remains, with files already
 * truncated).
 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	int error;
	int flags = 0;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct vnode *vp;
	int isok;

	tmp = VFS_TO_TMPFS(mp);
	TMPFS_LOCK(tmp);

	/* Handle forced unmounts. */
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	/*
	 * Finalize all pending I/O.  In the case of tmpfs we want
	 * to throw all the data away so clean out the buffer cache
	 * and vm objects before calling vflush().
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		/*
		 * tn_links is mnt_token protected
		 */
		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);

		while (node->tn_type == VREG && node->tn_vnode) {
			vp = node->tn_vnode;
			vhold(vp);
			TMPFS_NODE_UNLOCK(node);
			lwkt_yield();

			/*
			 * vx_get/vx_put and tmpfs_truncate may block,
			 * releasing the tmpfs mountpoint token.
			 *
			 * Make sure the lock order is correct.
			 */
			vx_get(vp);		/* held vnode */
			TMPFS_NODE_LOCK(node);
			if (node->tn_vnode == vp) {
				/* still the same vnode; safe to truncate */
				tmpfs_truncate(vp, 0);
				isok = 1;
			} else {
				/* vnode was reassociated while we blocked */
				isok = 0;
			}
			TMPFS_NODE_UNLOCK(node);
			vx_put(vp);
			vdrop(vp);
			TMPFS_NODE_LOCK(node);
			if (isok)
				break;
			/* retry */
		}

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Flush all vnodes on the unmount.
	 *
	 * If we fail to flush, we cannot unmount, but all the nodes have
	 * already been truncated. Erroring out is the best we can do.
	 */
	error = vflush(mp, 0, flags);
	if (error != 0) {
		TMPFS_UNLOCK(tmp);
		return (error);
	}

	/*
	 * First pass get rid of all the directory entries and
	 * vnode associations.  This will also destroy the
	 * directory topology and should drop all link counts
	 * to 0 except for the root.
	 *
	 * No vnodes should remain after the vflush above.
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		lwkt_yield();

		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR) {
			struct tmpfs_dirent *de;

			while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL)			{
				tmpfs_dir_detach(node, de);
				tmpfs_free_dirent(tmp, de);
			}
		}
		KKASSERT(node->tn_vnode == NULL);

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Allow the root node to be destroyed by dropping the link count
	 * we bumped in the mount code.
	 */
	KKASSERT(tmp->tm_root);
	TMPFS_NODE_LOCK(tmp->tm_root);
	atomic_add_int(&tmp->tm_root->tn_links, -1);
	TMPFS_NODE_UNLOCK(tmp->tm_root);

	/*
	 * At this point all nodes, including the root node, should have a
	 * link count of 0.  The root is not necessarily going to be last.
	 */
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		if (node->tn_links)
			panic("tmpfs: Dangling nodes during umount (%p)!\n",
			      node);

		TMPFS_NODE_LOCK(node);
		tmpfs_free_node(tmp, node);
		/* eats lock */
		lwkt_yield();
	}
	KKASSERT(tmp->tm_root == NULL);

	/* Destroy the object caches before their backing kmalloc zones. */
	objcache_destroy(tmp->tm_dirent_pool);
	objcache_destroy(tmp->tm_node_pool);

	kmalloc_destroy(&tmp->tm_name_zone);
	kmalloc_destroy(&tmp->tm_dirent_zone);
	kmalloc_destroy(&tmp->tm_node_zone);

	tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;

	/* Everything should be accounted for by now. */
	KKASSERT(tmp->tm_pages_used == 0);
	KKASSERT(tmp->tm_nodes_inuse == 0);

	TMPFS_UNLOCK(tmp);

	/* Throw away the tmpfs_mount structure. */
	kfree(tmp, M_TMPFSMNT);
	mp->mnt_data = NULL;

	mp->mnt_flag &= ~MNT_LOCAL;
	return 0;
}
458 
459 /* --------------------------------------------------------------------- */
460 
461 static int
462 tmpfs_root(struct mount *mp, struct vnode **vpp)
463 {
464 	struct tmpfs_mount *tmp;
465 	int error;
466 
467 	tmp = VFS_TO_TMPFS(mp);
468 	if (tmp->tm_root == NULL) {
469 		kprintf("tmpfs_root: called without root node %p\n", mp);
470 		print_backtrace(-1);
471 		*vpp = NULL;
472 		error = EINVAL;
473 	} else {
474 		error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root,
475 				       LK_EXCLUSIVE, vpp);
476 		(*vpp)->v_flag |= VROOT;
477 		(*vpp)->v_type = VDIR;
478 	}
479 	return error;
480 }
481 
482 /* --------------------------------------------------------------------- */
483 
484 static int
485 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp,
486 	     struct vnode **vpp)
487 {
488 	boolean_t found;
489 	struct tmpfs_fid *tfhp;
490 	struct tmpfs_mount *tmp;
491 	struct tmpfs_node *node;
492 	int rc;
493 
494 	tmp = VFS_TO_TMPFS(mp);
495 
496 	tfhp = (struct tmpfs_fid *) fhp;
497 	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
498 		return EINVAL;
499 
500 	rc = EINVAL;
501 	found = FALSE;
502 
503 	TMPFS_LOCK(tmp);
504 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
505 		if (node->tn_id == tfhp->tf_id &&
506 		    node->tn_gen == tfhp->tf_gen) {
507 			found = TRUE;
508 			break;
509 		}
510 	}
511 
512 	if (found)
513 		rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp);
514 
515 	TMPFS_UNLOCK(tmp);
516 
517 	return (rc);
518 }
519 
520 /* --------------------------------------------------------------------- */
521 
522 /* ARGSUSED2 */
523 static int
524 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
525 {
526 	fsfilcnt_t freenodes;
527 	struct tmpfs_mount *tmp;
528 
529 	tmp = VFS_TO_TMPFS(mp);
530 
531 	/* TMPFS_LOCK(tmp); not really needed */
532 
533 	sbp->f_iosize = PAGE_SIZE;
534 	sbp->f_bsize = PAGE_SIZE;
535 
536 	sbp->f_blocks = tmp->tm_pages_max;
537 	sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
538 	sbp->f_bfree = sbp->f_bavail;
539 
540 	freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
541 
542 	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
543 	sbp->f_ffree = freenodes;
544 	sbp->f_owner = tmp->tm_root->tn_uid;
545 
546 	/* TMPFS_UNLOCK(tmp); */
547 
548 	return 0;
549 }
550 
551 /* --------------------------------------------------------------------- */
552 
553 static int
554 tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
555 {
556 	struct tmpfs_node *node;
557 	struct tmpfs_fid tfh;
558 	node = VP_TO_TMPFS_NODE(vp);
559 	memset(&tfh, 0, sizeof(tfh));
560 	tfh.tf_len = sizeof(struct tmpfs_fid);
561 	tfh.tf_gen = node->tn_gen;
562 	tfh.tf_id = node->tn_id;
563 	memcpy(fhp, &tfh, sizeof(tfh));
564 	return (0);
565 }
566 
567 /* --------------------------------------------------------------------- */
568 
569 static int
570 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
571 	       struct ucred **credanonp)
572 {
573 	struct tmpfs_mount *tmp;
574 	struct netcred *nc;
575 
576 	tmp = (struct tmpfs_mount *) mp->mnt_data;
577 	nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
578 	if (nc == NULL)
579 		return (EACCES);
580 
581 	*exflagsp = nc->netc_exflags;
582 	*credanonp = &nc->netc_anon;
583 
584 	return (0);
585 }
586 
587 /* --------------------------------------------------------------------- */
588 
/*
 * tmpfs vfs operations.
 *
 * Operations not listed here (e.g. sync) fall back to the VFS
 * defaults.  The filesystem registers as MPSAFE via VFCF_MPSAFE.
 */

static struct vfsops tmpfs_vfsops = {
	.vfs_flags =			0,
	.vfs_mount =			tmpfs_mount,
	.vfs_unmount =			tmpfs_unmount,
	.vfs_root =			tmpfs_root,
	.vfs_statfs =			tmpfs_statfs,
	.vfs_fhtovp =			tmpfs_fhtovp,
	.vfs_vptofh =			tmpfs_vptofh,
	.vfs_checkexp =			tmpfs_checkexp,
};

/* Register the filesystem with the kernel and export a module version. */
VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE);
MODULE_VERSION(tmpfs, 1);
606