xref: /dflybsd-src/sys/vfs/tmpfs/tmpfs_vfsops.c (revision a42c8ee581107512c9160b3b352135f5c5eb90fe)
1 /*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Efficient memory file system.
35  *
36  * tmpfs is a file system that uses NetBSD's virtual memory sub-system
37  * (the well-known UVM) to store file data and metadata in an efficient
38  * way.  This means that it does not follow the structure of an on-disk
39  * file system because it simply does not need to.  Instead, it uses
40  * memory-specific data structures and algorithms to automatically
41  * allocate and release resources.
42  */
43 
44 #include <sys/conf.h>
45 #include <sys/param.h>
46 #include <sys/limits.h>
47 #include <sys/lock.h>
48 #include <sys/mutex.h>
49 #include <sys/kernel.h>
50 #include <sys/stat.h>
51 #include <sys/systm.h>
52 #include <sys/sysctl.h>
53 #include <sys/objcache.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_param.h>
58 
59 #include <vfs/tmpfs/tmpfs.h>
60 #include <vfs/tmpfs/tmpfs_vnops.h>
61 #include <vfs/tmpfs/tmpfs_args.h>
62 
63 /*
64  * Default permission for root node
65  */
66 #define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
67 
68 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
69 
70 /* --------------------------------------------------------------------- */
71 
72 static int	tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
73 static int	tmpfs_unmount(struct mount *, int);
74 static int	tmpfs_root(struct mount *, struct vnode **);
75 static int	tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
76 static int	tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
77 
78 /* --------------------------------------------------------------------- */
79 int
80 tmpfs_node_ctor(void *obj, void *privdata, int flags)
81 {
82 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
83 
84 	node->tn_gen++;
85 	node->tn_size = 0;
86 	node->tn_status = 0;
87 	node->tn_flags = 0;
88 	node->tn_links = 0;
89 	node->tn_vnode = NULL;
90 	node->tn_vpstate = TMPFS_VNODE_WANT;
91 	bzero(&node->tn_spec, sizeof(node->tn_spec));
92 
93 	return (1);
94 }
95 
96 static void
97 tmpfs_node_dtor(void *obj, void *privdata)
98 {
99 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
100 	node->tn_type = VNON;
101 	node->tn_vpstate = TMPFS_VNODE_DOOMED;
102 }
103 
104 static void*
105 tmpfs_node_init(void *args, int flags)
106 {
107 	struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);
108 	if (node == NULL)
109 		return (NULL);
110 	node->tn_id = 0;
111 
112 	lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
113 	node->tn_gen = karc4random();
114 
115 	return node;
116 }
117 
118 static void
119 tmpfs_node_fini(void *obj, void *args)
120 {
121 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
122 	lockuninit(&node->tn_interlock);
123 	objcache_malloc_free(obj, args);
124 }
125 
126 static int
127 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
128 {
129 	struct tmpfs_mount *tmp;
130 	struct tmpfs_node *root;
131 	struct tmpfs_args args;
132 	vm_pindex_t pages;
133 	vm_pindex_t pages_limit;
134 	ino_t nodes;
135 	u_int64_t	maxfsize;
136 	int error;
137 	/* Size counters. */
138 	ino_t	nodes_max;
139 	off_t	size_max;
140 	size_t	maxfsize_max;
141 	size_t	size;
142 
143 	/* Root node attributes. */
144 	uid_t	root_uid = cred->cr_uid;
145 	gid_t	root_gid = cred->cr_gid;
146 	mode_t	root_mode = (VREAD | VWRITE);
147 
148 	if (mp->mnt_flag & MNT_UPDATE) {
149 		/* XXX: There is no support yet to update file system
150 		 * settings.  Should be added. */
151 
152 		return EOPNOTSUPP;
153 	}
154 
155 	/*
156 	 * mount info
157 	 */
158 	bzero(&args, sizeof(args));
159 	size_max  = 0;
160 	nodes_max = 0;
161 	maxfsize_max = 0;
162 
163 	if (path) {
164 		if (data) {
165 			error = copyin(data, &args, sizeof(args));
166 			if (error)
167 				return (error);
168 		}
169 		size_max = args.ta_size_max;
170 		nodes_max = args.ta_nodes_max;
171 		maxfsize_max = args.ta_maxfsize_max;
172 		root_uid = args.ta_root_uid;
173 		root_gid = args.ta_root_gid;
174 		root_mode = args.ta_root_mode;
175 	}
176 
177 	/*
178 	 * If mount by non-root, then verify that user has necessary
179 	 * permissions on the device.
180 	 */
181 	if (cred->cr_uid != 0) {
182 		root_mode = VREAD;
183 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
184 			root_mode |= VWRITE;
185 	}
186 
187 	pages_limit = vm_swap_max + vmstats.v_page_count / 2;
188 
189 	if (size_max == 0) {
190 		pages = pages_limit / 2;
191 	} else if (size_max < PAGE_SIZE) {
192 		pages = 1;
193 	} else if (OFF_TO_IDX(size_max) > pages_limit) {
194 		/*
195 		 * do not force pages = pages_limit for this case, otherwise
196 		 * we might not honor tmpfs size requests from /etc/fstab
197 		 * during boot because they are mounted prior to swap being
198 		 * turned on.
199 		 */
200 		pages = OFF_TO_IDX(size_max);
201 	} else {
202 		pages = OFF_TO_IDX(size_max);
203 	}
204 
205 	if (nodes_max == 0)
206 		nodes = 3 + pages * PAGE_SIZE / 1024;
207 	else if (nodes_max < 3)
208 		nodes = 3;
209 	else if (nodes_max > pages)
210 		nodes = pages;
211 	else
212 		nodes = nodes_max;
213 
214 	maxfsize = IDX_TO_OFF(pages_limit);
215 	if (maxfsize_max != 0 && maxfsize > maxfsize_max)
216 		maxfsize = maxfsize_max;
217 
218 	/* Allocate the tmpfs mount structure and fill it. */
219 	tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
220 
221 	lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
222 	tmp->tm_nodes_max = nodes;
223 	tmp->tm_nodes_inuse = 0;
224 	tmp->tm_maxfilesize = maxfsize;
225 	LIST_INIT(&tmp->tm_nodes_used);
226 
227 	tmp->tm_pages_max = pages;
228 	tmp->tm_pages_used = 0;
229 
230 	kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
231 	kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
232 	kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
233 
234 	kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
235 			    tmp->tm_nodes_max);
236 
237 	tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
238 	tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
239 
240 	tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
241 	tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
242 
243 	tmp->tm_dirent_pool =  objcache_create( "tmpfs dirent cache",
244 	    0, 0,
245 	    NULL, NULL, NULL,
246 	    objcache_malloc_alloc, objcache_malloc_free,
247 	    &tmp->tm_dirent_zone_malloc_args);
248 	tmp->tm_node_pool = objcache_create( "tmpfs node cache",
249 	    0, 0,
250 	    tmpfs_node_ctor, tmpfs_node_dtor, NULL,
251 	    tmpfs_node_init, tmpfs_node_fini,
252 	    &tmp->tm_node_zone_malloc_args);
253 
254 	/* Allocate the root node. */
255 	error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
256 				 root_mode & ALLPERMS, NULL, NULL,
257 				 VNOVAL, VNOVAL, &root);
258 
259 	/*
260 	 * We are backed by swap, set snocache chflags flag so we
261 	 * don't trip over swapcache.
262 	 */
263 	root->tn_flags = SF_NOCACHE;
264 
265 	if (error != 0 || root == NULL) {
266 	    objcache_destroy(tmp->tm_node_pool);
267 	    objcache_destroy(tmp->tm_dirent_pool);
268 	    kfree(tmp, M_TMPFSMNT);
269 	    return error;
270 	}
271 	KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id));
272 	tmp->tm_root = root;
273 
274 	mp->mnt_flag |= MNT_LOCAL;
275 #if 0
276 	mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE  |
277 			     MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
278 #endif
279 	mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_SG_MPSAFE;
280 	mp->mnt_kern_flag |= MNTK_WR_MPSAFE;
281 	mp->mnt_kern_flag |= MNTK_NOMSYNC;
282 	mp->mnt_data = (qaddr_t)tmp;
283 	vfs_getnewfsid(mp);
284 
285 
286 	vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
287 	vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
288 
289 	copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
290 	bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
291 	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
292 	copyinstr(path, mp->mnt_stat.f_mntonname,
293 		  sizeof(mp->mnt_stat.f_mntonname) -1,
294 		  &size);
295 
296 	tmpfs_statfs(mp, &mp->mnt_stat, cred);
297 
298 	return 0;
299 }
300 
301 /* --------------------------------------------------------------------- */
302 
303 /* ARGSUSED2 */
304 static int
305 tmpfs_unmount(struct mount *mp, int mntflags)
306 {
307 	int error;
308 	int flags = 0;
309 	int found;
310 	struct tmpfs_mount *tmp;
311 	struct tmpfs_node *node;
312 
313 	/* Handle forced unmounts. */
314 	if (mntflags & MNT_FORCE)
315 		flags |= FORCECLOSE;
316 
317 	tmp = VFS_TO_TMPFS(mp);
318 
319 	/*
320 	 * Finalize all pending I/O.  In the case of tmpfs we want
321 	 * to throw all the data away so clean out the buffer cache
322 	 * and vm objects before calling vflush().
323 	 */
324 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
325 		if (node->tn_type == VREG && node->tn_vnode) {
326 			++node->tn_links;
327 			TMPFS_NODE_LOCK(node);
328 			vx_get(node->tn_vnode);
329 			tmpfs_truncate(node->tn_vnode, 0);
330 			vx_put(node->tn_vnode);
331 			TMPFS_NODE_UNLOCK(node);
332 			--node->tn_links;
333 		}
334 	}
335 	error = vflush(mp, 0, flags);
336 	if (error != 0)
337 		return error;
338 
339 	/*
340 	 * First pass get rid of all the directory entries and
341 	 * vnode associations.  The directory structure will
342 	 * remain via the extra link count representing tn_dir.tn_parent.
343 	 *
344 	 * No vnodes should remain after the vflush above.
345 	 */
346 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
347 		++node->tn_links;
348 		TMPFS_NODE_LOCK(node);
349 		if (node->tn_type == VDIR) {
350 			struct tmpfs_dirent *de;
351 
352 			while (!TAILQ_EMPTY(&node->tn_dir.tn_dirhead)) {
353 				de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
354 				tmpfs_dir_detach(node, de);
355 				tmpfs_free_dirent(tmp, de);
356 				node->tn_size -= sizeof(struct tmpfs_dirent);
357 			}
358 		}
359 		KKASSERT(node->tn_vnode == NULL);
360 #if 0
361 		vp = node->tn_vnode;
362 		if (vp != NULL) {
363 			tmpfs_free_vp(vp);
364 			vrecycle(vp);
365 			node->tn_vnode = NULL;
366 		}
367 #endif
368 		TMPFS_NODE_UNLOCK(node);
369 		--node->tn_links;
370 	}
371 
372 	/*
373 	 * Now get rid of all nodes.  We can remove any node with a
374 	 * link count of 0 or any directory node with a link count of
375 	 * 1.  The parents will not be destroyed until all their children
376 	 * have been destroyed.
377 	 *
378 	 * Recursion in tmpfs_free_node() can further modify the list so
379 	 * we cannot use a next pointer here.
380 	 *
381 	 * The root node will be destroyed by this loop (it will be last).
382 	 */
383 	while (!LIST_EMPTY(&tmp->tm_nodes_used)) {
384 		found = 0;
385 		LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
386 			if (node->tn_links == 0 ||
387 			    (node->tn_links == 1 && node->tn_type == VDIR)) {
388 				TMPFS_NODE_LOCK(node);
389 				tmpfs_free_node(tmp, node);
390 				/* eats lock */
391 				found = 1;
392 				break;
393 			}
394 		}
395 		if (found == 0) {
396 			kprintf("tmpfs: Cannot free entire node tree!");
397 			break;
398 		}
399 	}
400 
401 	KKASSERT(tmp->tm_root == NULL);
402 
403 	objcache_destroy(tmp->tm_dirent_pool);
404 	objcache_destroy(tmp->tm_node_pool);
405 
406 	kmalloc_destroy(&tmp->tm_name_zone);
407 	kmalloc_destroy(&tmp->tm_dirent_zone);
408 	kmalloc_destroy(&tmp->tm_node_zone);
409 
410 	tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;
411 
412 	lockuninit(&tmp->allnode_lock);
413 	KKASSERT(tmp->tm_pages_used == 0);
414 	KKASSERT(tmp->tm_nodes_inuse == 0);
415 
416 	/* Throw away the tmpfs_mount structure. */
417 	kfree(tmp, M_TMPFSMNT);
418 	mp->mnt_data = NULL;
419 
420 	mp->mnt_flag &= ~MNT_LOCAL;
421 	return 0;
422 }
423 
424 /* --------------------------------------------------------------------- */
425 
426 static int
427 tmpfs_root(struct mount *mp, struct vnode **vpp)
428 {
429 	struct tmpfs_mount *tmp;
430 	int error;
431 
432 	tmp = VFS_TO_TMPFS(mp);
433 	if (tmp->tm_root == NULL) {
434 		kprintf("tmpfs_root: called without root node %p\n", mp);
435 		print_backtrace(-1);
436 		*vpp = NULL;
437 		error = EINVAL;
438 	} else {
439 		error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp);
440 		(*vpp)->v_flag |= VROOT;
441 		(*vpp)->v_type = VDIR;
442 	}
443 	return error;
444 }
445 
446 /* --------------------------------------------------------------------- */
447 
448 static int
449 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
450 {
451 	boolean_t found;
452 	struct tmpfs_fid *tfhp;
453 	struct tmpfs_mount *tmp;
454 	struct tmpfs_node *node;
455 
456 	tmp = VFS_TO_TMPFS(mp);
457 
458 	tfhp = (struct tmpfs_fid *)fhp;
459 	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
460 		return EINVAL;
461 
462 	if (tfhp->tf_id >= tmp->tm_nodes_max)
463 		return EINVAL;
464 
465 	found = FALSE;
466 
467 	TMPFS_LOCK(tmp);
468 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
469 		if (node->tn_id == tfhp->tf_id &&
470 		    node->tn_gen == tfhp->tf_gen) {
471 			found = TRUE;
472 			break;
473 		}
474 	}
475 	TMPFS_UNLOCK(tmp);
476 
477 	if (found)
478 		return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));
479 
480 	return (EINVAL);
481 }
482 
483 /* --------------------------------------------------------------------- */
484 
485 /* ARGSUSED2 */
486 static int
487 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
488 {
489 	fsfilcnt_t freenodes;
490 	struct tmpfs_mount *tmp;
491 
492 	tmp = VFS_TO_TMPFS(mp);
493 
494 	sbp->f_iosize = PAGE_SIZE;
495 	sbp->f_bsize = PAGE_SIZE;
496 
497 	sbp->f_blocks = tmp->tm_pages_max;
498 	sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
499 	sbp->f_bfree = sbp->f_bavail;
500 
501 	freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
502 
503 	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
504 	sbp->f_ffree = freenodes;
505 	sbp->f_owner = tmp->tm_root->tn_uid;
506 
507 	return 0;
508 }
509 
510 /* --------------------------------------------------------------------- */
511 
512 static int
513 tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
514 {
515 	struct tmpfs_node *node;
516 	struct tmpfs_fid tfh;
517 	node = VP_TO_TMPFS_NODE(vp);
518 	memset(&tfh, 0, sizeof(tfh));
519 	tfh.tf_len = sizeof(struct tmpfs_fid);
520 	tfh.tf_gen = node->tn_gen;
521 	tfh.tf_id = node->tn_id;
522 	memcpy(fhp, &tfh, sizeof(tfh));
523 	return (0);
524 }
525 
526 /* --------------------------------------------------------------------- */
527 
528 static int
529 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
530 	       struct ucred **credanonp)
531 {
532 	struct tmpfs_mount *tmp;
533 	struct netcred *nc;
534 
535 	tmp = (struct tmpfs_mount *) mp->mnt_data;
536 	nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
537 	if (nc == NULL)
538 		return (EACCES);
539 
540 	*exflagsp = nc->netc_exflags;
541 	*credanonp = &nc->netc_anon;
542 
543 	return (0);
544 }
545 
546 /* --------------------------------------------------------------------- */
547 
548 /*
549  * tmpfs vfs operations.
550  */
551 
552 static struct vfsops tmpfs_vfsops = {
553 	.vfs_mount =			tmpfs_mount,
554 	.vfs_unmount =			tmpfs_unmount,
555 	.vfs_root =			tmpfs_root,
556 	.vfs_statfs =			tmpfs_statfs,
557 	.vfs_fhtovp =			tmpfs_fhtovp,
558 	.vfs_vptofh =			tmpfs_vptofh,
559 	.vfs_sync =			vfs_stdsync,
560 	.vfs_checkexp =			tmpfs_checkexp,
561 };
562 
563 VFS_SET(tmpfs_vfsops, tmpfs, 0);
564 MODULE_VERSION(tmpfs, 1);
565