1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/kernel.h> 49 #include <sys/stat.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/objcache.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_param.h> 57 58 #if 0 59 #include <vfs/tmpfs/tmpfs.h> 60 #endif 61 #include "tmpfs.h" 62 #include <vfs/tmpfs/tmpfs_vnops.h> 63 #include <vfs/tmpfs/tmpfs_mount.h> 64 65 /* 66 * Default permission for root node 67 */ 68 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 69 70 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 71 72 /* --------------------------------------------------------------------- */ 73 74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 75 static int tmpfs_unmount(struct mount *, int); 76 static int tmpfs_root(struct mount *, struct vnode **); 77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 79 80 /* --------------------------------------------------------------------- */ 81 boolean_t 82 tmpfs_node_ctor(void *obj, void *privdata, int flags) 83 { 84 struct tmpfs_node *node = obj; 85 86 node->tn_gen++; 87 node->tn_size = 0; 88 node->tn_status = 0; 89 node->tn_flags = 0; 90 node->tn_links = 0; 91 node->tn_vnode = NULL; 92 node->tn_vpstate = 0; 93 bzero(&node->tn_spec, sizeof(node->tn_spec)); 94 95 return (TRUE); 96 } 97 98 static void 99 tmpfs_node_dtor(void *obj, void *privdata) 100 { 101 struct tmpfs_node *node = (struct tmpfs_node *)obj; 102 node->tn_type = VNON; 103 node->tn_vpstate = TMPFS_VNODE_DOOMED; 104 } 105 106 static void * 107 tmpfs_node_init(void *args, int flags) 108 { 109 struct tmpfs_node *node; 110 111 node = objcache_malloc_alloc(args, flags); 112 if (node == NULL) 113 return (NULL); 114 node->tn_id = 0; 115 node->tn_blksize = PAGE_SIZE; /* start small */ 116 117 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 118 node->tn_gen = karc4random(); 119 120 return node; 121 } 122 123 static void 124 tmpfs_node_fini(void *obj, void *args) 125 { 126 struct tmpfs_node *node = (struct tmpfs_node *)obj; 127 lockuninit(&node->tn_interlock); 128 objcache_malloc_free(obj, args); 129 } 130 131 static int 132 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 133 { 134 struct tmpfs_mount *tmp; 135 struct tmpfs_node *root; 136 struct tmpfs_mount_info args; 137 vm_pindex_t pages; 138 vm_pindex_t pages_limit; 139 ino_t nodes; 140 u_int64_t maxfsize; 141 int error; 142 /* Size counters. */ 143 ino_t nodes_max; 144 off_t size_max; 145 size_t maxfsize_max; 146 size_t size; 147 148 /* Root node attributes. */ 149 uid_t root_uid = cred->cr_uid; 150 gid_t root_gid = cred->cr_gid; 151 mode_t root_mode = (VREAD | VWRITE); 152 153 if (mp->mnt_flag & MNT_UPDATE) { 154 /* XXX: There is no support yet to update file system 155 * settings. Should be added. */ 156 157 return EOPNOTSUPP; 158 } 159 160 /* 161 * mount info 162 */ 163 bzero(&args, sizeof(args)); 164 size_max = 0; 165 nodes_max = 0; 166 maxfsize_max = 0; 167 168 if (path) { 169 if (data) { 170 error = copyin(data, &args, sizeof(args)); 171 if (error) 172 return (error); 173 } 174 size_max = args.ta_size_max; 175 nodes_max = args.ta_nodes_max; 176 maxfsize_max = args.ta_maxfsize_max; 177 root_uid = args.ta_root_uid; 178 root_gid = args.ta_root_gid; 179 root_mode = args.ta_root_mode; 180 } 181 182 /* 183 * If mount by non-root, then verify that user has necessary 184 * permissions on the device. 185 */ 186 if (cred->cr_uid != 0) { 187 root_mode = VREAD; 188 if ((mp->mnt_flag & MNT_RDONLY) == 0) 189 root_mode |= VWRITE; 190 } 191 192 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 193 194 if (size_max == 0) { 195 pages = pages_limit / 2; 196 } else if (size_max < PAGE_SIZE) { 197 pages = 1; 198 } else if (OFF_TO_IDX(size_max) > pages_limit) { 199 /* 200 * do not force pages = pages_limit for this case, otherwise 201 * we might not honor tmpfs size requests from /etc/fstab 202 * during boot because they are mounted prior to swap being 203 * turned on. 204 */ 205 pages = OFF_TO_IDX(size_max); 206 } else { 207 pages = OFF_TO_IDX(size_max); 208 } 209 210 if (nodes_max == 0) 211 nodes = 3 + pages * PAGE_SIZE / 1024; 212 else if (nodes_max < 3) 213 nodes = 3; 214 else if (nodes_max > pages) 215 nodes = pages; 216 else 217 nodes = nodes_max; 218 219 maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE; 220 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 221 maxfsize = maxfsize_max; 222 223 /* Allocate the tmpfs mount structure and fill it. */ 224 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 225 226 tmp->tm_mount = mp; 227 tmp->tm_nodes_max = nodes; 228 tmp->tm_nodes_inuse = 0; 229 tmp->tm_maxfilesize = maxfsize; 230 LIST_INIT(&tmp->tm_nodes_used); 231 232 tmp->tm_pages_max = pages; 233 tmp->tm_pages_used = 0; 234 235 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 236 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 237 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 238 239 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 240 tmp->tm_nodes_max); 241 242 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 243 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 244 245 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 246 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 247 248 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 249 0, 0, 250 NULL, NULL, NULL, 251 objcache_malloc_alloc, objcache_malloc_free, 252 &tmp->tm_dirent_zone_malloc_args); 253 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 254 0, 0, 255 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 256 tmpfs_node_init, tmpfs_node_fini, 257 &tmp->tm_node_zone_malloc_args); 258 259 tmp->tm_ino = TMPFS_ROOTINO; 260 261 /* Allocate the root node. */ 262 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 263 root_mode & ALLPERMS, NULL, 264 VNOVAL, VNOVAL, &root); 265 266 /* 267 * We are backed by swap, set snocache chflags flag so we 268 * don't trip over swapcache. 269 */ 270 root->tn_flags = SF_NOCACHE; 271 272 if (error != 0 || root == NULL) { 273 objcache_destroy(tmp->tm_node_pool); 274 objcache_destroy(tmp->tm_dirent_pool); 275 kfree(tmp, M_TMPFSMNT); 276 return error; 277 } 278 KASSERT(root->tn_id == TMPFS_ROOTINO, 279 ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id)); 280 281 atomic_add_int(&root->tn_links, 1); /* keep around */ 282 tmp->tm_root = root; 283 284 mp->mnt_flag |= MNT_LOCAL; 285 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 286 mp->mnt_kern_flag |= MNTK_NOMSYNC; 287 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 288 mp->mnt_kern_flag |= MNTK_QUICKHALT; /* no teardown needed on halt */ 289 mp->mnt_data = (qaddr_t)tmp; 290 mp->mnt_iosize_max = MAXBSIZE; 291 vfs_getnewfsid(mp); 292 293 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 294 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 295 296 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 297 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 298 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 299 copyinstr(path, mp->mnt_stat.f_mntonname, 300 sizeof(mp->mnt_stat.f_mntonname) -1, 301 &size); 302 303 tmpfs_statfs(mp, &mp->mnt_stat, cred); 304 305 return 0; 306 } 307 308 /* --------------------------------------------------------------------- */ 309 310 /* ARGSUSED2 */ 311 static int 312 tmpfs_unmount(struct mount *mp, int mntflags) 313 { 314 int error; 315 int flags = 0; 316 struct tmpfs_mount *tmp; 317 struct tmpfs_node *node; 318 struct vnode *vp; 319 int isok; 320 321 tmp = VFS_TO_TMPFS(mp); 322 TMPFS_LOCK(tmp); 323 324 /* Handle forced unmounts. */ 325 if (mntflags & MNT_FORCE) 326 flags |= FORCECLOSE; 327 328 /* 329 * Finalize all pending I/O. In the case of tmpfs we want 330 * to throw all the data away so clean out the buffer cache 331 * and vm objects before calling vflush(). 332 */ 333 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 334 /* 335 * tn_links is mnt_token protected 336 */ 337 atomic_add_int(&node->tn_links, 1); 338 TMPFS_NODE_LOCK(node); 339 340 while (node->tn_type == VREG && node->tn_vnode) { 341 vp = node->tn_vnode; 342 vhold(vp); 343 TMPFS_NODE_UNLOCK(node); 344 lwkt_yield(); 345 346 /* 347 * vx_get/vx_put and tmpfs_truncate may block, 348 * releasing the tmpfs mountpoint token. 349 * 350 * Make sure the lock order is correct. 351 */ 352 vx_get(vp); /* held vnode */ 353 TMPFS_NODE_LOCK(node); 354 if (node->tn_vnode == vp) { 355 tmpfs_truncate(vp, 0); 356 isok = 1; 357 } else { 358 isok = 0; 359 } 360 TMPFS_NODE_UNLOCK(node); 361 vx_put(vp); 362 vdrop(vp); 363 TMPFS_NODE_LOCK(node); 364 if (isok) 365 break; 366 /* retry */ 367 } 368 369 TMPFS_NODE_UNLOCK(node); 370 atomic_add_int(&node->tn_links, -1); 371 } 372 373 /* 374 * Flush all vnodes on the unmount. 375 * 376 * If we fail to flush, we cannot unmount, but all the nodes have 377 * already been truncated. Erroring out is the best we can do. 378 */ 379 error = vflush(mp, 0, flags); 380 if (error != 0) { 381 TMPFS_UNLOCK(tmp); 382 return (error); 383 } 384 385 /* 386 * First pass get rid of all the directory entries and 387 * vnode associations. This will also destroy the 388 * directory topology and should drop all link counts 389 * to 0 except for the root. 390 * 391 * No vnodes should remain after the vflush above. 392 */ 393 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 394 lwkt_yield(); 395 396 atomic_add_int(&node->tn_links, 1); 397 TMPFS_NODE_LOCK(node); 398 if (node->tn_type == VDIR) { 399 struct tmpfs_dirent *de; 400 401 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 402 tmpfs_dir_detach(node, de); 403 tmpfs_free_dirent(tmp, de); 404 } 405 } 406 KKASSERT(node->tn_vnode == NULL); 407 408 TMPFS_NODE_UNLOCK(node); 409 atomic_add_int(&node->tn_links, -1); 410 } 411 412 /* 413 * Allow the root node to be destroyed by dropping the link count 414 * we bumped in the mount code. 415 */ 416 KKASSERT(tmp->tm_root); 417 TMPFS_NODE_LOCK(tmp->tm_root); 418 atomic_add_int(&tmp->tm_root->tn_links, -1); 419 TMPFS_NODE_UNLOCK(tmp->tm_root); 420 421 /* 422 * At this point all nodes, including the root node, should have a 423 * link count of 0. The root is not necessarily going to be last. 424 */ 425 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 426 if (node->tn_links) 427 panic("tmpfs: Dangling nodes during umount (%p)!\n", 428 node); 429 430 TMPFS_NODE_LOCK(node); 431 tmpfs_free_node(tmp, node); 432 /* eats lock */ 433 lwkt_yield(); 434 } 435 KKASSERT(tmp->tm_root == NULL); 436 437 objcache_destroy(tmp->tm_dirent_pool); 438 objcache_destroy(tmp->tm_node_pool); 439 440 kmalloc_destroy(&tmp->tm_name_zone); 441 kmalloc_destroy(&tmp->tm_dirent_zone); 442 kmalloc_destroy(&tmp->tm_node_zone); 443 444 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 445 446 KKASSERT(tmp->tm_pages_used == 0); 447 KKASSERT(tmp->tm_nodes_inuse == 0); 448 449 TMPFS_UNLOCK(tmp); 450 451 /* Throw away the tmpfs_mount structure. */ 452 kfree(tmp, M_TMPFSMNT); 453 mp->mnt_data = NULL; 454 455 mp->mnt_flag &= ~MNT_LOCAL; 456 return 0; 457 } 458 459 /* --------------------------------------------------------------------- */ 460 461 static int 462 tmpfs_root(struct mount *mp, struct vnode **vpp) 463 { 464 struct tmpfs_mount *tmp; 465 int error; 466 467 tmp = VFS_TO_TMPFS(mp); 468 if (tmp->tm_root == NULL) { 469 kprintf("tmpfs_root: called without root node %p\n", mp); 470 print_backtrace(-1); 471 *vpp = NULL; 472 error = EINVAL; 473 } else { 474 error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root, 475 LK_EXCLUSIVE, vpp); 476 (*vpp)->v_flag |= VROOT; 477 (*vpp)->v_type = VDIR; 478 } 479 return error; 480 } 481 482 /* --------------------------------------------------------------------- */ 483 484 static int 485 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 486 struct vnode **vpp) 487 { 488 boolean_t found; 489 struct tmpfs_fid *tfhp; 490 struct tmpfs_mount *tmp; 491 struct tmpfs_node *node; 492 int rc; 493 494 tmp = VFS_TO_TMPFS(mp); 495 496 tfhp = (struct tmpfs_fid *) fhp; 497 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 498 return EINVAL; 499 500 rc = EINVAL; 501 found = FALSE; 502 503 TMPFS_LOCK(tmp); 504 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 505 if (node->tn_id == tfhp->tf_id && 506 node->tn_gen == tfhp->tf_gen) { 507 found = TRUE; 508 break; 509 } 510 } 511 512 if (found) 513 rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp); 514 515 TMPFS_UNLOCK(tmp); 516 517 return (rc); 518 } 519 520 /* --------------------------------------------------------------------- */ 521 522 /* ARGSUSED2 */ 523 static int 524 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 525 { 526 fsfilcnt_t freenodes; 527 struct tmpfs_mount *tmp; 528 529 tmp = VFS_TO_TMPFS(mp); 530 531 /* TMPFS_LOCK(tmp); not really needed */ 532 533 sbp->f_iosize = PAGE_SIZE; 534 sbp->f_bsize = PAGE_SIZE; 535 536 sbp->f_blocks = tmp->tm_pages_max; 537 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 538 sbp->f_bfree = sbp->f_bavail; 539 540 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 541 542 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 543 sbp->f_ffree = freenodes; 544 sbp->f_owner = tmp->tm_root->tn_uid; 545 546 /* TMPFS_UNLOCK(tmp); */ 547 548 return 0; 549 } 550 551 /* --------------------------------------------------------------------- */ 552 553 static int 554 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 555 { 556 struct tmpfs_node *node; 557 struct tmpfs_fid tfh; 558 node = VP_TO_TMPFS_NODE(vp); 559 memset(&tfh, 0, sizeof(tfh)); 560 tfh.tf_len = sizeof(struct tmpfs_fid); 561 tfh.tf_gen = node->tn_gen; 562 tfh.tf_id = node->tn_id; 563 memcpy(fhp, &tfh, sizeof(tfh)); 564 return (0); 565 } 566 567 /* --------------------------------------------------------------------- */ 568 569 static int 570 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 571 struct ucred **credanonp) 572 { 573 struct tmpfs_mount *tmp; 574 struct netcred *nc; 575 576 tmp = (struct tmpfs_mount *) mp->mnt_data; 577 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 578 if (nc == NULL) 579 return (EACCES); 580 581 *exflagsp = nc->netc_exflags; 582 *credanonp = &nc->netc_anon; 583 584 return (0); 585 } 586 587 /* --------------------------------------------------------------------- */ 588 589 /* 590 * tmpfs vfs operations. 591 */ 592 593 static struct vfsops tmpfs_vfsops = { 594 .vfs_flags = 0, 595 .vfs_mount = tmpfs_mount, 596 .vfs_unmount = tmpfs_unmount, 597 .vfs_root = tmpfs_root, 598 .vfs_statfs = tmpfs_statfs, 599 .vfs_fhtovp = tmpfs_fhtovp, 600 .vfs_vptofh = tmpfs_vptofh, 601 .vfs_checkexp = tmpfs_checkexp, 602 }; 603 604 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE); 605 MODULE_VERSION(tmpfs, 1); 606