/*	$NetBSD: layer_vnops.c,v 1.67 2017/06/04 08:05:42 hannken Exp $	*/

/*
 * Copyright (c) 1999 National Aeronautics & Space Administration
 * All rights reserved.
 *
 * This software was written by William Studenmund of the
 * Numerical Aerospace Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the National Aeronautics & Space Administration
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NATIONAL AERONAUTICS & SPACE ADMINISTRATION
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ADMINISTRATION OR CONTRIB-
 * UTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Generic layer vnode operations.
 *
 * The layer.h, layer_extern.h, layer_vfs.c, and layer_vnops.c files provide
 * the core implementation of stacked file-systems.
 *
 * The layerfs duplicates a portion of the file system name space under
 * a new name. In this respect, it is similar to the loopback file system.
 * It differs from the loopback fs in two respects: it is implemented using
 * a stackable layers technique, and its "layerfs-nodes" stack above all
 * lower-layer vnodes, not just over directory vnodes.
 *
 * OPERATION OF LAYERFS
 *
 * The layerfs is the minimum file system layer, bypassing all possible
 * operations to the lower layer for processing there. The majority of its
 * activity centers on the bypass routine, through which nearly all vnode
 * operations pass.
 *
 * The bypass routine accepts arbitrary vnode operations for handling by
 * the lower layer. It begins by examining vnode operation arguments and
 * replacing any layered nodes by their lower-layer equivalents. It then
 * invokes an operation on the lower layer. Finally, it replaces the
 * layered nodes in the arguments and, if a vnode is returned by the
 * operation, stacks a layered node on top of the returned vnode.
 *
 * The bypass routine in this file, layer_bypass(), is suitable for use
 * by many different layered filesystems. It can be used by multiple
 * filesystems simultaneously. Alternatively, a layered fs may provide
 * its own bypass routine, in which case layer_bypass() should be used as
 * a model. For instance, the main functionality provided by umapfs, the
 * user identity mapping file system, is handled by a custom bypass routine.
 *
 * Typically a layered fs registers its selected bypass routine as the
 * default vnode operation in its vnodeopv_entry_desc table. Additionally
 * the filesystem must store the bypass entry point in the layerm_bypass
 * field of struct layer_mount. All other layer routines in this file will
 * use the layerm_bypass() routine (see the sketch below).
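 *
 * For illustration only, a hypothetical "examplefs" (the name and the
 * table below are a sketch, not part of this file) would register the
 * generic routines roughly like this:
 *
 *	int (**examplefs_vnodeop_p)(void *);
 *	const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
 *		{ &vop_default_desc, layer_bypass },
 *		{ &vop_lookup_desc, layer_lookup },
 *		{ &vop_getattr_desc, layer_getattr },
 *		{ &vop_inactive_desc, layer_inactive },
 *		{ &vop_reclaim_desc, layer_reclaim },
 *		{ NULL, NULL }
 *	};
 *	const struct vnodeopv_desc examplefs_vnodeop_opv_desc =
 *		{ &examplefs_vnodeop_p, examplefs_vnodeop_entries };
 *
 * and its mount code would record the bypass entry point with an
 * assignment along the lines of "lmp->layerm_bypass = layer_bypass"
 * before any vnodes are created.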
 *
 * Although the bypass routine handles most operations outright, a number
 * of operations are special cased and handled by the layerfs. For instance,
 * layer_getattr() must change the fsid being returned, and layer_lock()
 * and layer_unlock() must handle any locking for the current vnode as well
 * as pass the lock request down. layer_inactive() and layer_reclaim() are
 * not bypassed so that they can handle freeing layerfs-specific data. Also,
 * certain vnode operations (create, mknod, remove, link, rename, mkdir,
 * rmdir, and symlink) change the locking state within the operation. Ideally
 * these operations should not change the lock state; they should be changed
 * to let the caller of the function unlock them. Otherwise, all intermediate
 * vnode layers (such as union, umapfs, etc) must catch these functions to do
 * the necessary locking at their layer.
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates a "layerfs-nodes" stack with a lower layer, in effect
 * stacking two VFSes. The initial mount creates a single vnode stack for
 * the root of the new layerfs. All other vnode stacks are created as a
 * result of vnode operations on this or other layerfs vnode stacks.
 *
 * New vnode stacks come into existence as a result of an operation which
 * returns a vnode. The bypass routine stacks a layerfs-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with:
 *
 *	"mount_null /usr/include /dev/layer/null"
 *
 * Changing directory to /dev/layer/null will assign the root layerfs-node
 * (which was created when the null layer was mounted). Now consider opening
 * "sys". A layer_lookup() would be performed on the root layerfs-node.
 * This operation would bypass through to the lower layer which would return
 * a vnode representing the UFS "sys". Then, layer_bypass() builds a
 * layerfs-node aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the layerfs-node "sys" will repeat this process when
 * constructing other vnode stacks.
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer when the
 * operation cannot be completely bypassed. Each method is appropriate in
 * different situations. In both cases, it is the responsibility of the
 * aliasing layer to make the operation arguments "correct" for the lower
 * layer by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine. This
 * method is most suitable when you wish to invoke the operation currently
 * being handled on the lower layer. It has the advantage that the bypass
 * routine already must do argument mapping. An example of this is
 * layer_getattr().
 *
 * A second approach is to directly invoke vnode operations on the lower
 * layer with the VOP_OPERATIONNAME interface. The advantage of this method
 * is that it is easy to invoke arbitrary operations on the lower layer.
 * The disadvantage is that the vnode arguments must be manually mapped.
 * Both approaches are sketched in the comment that follows.
 */
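
/*
 * Illustrative sketch only: "examplefs" and the routine below are
 * hypothetical and not part of this file. The two techniques described
 * above look roughly like this.
 *
 * Technique 1 - reuse the layer's bypass routine for the operation that
 * is currently being handled; the bypass does the argument mapping:
 *
 *	int
 *	examplefs_getattr(void *v)
 *	{
 *		struct vop_getattr_args *ap = v;
 *		int error;
 *
 *		error = LAYERFS_DO_BYPASS(ap->a_vp, ap);
 *		if (error)
 *			return error;
 *		... fix up the result here, e.g. ap->a_vap->va_fsid ...
 *		return 0;
 *	}
 *
 * Technique 2 - invoke an arbitrary operation directly on the lower
 * vnode; the caller must map the vnode arguments itself:
 *
 *	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
 *	error = VOP_FSYNC(lvp, cred, FSYNC_WAIT, 0, 0);
 */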

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.67 2017/06/04 08:05:42 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fstrans.h>

#include <miscfs/genfs/layer.h>
#include <miscfs/genfs/layer_extern.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/*
 * This is the 08-June-99 bypass routine, based on the 10-Apr-92 bypass
 * routine by John Heidemann. The new element for this version is that
 * the whole nullfs system gained the concept of locks on the lower node.
 * The 10-Apr-92 version was optimized for speed, throwing away some
 * safety checks. It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments. With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here. This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * New for the 08-June-99 version: we also handle operations which unlock
 * the passed-in node (typically they vput the node).
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
layer_bypass(void *v)
{
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap = v;
	int (**our_vnodeop_p)(void *);
	struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS], *vp0;
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct mount *mp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i, flags;

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("%s: no vp's in map.\n", __func__);
#endif

	vps_p[0] =
	    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
	vp0 = *vps_p[0];
	mp = vp0->v_mount;
	flags = MOUNTTOLAYERMOUNT(mp)->layerm_flags;
	our_vnodeop_p = vp0->v_op;

	if (flags & LAYERFS_MBYPASSDEBUG)
		printf("%s: %s\n", __func__, descp->vdesc_name);

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p =
		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i],
		    ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type. Check for and don't map any
		 * that aren't. (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULL ||
		    (*this_vp_p)->v_op != our_vnodeop_p)) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = LAYERVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's. We must account for
			 * that. (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				vref(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*vps_p[0], descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp.
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && !error) {
		vppp = VOPARG_OFFSETTO(struct vnode***,
		    descp->vdesc_vpp_offset, ap);
		/*
		 * Only vop_lookup, vop_create, vop_mkdir, vop_mknod
		 * and vop_symlink return vpp's. vop_lookup doesn't call bypass
		 * as a lookup on "." would generate a locking error.
		 * So all the calls which get us here have an unlocked vpp. :-)
		 */
		error = layer_node_create(mp, **vppp, *vppp);
		if (error) {
			vrele(**vppp);
			**vppp = NULL;
		}
	}
	return error;
}
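
/*
 * Illustrative sketch only: "examplefs_bypass" below is hypothetical and
 * not part of this file. A layer that needs a small amount of
 * per-operation work, but no extra argument rewriting, can simply wrap
 * the generic bypass instead of copying it:
 *
 *	int
 *	examplefs_bypass(void *v)
 *	{
 *		struct vop_generic_args *ap = v;
 *
 *		... per-operation work (counting, logging, etc.) goes here ...
 *		return layer_bypass(ap);
 *	}
 *
 * A layer which must rewrite the arguments themselves (as umapfs does
 * with credentials) instead provides its own routine modelled on
 * layer_bypass() and stores it in layerm_bypass.
 */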

/*
 * We have to carry on the locking protocol on the layer vnodes
 * as we progress through the tree. We also have to enforce read-only
 * if this layer is mounted read-only.
 */
int
layer_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp, *lvp, *ldvp;
	int error, flags = cnp->cn_flags;

	dvp = ap->a_dvp;

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		*ap->a_vpp = NULL;
		return EROFS;
	}

	ldvp = LAYERVPTOLOWERVP(dvp);
	ap->a_dvp = ldvp;
	error = VCALL(ldvp, ap->a_desc->vdesc_offset, ap);
	lvp = *ap->a_vpp;
	*ap->a_vpp = NULL;

	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	/*
	 * We must do the same locking and unlocking at this layer as
	 * is done in the layers below us.
	 */
	if (ldvp == lvp) {
		/*
		 * Got the same object back, because we looked up ".",
		 * or ".." in the root node of a mount point.
		 * So we make another reference to dvp and return it.
		 */
		vref(dvp);
		*ap->a_vpp = dvp;
		vrele(lvp);
	} else if (lvp != NULL) {
		/* Note: dvp and ldvp are both locked. */
		error = layer_node_create(dvp->v_mount, lvp, ap->a_vpp);
		if (error) {
			vrele(lvp);
		}
	}
	return error;
}

/*
 * Setattr call. Disallow write attempts if the layer is mounted read-only.
 */
int
layer_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;

	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return EROFS;
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return EISDIR;
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			return 0;
		case VREG:
		case VLNK:
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We handle getattr only to change the fsid.
 */
int
layer_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error) {
		return error;
	}
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return 0;
}

int
layer_access(void *v)
{
	struct vop_access_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	mode_t mode = ap->a_mode;

	/*
	 * Disallow write attempts on read-only layers, unless the file
	 * is a socket, fifo, or a block or character device resident
	 * on the file system.
	 */
	if (mode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
			break;
		default:
			break;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We must handle open to be able to catch MNT_NODEV and friends
 * and increment the lower v_writecount.
 */
int
layer_open(void *v)
{
	struct vop_open_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
	int error;

	if (((lvp->v_type == VBLK) || (lvp->v_type == VCHR)) &&
	    (vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0 && (ap->a_mode & FWRITE)) {
		mutex_enter(lvp->v_interlock);
		lvp->v_writecount++;
		mutex_exit(lvp->v_interlock);
	}
	return error;
}

/*
 * We must handle close to decrement the lower v_writecount.
 */
int
layer_close(void *v)
{
	struct vop_close_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);

	if ((ap->a_fflag & FWRITE)) {
		mutex_enter(lvp->v_interlock);
		KASSERT(lvp->v_writecount > 0);
		lvp->v_writecount--;
		mutex_exit(lvp->v_interlock);
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't bother
 * syncing the underlying vnodes, since they'll be fsync'ed when
 * reclaimed; otherwise, pass it through to the underlying layer.
 *
 * XXX Do we still need to worry about shallow fsync?
 */
int
layer_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;
	int error;

	if (ap->a_flags & FSYNC_RECLAIM) {
		return 0;
	}
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}
	return LAYERFS_DO_BYPASS(ap->a_vp, ap);
}

int
layer_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/*
	 * If we did a remove, don't cache the node.
	 */
	*ap->a_recycle = ((VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED) != 0);

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our layer_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	return 0;
}

int
layer_remove(void *v)
{
	struct vop_remove_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_rename(void *v)
{
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	struct vnode *fdvp = ap->a_fdvp, *tvp;
	int error;

	tvp = ap->a_tvp;
	if (tvp) {
		if (tvp->v_mount != fdvp->v_mount)
			tvp = NULL;
		else
			vref(tvp);
	}
	error = LAYERFS_DO_BYPASS(fdvp, ap);
	if (tvp) {
		if (error == 0)
			VTOLAYER(tvp)->layer_flags |= LAYERFS_REMOVED;
		vrele(tvp);
	}
	return error;
}

int
layer_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct vnode *vp = ap->a_vp;

	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
	int error;

	/*
	 * We will most likely end up in vclean which uses the v_usecount
	 * to determine if a vnode is active. Take an extra reference on
	 * the lower vnode so it will always close and inactivate.
	 */
	vref(lvp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	vrele(lvp);

	return error;
}

int
layer_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct layer_mount *lmp = MOUNTTOLAYERMOUNT(vp->v_mount);
	struct layer_node *xp = VTOLAYER(vp);
	struct vnode *lowervp = xp->layer_lowervp;

	VOP_UNLOCK(vp);

	/*
	 * Note: in vop_reclaim, the node's struct lock has been
	 * decommissioned, so we have to be careful about calling
	 * VOP's on ourself. We must be careful as VXLOCK is set.
	 */
	if (vp == lmp->layerm_rootvp) {
		/*
		 * Oops! We no longer have a root node. The most likely
		 * reason is that someone forcibly unmounted the
		 * underlying fs.
		 *
		 * Now getting the root vnode will fail. We're dead. :-(
		 */
		lmp->layerm_rootvp = NULL;
	}

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_interlock == lowervp->v_interlock);
	lowervp->v_writecount -= vp->v_writecount;
	mutex_exit(vp->v_interlock);

	/* After this assignment, this node will not be re-used. */
	xp->layer_lowervp = NULL;
	kmem_free(vp->v_data, lmp->layerm_size);
	vp->v_data = NULL;
	vrele(lowervp);

	return 0;
}

/*
 * We just feed the returned vnode up to the caller - there's no need
 * to build a layer node on top of the node on which we're going to do
 * i/o. :-)
 */
int
layer_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;
	struct vnode *vp;

	vp = LAYERVPTOLOWERVP(ap->a_vp);
	ap->a_vp = vp;

	return VCALL(vp, ap->a_desc->vdesc_offset, ap);
}

int
layer_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_LAYERFS, vp=%p, lowervp=%p\n", vp, LAYERVPTOLOWERVP(vp));
	return 0;
}

int
layer_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct mount *mp = vp->v_mount;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));

	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	mutex_exit(vp->v_interlock);
	fstrans_start(mp);
	mutex_enter(vp->v_interlock);
	if (mp == vp->v_mount) {
		/* Will release the interlock. */
		error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
	} else {
		mutex_exit(vp->v_interlock);
		error = ENOENT;
	}
	fstrans_done(mp);

	return error;
}

int
layer_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}