/*	$NetBSD: layer_vnops.c,v 1.60 2017/01/27 10:47:13 hannken Exp $	*/

/*
 * Copyright (c) 1999 National Aeronautics & Space Administration
 * All rights reserved.
 *
 * This software was written by William Studenmund of the
 * Numerical Aerospace Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the National Aeronautics & Space Administration
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NATIONAL AERONAUTICS & SPACE ADMINISTRATION
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ADMINISTRATION OR CONTRIB-
 * UTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Generic layer vnode operations.
 *
 * The layer.h, layer_extern.h, layer_vfs.c, and layer_vnops.c files provide
 * the core implementation of stacked file-systems.
 *
 * The layerfs duplicates a portion of the file system name space under
 * a new name.  In this respect, it is similar to the loopback file system.
 * It differs from the loopback fs in two respects: it is implemented using
 * a stackable layers technique, and its "layerfs-nodes" stack above all
 * lower-layer vnodes, not just over directory vnodes.
 *
 * OPERATION OF LAYERFS
 *
 * The layerfs is the minimum file system layer, bypassing all possible
 * operations to the lower layer for processing there.  The majority of its
 * activity centers on the bypass routine, through which nearly all vnode
 * operations pass.
 *
 * The bypass routine accepts arbitrary vnode operations for handling by
 * the lower layer.  It begins by examining vnode operation arguments and
 * replacing any layered nodes by their lower-layer equivalents.  It then
 * invokes an operation on the lower layer.  Finally, it replaces the
 * layered nodes in the arguments and, if a vnode is returned by the
 * operation, stacks a layered node on top of the returned vnode.
 *
 * The bypass routine in this file, layer_bypass(), is suitable for use
 * by many different layered filesystems.  It can be used by multiple
 * filesystems simultaneously.  Alternatively, a layered fs may provide
 * its own bypass routine, in which case layer_bypass() should be used as
 * a model.  For instance, the main functionality provided by umapfs, the
 * user identity mapping file system, is handled by a custom bypass routine.
 *
 * Typically a layered fs registers its selected bypass routine as the
 * default vnode operation in its vnodeopv_entry_desc table.  Additionally,
 * the filesystem must store the bypass entry point in the layerm_bypass
 * field of struct layer_mount.  All other layer routines in this file will
 * use the layerm_bypass() routine.
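 *
 * As an illustrative sketch only -- the "examplefs" identifiers below are
 * hypothetical and not defined in this file -- such a registration might
 * look like:
 *
 *	const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
 *		{ &vop_default_desc, layer_bypass },
 *		{ &vop_lookup_desc, layer_lookup },
 *		{ &vop_getattr_desc, layer_getattr },
 *		{ &vop_inactive_desc, layer_inactive },
 *		{ &vop_reclaim_desc, layer_reclaim },
 *		{ NULL, NULL }
 *	};
 *
 * with the mount code additionally recording the chosen bypass routine:
 *
 *	lmp->layerm_bypass = layer_bypass;
 *
 * so that the LAYERFS_DO_BYPASS() calls made by the routines in this file
 * reach that bypass routine.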
 *
 * Although the bypass routine handles most operations outright, a number
 * of operations are special cased and handled by the layerfs.  For instance,
 * layer_getattr() must change the fsid being returned, while layer_lock()
 * and layer_unlock() must handle any locking for the current vnode as well
 * as pass the lock request down.  layer_inactive() and layer_reclaim() are
 * not bypassed so that they can handle freeing layerfs-specific data.  Also,
 * certain vnode operations (create, mknod, remove, link, rename, mkdir,
 * rmdir, and symlink) change the locking state within the operation.  Ideally
 * these operations should not change the lock state, but should be changed
 * to let the caller of the function unlock them.  Otherwise, all intermediate
 * vnode layers (such as union, umapfs, etc) must catch these functions to do
 * the necessary locking at their layer.
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates a "layerfs-nodes" stack with a lower layer, in effect
 * stacking two VFSes.  The initial mount creates a single vnode stack for
 * the root of the new layerfs.  All other vnode stacks are created as a
 * result of vnode operations on this or other layerfs vnode stacks.
 *
 * New vnode stacks come into existence as a result of an operation which
 * returns a vnode.  The bypass routine stacks a layerfs-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with:
 *
 *	"mount_null /usr/include /dev/layer/null"
 *
 * Changing directory to /dev/layer/null will assign the root layerfs-node
 * (which was created when the null layer was mounted).  Now consider opening
 * "sys".  A layer_lookup() would be performed on the root layerfs-node.
 * This operation would bypass through to the lower layer which would return
 * a vnode representing the UFS "sys".  Then, layer_bypass() builds a
 * layerfs-node aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the layerfs-node "sys" will repeat this process when
 * constructing other vnode stacks.
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer when the
 * operation cannot be completely bypassed.  Each method is appropriate in
 * different situations.  In both cases, it is the responsibility of the
 * aliasing layer to make the operation arguments "correct" for the lower
 * layer by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.  This
 * method is most suitable when you wish to invoke the operation currently
 * being handled on the lower layer.  It has the advantage that the bypass
 * routine already must do argument mapping.  An example of this is
 * layer_getattr().
 *
 * A second approach is to directly invoke vnode operations on the lower
 * layer with the VOP_OPERATIONNAME interface.  The advantage of this method
 * is that it is easy to invoke arbitrary operations on the lower layer.
 * The disadvantage is that the vnode arguments must be mapped manually.
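 *
 * A minimal sketch of the two approaches follows; it is illustrative only,
 * with "vp", "lvp", "ap" and "flags" standing for the usual operation
 * arguments.  Reusing the bypass routine lets it perform the argument
 * mapping:
 *
 *	error = LAYERFS_DO_BYPASS(vp, ap);
 *
 * whereas invoking the lower operation directly (here VOP_LOCK(), much as
 * layer_lock() below does) requires mapping the vnode by hand first:
 *
 *	lvp = LAYERVPTOLOWERVP(vp);
 *	error = VOP_LOCK(lvp, flags);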
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.60 2017/01/27 10:47:13 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>

#include <miscfs/genfs/layer.h>
#include <miscfs/genfs/layer_extern.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/*
 * This is the 08-June-99 bypass routine, based on the 10-Apr-92 bypass
 * routine by John Heidemann.
 * The new element for this version is that the whole nullfs
 * system gained the concept of locks on the lower node.
 * The 10-Apr-92 version was optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * New for the 08-June-99 version: we also handle operations which unlock
 * the passed-in node (typically they vput the node).
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
layer_bypass(void *v)
{
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap = v;
	int (**our_vnodeop_p)(void *);
	struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS], *vp0;
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct mount *mp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i, flags;

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("%s: no vp's in map.\n", __func__);
#endif

	vps_p[0] =
	    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
	vp0 = *vps_p[0];
	mp = vp0->v_mount;
	flags = MOUNTTOLAYERMOUNT(mp)->layerm_flags;
	our_vnodeop_p = vp0->v_op;

	if (flags & LAYERFS_MBYPASSDEBUG)
		printf("%s: %s\n", __func__, descp->vdesc_name);

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p =
		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i],
		    ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULL ||
		    (*this_vp_p)->v_op != our_vnodeop_p)) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = LAYERVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				vref(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*vps_p[0], descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && !error) {
		vppp = VOPARG_OFFSETTO(struct vnode***,
		    descp->vdesc_vpp_offset, ap);
		/*
		 * Only vop_lookup, vop_create, vop_mkdir, vop_mknod
		 * and vop_symlink return vpp's.  vop_lookup doesn't call bypass
		 * as a lookup on "." would generate a locking error.
		 * So all the calls which get us here have an unlocked vpp. :-)
		 */
		error = layer_node_create(mp, **vppp, *vppp);
		if (error) {
			vrele(**vppp);
			**vppp = NULL;
		}
	}
	return error;
}

/*
 * We have to carry on the locking protocol on the layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
int
layer_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp, *lvp, *ldvp;
	int error, flags = cnp->cn_flags;

	dvp = ap->a_dvp;

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		*ap->a_vpp = NULL;
		return EROFS;
	}

	ldvp = LAYERVPTOLOWERVP(dvp);
	ap->a_dvp = ldvp;
	error = VCALL(ldvp, ap->a_desc->vdesc_offset, ap);
	lvp = *ap->a_vpp;
	*ap->a_vpp = NULL;

	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	/*
	 * We must do the same locking and unlocking at this layer as
	 * is done in the layers below us.
	 */
	if (ldvp == lvp) {
		/*
		 * Got the same object back, because we looked up ".",
		 * or ".." in the root node of a mount point.
		 * So we make another reference to dvp and return it.
		 */
		vref(dvp);
		*ap->a_vpp = dvp;
		vrele(lvp);
	} else if (lvp != NULL) {
		/* Note: dvp and ldvp are both locked. */
		error = layer_node_create(dvp->v_mount, lvp, ap->a_vpp);
		if (error) {
			vrele(lvp);
		}
	}
	return error;
}

/*
 * Setattr call.  Disallow write attempts if the layer is mounted read-only.
 */
int
layer_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;

	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return EROFS;
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return EISDIR;
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			return 0;
		case VREG:
		case VLNK:
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We handle getattr only to change the fsid.
 */
int
layer_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error) {
		return error;
	}
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return 0;
}

int
layer_access(void *v)
{
	struct vop_access_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	mode_t mode = ap->a_mode;

	/*
	 * Disallow write attempts on read-only layers;
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the file system.
	 */
	if (mode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
			break;
		default:
			break;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We must handle open to be able to catch MNT_NODEV and friends
 * and increment the lower v_writecount.
 */
int
layer_open(void *v)
{
	struct vop_open_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
	int error;

	if (((lvp->v_type == VBLK) || (lvp->v_type == VCHR)) &&
	    (vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0 && (ap->a_mode & FWRITE)) {
		mutex_enter(lvp->v_interlock);
		lvp->v_writecount++;
		mutex_exit(lvp->v_interlock);
	}
	return error;
}

/*
 * We must handle close to decrement the lower v_writecount.
 */
int
layer_close(void *v)
{
	struct vop_close_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);

	if ((ap->a_fflag & FWRITE)) {
		mutex_enter(lvp->v_interlock);
		KASSERT(lvp->v_writecount > 0);
		lvp->v_writecount--;
		mutex_exit(lvp->v_interlock);
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't bother
 * syncing the underlying vnodes, since they'll be fsync'ed when
 * reclaimed; otherwise, pass it through to the underlying layer.
 *
 * XXX Do we still need to worry about shallow fsync?
 */
int
layer_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;
	int error;

	if (ap->a_flags & FSYNC_RECLAIM) {
		return 0;
	}
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}
	return LAYERFS_DO_BYPASS(ap->a_vp, ap);
}

int
layer_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/*
	 * If we did a remove, don't cache the node.
	 */
	*ap->a_recycle = ((VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED) != 0);

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our layer_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	VOP_UNLOCK(vp);
	return 0;
}

int
layer_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_rename(void *v)
{
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	struct vnode *fdvp = ap->a_fdvp, *tvp;
	int error;

	tvp = ap->a_tvp;
	if (tvp) {
		if (tvp->v_mount != fdvp->v_mount)
			tvp = NULL;
		else
			vref(tvp);
	}
	error = LAYERFS_DO_BYPASS(fdvp, ap);
	if (tvp) {
		if (error == 0)
			VTOLAYER(tvp)->layer_flags |= LAYERFS_REMOVED;
		vrele(tvp);
	}
	return error;
}

int
layer_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct vnode *vp = ap->a_vp;

	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
	int error;

	/*
	 * We will most likely end up in vclean which uses the v_usecount
	 * to determine if a vnode is active.  Take an extra reference on
	 * the lower vnode so it will always close and inactivate.
	 * Remove our writecount from the lower vnode.
	 */
	vref(lvp);

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_interlock == lvp->v_interlock);
	lvp->v_writecount -= vp->v_writecount;
	mutex_exit(vp->v_interlock);

	error = LAYERFS_DO_BYPASS(vp, ap);
	vrele(lvp);

	return error;
}

int
layer_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct layer_mount *lmp = MOUNTTOLAYERMOUNT(vp->v_mount);
	struct layer_node *xp = VTOLAYER(vp);
	struct vnode *lowervp = xp->layer_lowervp;

	/*
	 * Note: in vop_reclaim, the node's struct lock has been
	 * decommissioned, so we have to be careful about calling
	 * VOP's on ourself.  We must be careful as VXLOCK is set.
	 */
	if (vp == lmp->layerm_rootvp) {
		/*
		 * Oops!  We no longer have a root node.  Most likely reason is
		 * that someone forcibly unmounted the underlying fs.
		 *
		 * Now getting the root vnode will fail.  We're dead. :-(
		 */
		lmp->layerm_rootvp = NULL;
	}
	/* After this assignment, this node will not be re-used. */
	xp->layer_lowervp = NULL;
	kmem_free(vp->v_data, lmp->layerm_size);
	vp->v_data = NULL;
	vrele(lowervp);

	return 0;
}

int
layer_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lowervp = LAYERVPTOLOWERVP(vp);
	int flags = ap->a_flags;
	int error;

	if (ISSET(flags, LK_NOWAIT)) {
		error = VOP_LOCK(lowervp, flags);
		if (error)
			return error;
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			VOP_UNLOCK(lowervp);
		return error;
	}

	error = VOP_LOCK(lowervp, flags);
	if (error)
		return error;

	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		VOP_UNLOCK(lowervp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);

	return error;
}

/*
 * We just feed the returned vnode up to the caller - there's no need
 * to build a layer node on top of the node on which we're going to do
 * i/o. :-)
 */
int
layer_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;
	struct vnode *vp;

	vp = LAYERVPTOLOWERVP(ap->a_vp);
	ap->a_vp = vp;

	return VCALL(vp, ap->a_desc->vdesc_offset, ap);
}

int
layer_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_LAYERFS, vp=%p, lowervp=%p\n", vp, LAYERVPTOLOWERVP(vp));
	return 0;
}

int
layer_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}

int
layer_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}