/*
 * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
 * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry and by John Heidemann of the UCLA Ficus project.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)union_vnops.c	1.1 (Berkeley) 01/28/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include "union.h"

int union_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 * Define SAFETY to include some error checking code.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
union_bypass(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (union_bug_bypass)
		printf("union_bypass: %s\n", descp->vdesc_name);

#ifdef SAFETY
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("union_bypass: no vp's in map.\n");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;			/* bail out at end of list */
		vps_p[i] = this_vp_p =
			VOPARG_OFFSETTO(struct vnode **,
			    descp->vdesc_vp_offsets[i], ap);

		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or
		 * vclean fails.)
		 */
		if (i && (*this_vp_p)->v_op != union_vnodeop_p) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = OTHERVP(*this_vp_p);

			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & 1)
				VREF(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;			/* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & 1)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp.
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
			goto out;
		vppp = VOPARG_OFFSETTO(struct vnode ***,
		    descp->vdesc_vpp_offset, ap);
		panic("union: failed to handle returned vnode");
		error = union_allocvp(0, 0, 0, 0, 0, 0);	/* XXX: unfinished */
	}

out:
	return (error);
}
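#if 0
/*
 * Illustrative sketch (not part of the original file): how the bypass
 * routine maps a single vnode argument.  OTHERVP() is assumed, per its
 * use above, to yield the upper vnode of a union node when one exists
 * and the lower vnode otherwise; "willrele" stands for the per-argument
 * bit taken from vdesc_flags, meaning the called op will vrele its
 * argument, so we compensate with an extra reference here.
 */
static struct vnode *
union_map_one(vp, willrele)
	struct vnode *vp;
	int willrele;
{
	struct vnode *mapped;

	if (vp->v_op != union_vnodeop_p)
		return (vp);		/* not one of ours: leave it unmapped */
	mapped = OTHERVP(vp);		/* substitute the underlying vnode */
	if (willrele)
		VREF(vp);		/* the callee will vrele(vp) */
	return (mapped);
}
#endif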
/*
 * Check access permission on the union vnode.
 * The access check being enforced is to check
 * against both the underlying vnode, and any
 * copied vnode.  This ensures that no additional
 * file permissions are given away simply because
 * the user caused an implicit file copy.
 */
int
union_access(ap)
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	if (vp = un->un_lowervp) {
		int error;

		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
		if (error)
			return (error);
	}

	if (vp = un->un_uppervp)
		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));

	return (0);
}
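#if 0
/*
 * Illustrative sketch (not part of the original file): the shadow
 * directory creation policy used by union_mkshadow() below, pulled out
 * as a helper.  UN_DIRMODE is assumed (from union.h) to be a 0777-style
 * mask, so the result is mostly identical to what the mkdir syscall
 * would produce, e.g. umask 022 yields mode 0755.
 */
static int
union_shadow_mode(p)
	struct proc *p;
{
	/* mode 777 modified by the process's umask */
	return (UN_DIRMODE & ~p->p_fd->fd_cmask);
}
#endif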
static int
union_mkshadow(dvp, cnp, vpp)
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	int lockparent = (cnp->cn_flags & LOCKPARENT);

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the current user,
	 * group from parent directory, and mode 777 modified
	 * by umask (ie mostly identical to the mkdir syscall).
	 * (jsp, kb)
	 * TODO: create the directory owned by the user who
	 * did the mount (um->um_cred).
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = UN_DIRMODE &~ p->p_fd->fd_cmask;

	if (lockparent)
		VOP_UNLOCK(dvp);
	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
	VOP_LOCK(dvp);
	error = VOP_MKDIR(dvp, vpp, cnp, &va);
	if (lockparent)
		VOP_LOCK(dvp);
	return (error);
}

static int
union_lookup1(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	int error;
	struct vnode *tdvp;
	struct mount *mp;

	if (cnp->cn_flags & ISDOTDOT) {
		for (;;) {
			if ((dvp->v_flag & VROOT) == 0 ||
			    (cnp->cn_flags & NOCROSSMOUNT))
				break;

			tdvp = dvp;
			dvp = dvp->v_mount->mnt_vnodecovered;
			vput(tdvp);
			VREF(dvp);
			VOP_LOCK(dvp);
		}
	}

	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);

	dvp = tdvp;
	while ((dvp->v_type == VDIR) && (mp = dvp->v_mountedhere) &&
	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		if (mp->mnt_flag & MNT_MLOCK) {
			mp->mnt_flag |= MNT_MWAIT;
			sleep((caddr_t) mp, PVFS);
			continue;
		}
		if (error = VFS_ROOT(mp, &tdvp)) {
			vput(dvp);
			return (error);
		}
		/* release the covered vnode and continue with the root */
		vput(dvp);
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}

int
union_lookup(ap)
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	int lockparent = cnp->cn_flags & LOCKPARENT;

	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	uppervp = 0;
	if (upperdvp) {
		uerror = union_lookup1(upperdvp, &uppervp, cnp);
		if (cnp->cn_consume != 0) {
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (!lockparent)
			VOP_LOCK(upperdvp);
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.  this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	lowervp = 0;
	if (lowerdvp) {
		lerror = union_lookup1(lowerdvp, &lowervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp) {
				vput(uppervp);
				uppervp = 0;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
		if (!lockparent)
			VOP_LOCK(lowerdvp);
	} else {
		lerror = ENOENT;
	}

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		*ap->a_vpp = 0;
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */) {
		if (lowervp->v_type == VDIR) {
			/* case 2b. */
			uerror = union_mkshadow(upperdvp, cnp, &uppervp);
			if (uerror) {
				if (lowervp) {
					vput(lowervp);
					lowervp = 0;
				}
				return (uerror);
			}
		}
	}

	return (union_allocvp(ap->a_vpp, dvp->v_mount, dvp, cnp,
	    uppervp, lowervp));
}
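#if 0
/*
 * Illustrative sketch (not part of the original file): the three-way
 * case analysis at the end of union_lookup() above, reduced to a
 * decision function.  The function name and the numeric encodings
 * (1, 21, 22, 3 for cases 1, 2a, 2b and 3) are hypothetical, chosen
 * only to mirror the big comment.
 */
static int
union_lookup_case(uerror, lerror, lowervp)
	int uerror, lerror;
	struct vnode *lowervp;
{
	if (uerror != 0 && lerror != 0)
		return (1);	/* case 1: both lookups failed */
	if (uerror != 0) {
		if (lowervp->v_type == VDIR)
			return (22);	/* case 2b: make a shadow directory */
		return (21);		/* case 2a: empty top layer */
	}
	return (3);	/* case 3: top layer succeeded */
}
#endif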
/*
 * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.
 */
static int
union_copyfile(p, cred, fvp, tvp)
	struct proc *p;
	struct ucred *cred;
	struct vnode *fvp;
	struct vnode *tvp;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;
	off_t offset;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */
	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	offset = 0;

	VOP_UNLOCK(fvp);				/* XXX */
	LEASE_CHECK(fvp, p, cred, LEASE_READ);
	VOP_LOCK(fvp);					/* XXX */
	VOP_UNLOCK(tvp);				/* XXX */
	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
	VOP_LOCK(tvp);					/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	do {
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_offset = offset;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			if (iov.iov_len == 0)
				break;		/* EOF: nothing was read */
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;
			uio.uio_offset = offset;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((error == 0) && (uio.uio_resid > 0));
			if (error == 0)
				offset = uio.uio_offset;
		}
	} while ((uio.uio_resid == 0) && (error == 0));

	free(buf, M_TEMP);
	return (error);
}

int
union_open(ap)
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	int mode = ap->a_mode;
	struct ucred *cred = ap->a_cred;
	struct proc *p = ap->a_p;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	if (un->un_uppervp)
		return (VOP_OPEN(un->un_uppervp, mode, cred, p));

	/*
	 * If the lower vnode is being opened for writing, then
	 * copy the file contents to the upper vnode and open that,
	 * otherwise we can simply open the lower vnode.
	 */
	if ((ap->a_mode & FWRITE) && (un->un_lowervp->v_type == VREG)) {
		int error;
		struct nameidata nd;
		struct filedesc *fdp = p->p_fd;
		int fmode;
		int cmode;

		/*
		 * Open the named file in the upper layer.  Note that
		 * the file may have come into existence *since* the
		 * lookup was done, since the upper layer may really be
		 * a loopback mount of some other filesystem... so open
		 * the file with exclusive create and barf if it already
		 * exists.
		 * XXX - perhaps we should re-lookup the node (once more
		 * with feeling) and simply open that.  Who knows.
		 */
		NDINIT(&nd, CREATE, 0, UIO_SYSSPACE, un->un_path, p);
		fmode = (O_CREAT|O_TRUNC|O_EXCL);
		cmode = UN_FILEMODE & ~fdp->fd_cmask;
		error = vn_open(&nd, fmode, cmode);
		if (error)
			return (error);
		un->un_uppervp = nd.ni_vp;

		/*
		 * Now, if the file is being opened with truncation,
		 * then the (new) upper vnode is ready to fly,
		 * otherwise the data from the lower vnode must be
		 * copied to the upper layer first.  This only works
		 * for regular files (check is made above).
		 */
		if ((mode & O_TRUNC) == 0) {
			/* XXX - should not ignore errors from VOP_CLOSE */
			error = VOP_OPEN(un->un_lowervp, FREAD, cred, p);
			if (error == 0) {
				error = union_copyfile(p, cred,
				    un->un_lowervp, un->un_uppervp);
				(void) VOP_CLOSE(un->un_lowervp, FREAD, cred, p);
			}
			(void) VOP_CLOSE(un->un_uppervp, FWRITE, cred, p);
		}
		if (error == 0)
			error = VOP_OPEN(un->un_uppervp, FREAD, cred, p);
		return (error);
	}

	return (VOP_OPEN(un->un_lowervp, mode, cred, p));
}
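#if 0
/*
 * Illustrative sketch (not part of the original file): the copy-up
 * decision made by union_open() above, stated as a predicate.  A file
 * is copied to the upper layer only when it exists solely in the lower
 * layer, is being opened for writing, and is a regular file; all other
 * opens go straight to whichever layer holds the file.
 */
static int
union_wants_copyup(un, mode)
	struct union_node *un;
	int mode;
{
	return (un->un_uppervp == NULLVP &&	/* no upper copy yet */
	    (mode & FWRITE) &&			/* opened for writing */
	    un->un_lowervp->v_type == VREG);	/* regular file only */
}
#endif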
/*
 * We handle getattr only to change the fsid.
 */
int
union_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	int error;

	if (error = union_bypass(ap))
		return (error);
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return (0);
}

/*
 * union_readdir works in concert with getdirentries and
 * readdir(3) to provide a list of entries in the unioned
 * directories.  getdirentries is responsible for walking
 * down the union stack.  readdir(3) is responsible for
 * eliminating duplicate names from the returned data stream.
 */
int
union_readdir(ap)
	struct vop_readdir_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);

	if (un->un_uppervp)
		return (union_bypass(ap));

	return (0);
}

int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return (0);
}

int
union_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode *uppervp = un->un_uppervp;
	struct vnode *lowervp = un->un_lowervp;
	struct vnode *dirvp = un->un_dirvp;
	char *path = un->un_path;

	/*
	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
	 * so we can't call VOPs on ourself.
	 */
	/* After this assignment, this node will not be re-used. */
	un->un_uppervp = 0;
	un->un_lowervp = 0;
	un->un_dirvp = 0;
	un->un_path = NULL;
	union_freevp(vp);
	if (uppervp)
		vrele(uppervp);
	if (lowervp)
		vrele(lowervp);
	if (dirvp)
		vrele(dirvp);
	if (path)
		free(path, M_TEMP);
	return (0);
}

int
union_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
	    vp, UPPERVP(vp), LOWERVP(vp));
	return (0);
}

/*
 * XXX - vop_strategy must be hand coded because it has no
 * vnode in its arguments.
 * This goes away with a merged VM/buffer cache.
 */
int
union_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	int error;
	struct vnode *savedvp;

	savedvp = bp->b_vp;
	bp->b_vp = OTHERVP(bp->b_vp);

#ifdef DIAGNOSTIC
	if (bp->b_vp == 0)
		panic("union_strategy: nil vp");
	if (((bp->b_flags & B_READ) == 0) &&
	    (bp->b_vp == LOWERVP(savedvp)))
		panic("union_strategy: writing to lowervp");
#endif

	error = VOP_STRATEGY(bp);
	bp->b_vp = savedvp;

	return (error);
}
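#if 0
/*
 * Illustrative sketch (not part of the original file): the
 * save/swap/restore pattern shared by union_strategy() above and
 * union_bwrite() below.  The buffer is pointed at the underlying
 * vnode for the duration of one operation, then restored, so the
 * caller never observes the substitution.  The helper name is
 * hypothetical.
 */
static int
union_buf_call_example(bp)
	struct buf *bp;
{
	struct vnode *savedvp = bp->b_vp;	/* remember the union vnode */
	int error;

	bp->b_vp = OTHERVP(bp->b_vp);		/* swap in the upper/lower vnode */
	error = VOP_STRATEGY(bp);		/* run the op on that layer */
	bp->b_vp = savedvp;			/* restore call-by-value illusion */
	return (error);
}
#endif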
/*
 * XXX - like vop_strategy, vop_bwrite must be hand coded because it
 * has no vnode in its arguments.
 * This goes away with a merged VM/buffer cache.
 */
int
union_bwrite(ap)
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	int error;
	struct vnode *savedvp;

	savedvp = bp->b_vp;
	bp->b_vp = UPPERVP(bp->b_vp);

#ifdef DIAGNOSTIC
	if (bp->b_vp == 0)
		panic("union_bwrite: no upper vp");
#endif

	error = VOP_BWRITE(bp);
	bp->b_vp = savedvp;

	return (error);
}

int
union_lock(ap)
	struct vop_lock_args *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);

#ifdef DIAGNOSTIC
	if (un->un_pid == curproc->p_pid)
		panic("union: locking against myself");
#endif
	while (un->un_flags & UN_LOCKED) {
		un->un_flags |= UN_WANT;
		sleep((caddr_t) &un->un_flags, PINOD);
	}
	un->un_flags |= UN_LOCKED;
#ifdef DIAGNOSTIC
	un->un_pid = curproc->p_pid;
#endif

	if (un->un_lowervp && !VOP_ISLOCKED(un->un_lowervp))
		VOP_LOCK(un->un_lowervp);
	if (un->un_uppervp && !VOP_ISLOCKED(un->un_uppervp))
		VOP_LOCK(un->un_uppervp);

	return (0);
}

int
union_unlock(ap)
	struct vop_lock_args *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);

#ifdef DIAGNOSTIC
	if (un->un_pid != curproc->p_pid)
		panic("union: unlocking other process's union node");
	if ((un->un_flags & UN_LOCKED) == 0)
		panic("union: unlock unlocked node");
#endif

	if (un->un_uppervp && VOP_ISLOCKED(un->un_uppervp))
		VOP_UNLOCK(un->un_uppervp);
	if (un->un_lowervp && VOP_ISLOCKED(un->un_lowervp))
		VOP_UNLOCK(un->un_lowervp);

	un->un_flags &= ~UN_LOCKED;
	if (un->un_flags & UN_WANT) {
		un->un_flags &= ~UN_WANT;
		wakeup((caddr_t) &un->un_flags);
	}
#ifdef DIAGNOSTIC
	un->un_pid = 0;
#endif

	return (0);
}

/*
 * Global vfs data structures
 */
int (**union_vnodeop_p)();
struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, union_bypass },
	{ &vop_lookup_desc, union_lookup },
	{ &vop_open_desc, union_open },
	{ &vop_access_desc, union_access },
	{ &vop_getattr_desc, union_getattr },
	{ &vop_readdir_desc, union_readdir },
	{ &vop_inactive_desc, union_inactive },
	{ &vop_reclaim_desc, union_reclaim },
	{ &vop_print_desc, union_print },
	{ &vop_strategy_desc, union_strategy },
	{ &vop_bwrite_desc, union_bwrite },
	{ &vop_lock_desc, union_lock },
	{ &vop_unlock_desc, union_unlock },
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
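#if 0
/*
 * Illustrative sketch (not part of the original file): how the table
 * above routes operations.  Any op without an explicit entry falls to
 * vop_default_desc, i.e. union_bypass(), which forwards it to the
 * underlying vnode.  Dispatch through the vector is by the op's
 * vdesc_offset, roughly what the VCALL macro used in union_bypass()
 * expands to.  The function name is hypothetical.
 */
static int
union_dispatch_example(vp, desc, ap)
	struct vnode *vp;
	struct vnodeop_desc *desc;
	struct vop_generic_args *ap;
{
	/* index the vnode's op vector by the descriptor's offset */
	return ((*(vp->v_op[desc->vdesc_offset]))(ap));
}
#endif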