165934Spendry /* 265934Spendry * Copyright (c) 1994 Jan-Simon Pendry 365934Spendry * Copyright (c) 1994 465934Spendry * The Regents of the University of California. All rights reserved. 565934Spendry * 665934Spendry * This code is derived from software contributed to Berkeley by 765934Spendry * Jan-Simon Pendry. 865934Spendry * 965934Spendry * %sccs.include.redist.c% 1065934Spendry * 11*66034Spendry * @(#)union_subr.c 1.9 (Berkeley) 02/08/94 1265934Spendry */ 1365934Spendry 1465934Spendry #include <sys/param.h> 1565934Spendry #include <sys/systm.h> 1665934Spendry #include <sys/time.h> 1765934Spendry #include <sys/kernel.h> 1865934Spendry #include <sys/vnode.h> 1965934Spendry #include <sys/namei.h> 2065934Spendry #include <sys/malloc.h> 2165994Spendry #include <sys/file.h> 2265997Spendry #include <sys/filedesc.h> 2365934Spendry #include "union.h" /*<miscfs/union/union.h>*/ 2465934Spendry 2565992Spendry #ifdef DIAGNOSTIC 2665992Spendry #include <sys/proc.h> 2765992Spendry #endif 2865992Spendry 2965934Spendry static struct union_node *unhead; 3065934Spendry static int unvplock; 3165934Spendry 3265934Spendry int 3365934Spendry union_init() 3465934Spendry { 3565934Spendry 3665934Spendry unhead = 0; 3765934Spendry unvplock = 0; 3865934Spendry } 3965934Spendry 4065934Spendry /* 4165934Spendry * allocate a union_node/vnode pair. the vnode is 4265965Spendry * referenced and locked. the new vnode is returned 4365965Spendry * via (vpp). (mp) is the mountpoint of the union filesystem, 4465965Spendry * (dvp) is the parent directory where the upper layer object 4565965Spendry * should exist (but doesn't) and (cnp) is the componentname 4665965Spendry * information which is partially copied to allow the upper 4765965Spendry * layer object to be created at a later time. (uppervp) 4865965Spendry * and (lowervp) reference the upper and lower layer objects 4965965Spendry * being mapped. either, but not both, can be nil. 5065997Spendry * the reference is either maintained in the new union_node 5165997Spendry * object which is allocated, or they are vrele'd. 5265934Spendry * 5365934Spendry * all union_nodes are maintained on a singly-linked 5465934Spendry * list. new nodes are only allocated when they cannot 5565934Spendry * be found on this list. entries on the list are 5665934Spendry * removed when the vfs reclaim entry is called. 5765934Spendry * 5865934Spendry * a single lock is kept for the entire list. this is 5965934Spendry * needed because the getnewvnode() function can block 6065934Spendry * waiting for a vnode to become free, in which case there 6165934Spendry * may be more than one process trying to get the same 6265934Spendry * vnode. this lock is only taken if we are going to 6365934Spendry * call getnewvnode, since the kernel itself is single-threaded. 6465934Spendry * 6565934Spendry * if an entry is found on the list, then call vget() to 6665934Spendry * take a reference. this is done because there may be 6765934Spendry * zero references to it and so it needs to removed from 6865934Spendry * the vnode free list. 6965934Spendry */ 7065934Spendry int 7165992Spendry union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp) 7265934Spendry struct vnode **vpp; 7365934Spendry struct mount *mp; 7465992Spendry struct vnode *undvp; 7565934Spendry struct vnode *dvp; /* may be null */ 7665934Spendry struct componentname *cnp; /* may be null */ 7765934Spendry struct vnode *uppervp; /* may be null */ 7865934Spendry struct vnode *lowervp; /* may be null */ 7965934Spendry { 8065934Spendry int error; 8165934Spendry struct union_node *un; 8265934Spendry struct union_node **pp; 8365965Spendry struct vnode *xlowervp = 0; 8465934Spendry 8565965Spendry if (uppervp == 0 && lowervp == 0) 8665965Spendry panic("union: unidentifiable allocation"); 8765965Spendry 8865965Spendry if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { 8965965Spendry xlowervp = lowervp; 9065965Spendry lowervp = 0; 9165965Spendry } 9265965Spendry 9365934Spendry loop: 9465934Spendry for (un = unhead; un != 0; un = un->un_next) { 9565934Spendry if ((un->un_lowervp == lowervp || 9665934Spendry un->un_lowervp == 0) && 9765934Spendry (un->un_uppervp == uppervp || 9865934Spendry un->un_uppervp == 0) && 9965934Spendry (UNIONTOV(un)->v_mount == mp)) { 10065992Spendry if (vget(UNIONTOV(un), 0)) 10165934Spendry goto loop; 10265992Spendry if (UNIONTOV(un) != undvp) 10365992Spendry VOP_LOCK(UNIONTOV(un)); 10466027Spendry 10566027Spendry /* 10666027Spendry * Save information about the upper layer. 10766027Spendry */ 10865992Spendry if (uppervp != un->un_uppervp) { 10965992Spendry if (un->un_uppervp) 11065992Spendry vrele(un->un_uppervp); 11165992Spendry un->un_uppervp = uppervp; 11265997Spendry } else if (uppervp) { 11365997Spendry vrele(uppervp); 11465992Spendry } 11566027Spendry 11666027Spendry /* 11766027Spendry * Save information about the lower layer. 11866027Spendry * This needs to keep track of pathname 11966027Spendry * and directory information which union_vn_create 12066027Spendry * might need. 12166027Spendry */ 12265992Spendry if (lowervp != un->un_lowervp) { 12366027Spendry if (un->un_lowervp) { 12465992Spendry vrele(un->un_lowervp); 12566027Spendry free(un->un_path, M_TEMP); 12666027Spendry vrele(un->un_dirvp); 12766027Spendry } 12865992Spendry un->un_lowervp = lowervp; 12966027Spendry if (cnp && (lowervp != NULLVP) && 13066027Spendry (lowervp->v_type == VREG)) { 13166027Spendry un->un_hash = cnp->cn_hash; 13266027Spendry un->un_path = malloc(cnp->cn_namelen+1, 13366027Spendry M_TEMP, M_WAITOK); 13466027Spendry bcopy(cnp->cn_nameptr, un->un_path, 13566027Spendry cnp->cn_namelen); 13666027Spendry un->un_path[cnp->cn_namelen] = '\0'; 13766027Spendry VREF(dvp); 13866027Spendry un->un_dirvp = dvp; 13966027Spendry } 14065997Spendry } else if (lowervp) { 14165997Spendry vrele(lowervp); 14265992Spendry } 14365992Spendry *vpp = UNIONTOV(un); 14465934Spendry return (0); 14565934Spendry } 14665934Spendry } 14765934Spendry 14865934Spendry /* 14965934Spendry * otherwise lock the vp list while we call getnewvnode 15065934Spendry * since that can block. 15165934Spendry */ 15265934Spendry if (unvplock & UN_LOCKED) { 15365934Spendry unvplock |= UN_WANT; 15465934Spendry sleep((caddr_t) &unvplock, PINOD); 15565934Spendry goto loop; 15665934Spendry } 15765934Spendry unvplock |= UN_LOCKED; 15865934Spendry 15965934Spendry error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); 16065934Spendry if (error) 16165934Spendry goto out; 16265934Spendry 16365934Spendry MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), 16465934Spendry M_TEMP, M_WAITOK); 16565934Spendry 16665965Spendry if (uppervp) 16765965Spendry (*vpp)->v_type = uppervp->v_type; 16865965Spendry else 16965965Spendry (*vpp)->v_type = lowervp->v_type; 17065934Spendry un = VTOUNION(*vpp); 17165992Spendry un->un_vnode = *vpp; 17265934Spendry un->un_next = 0; 17365934Spendry un->un_uppervp = uppervp; 17465934Spendry un->un_lowervp = lowervp; 17566028Spendry un->un_openl = 0; 17665934Spendry un->un_flags = 0; 17766027Spendry if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) { 17866027Spendry un->un_hash = cnp->cn_hash; 17965934Spendry un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); 18065934Spendry bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); 18165934Spendry un->un_path[cnp->cn_namelen] = '\0'; 18265965Spendry VREF(dvp); 18365965Spendry un->un_dirvp = dvp; 18465934Spendry } else { 18566027Spendry un->un_hash = 0; 18665934Spendry un->un_path = 0; 18765965Spendry un->un_dirvp = 0; 18865934Spendry } 18965934Spendry 19065934Spendry /* add to union vnode list */ 19165934Spendry for (pp = &unhead; *pp; pp = &(*pp)->un_next) 19265934Spendry continue; 19365934Spendry *pp = un; 19465934Spendry 19565992Spendry un->un_flags |= UN_LOCKED; 19665965Spendry 19765992Spendry #ifdef DIAGNOSTIC 19865992Spendry un->un_pid = curproc->p_pid; 19965992Spendry #endif 20065992Spendry 20165965Spendry if (xlowervp) 20265965Spendry vrele(xlowervp); 20365965Spendry 20465934Spendry out: 20565934Spendry unvplock &= ~UN_LOCKED; 20665934Spendry 20765934Spendry if (unvplock & UN_WANT) { 20865934Spendry unvplock &= ~UN_WANT; 20965934Spendry wakeup((caddr_t) &unvplock); 21065934Spendry } 21165934Spendry 21265934Spendry return (error); 21365934Spendry } 21465934Spendry 21565934Spendry int 21665934Spendry union_freevp(vp) 21765934Spendry struct vnode *vp; 21865934Spendry { 21965934Spendry struct union_node **unpp; 22065934Spendry struct union_node *un = VTOUNION(vp); 22165934Spendry 22265934Spendry for (unpp = &unhead; *unpp != 0; unpp = &(*unpp)->un_next) { 22365934Spendry if (*unpp == un) { 22465934Spendry *unpp = un->un_next; 22565934Spendry break; 22665934Spendry } 22765934Spendry } 22865934Spendry 22965934Spendry FREE(vp->v_data, M_TEMP); 23065934Spendry vp->v_data = 0; 23165934Spendry return (0); 23265934Spendry } 23365994Spendry 23465994Spendry /* 23565994Spendry * copyfile. copy the vnode (fvp) to the vnode (tvp) 23665994Spendry * using a sequence of reads and writes. both (fvp) 23765994Spendry * and (tvp) are locked on entry and exit. 23865994Spendry */ 23965994Spendry int 24065994Spendry union_copyfile(p, cred, fvp, tvp) 24165994Spendry struct proc *p; 24265994Spendry struct ucred *cred; 24365994Spendry struct vnode *fvp; 24465994Spendry struct vnode *tvp; 24565994Spendry { 24665994Spendry char *buf; 24765994Spendry struct uio uio; 24865994Spendry struct iovec iov; 24965994Spendry int error = 0; 25065994Spendry 25165994Spendry /* 25265994Spendry * strategy: 25365994Spendry * allocate a buffer of size MAXBSIZE. 25465994Spendry * loop doing reads and writes, keeping track 25565994Spendry * of the current uio offset. 25665994Spendry * give up at the first sign of trouble. 25765994Spendry */ 25865994Spendry 25965994Spendry uio.uio_procp = p; 26065994Spendry uio.uio_segflg = UIO_SYSSPACE; 26165994Spendry uio.uio_offset = 0; 26265994Spendry 26365994Spendry VOP_UNLOCK(fvp); /* XXX */ 26465994Spendry LEASE_CHECK(fvp, p, cred, LEASE_READ); 26565994Spendry VOP_LOCK(fvp); /* XXX */ 26665994Spendry VOP_UNLOCK(tvp); /* XXX */ 26765994Spendry LEASE_CHECK(tvp, p, cred, LEASE_WRITE); 26865994Spendry VOP_LOCK(tvp); /* XXX */ 26965994Spendry 27065994Spendry buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); 27165994Spendry 27265994Spendry /* ugly loop follows... */ 27365994Spendry do { 27465994Spendry off_t offset = uio.uio_offset; 27565994Spendry 27665994Spendry uio.uio_iov = &iov; 27765994Spendry uio.uio_iovcnt = 1; 27865994Spendry iov.iov_base = buf; 27965994Spendry iov.iov_len = MAXBSIZE; 28065994Spendry uio.uio_resid = iov.iov_len; 28165994Spendry uio.uio_rw = UIO_READ; 28265994Spendry error = VOP_READ(fvp, &uio, 0, cred); 28365994Spendry 28465994Spendry if (error == 0) { 28565994Spendry uio.uio_iov = &iov; 28665994Spendry uio.uio_iovcnt = 1; 28765994Spendry iov.iov_base = buf; 28865994Spendry iov.iov_len = MAXBSIZE - uio.uio_resid; 28965994Spendry uio.uio_offset = offset; 29065994Spendry uio.uio_rw = UIO_WRITE; 29165994Spendry uio.uio_resid = iov.iov_len; 29265994Spendry 29365994Spendry if (uio.uio_resid == 0) 29465994Spendry break; 29565994Spendry 29665994Spendry do { 29765994Spendry error = VOP_WRITE(tvp, &uio, 0, cred); 29865994Spendry } while ((uio.uio_resid > 0) && (error == 0)); 29965994Spendry } 30065994Spendry 30165994Spendry } while (error == 0); 30265994Spendry 30365994Spendry free(buf, M_TEMP); 30465994Spendry return (error); 30565994Spendry } 30665994Spendry 30765994Spendry /* 30865997Spendry * Create a shadow directory in the upper layer. 30965997Spendry * The new vnode is returned locked. 31065997Spendry * 31165997Spendry * (um) points to the union mount structure for access to the 31265997Spendry * the mounting process's credentials. 31365997Spendry * (dvp) is the directory in which to create the shadow directory. 31465997Spendry * it is unlocked on entry and exit. 31565997Spendry * (cnp) is the componentname to be created. 31665997Spendry * (vpp) is the returned newly created shadow directory, which 31765997Spendry * is returned locked. 31865997Spendry */ 31965997Spendry int 32065997Spendry union_mkshadow(um, dvp, cnp, vpp) 32165997Spendry struct union_mount *um; 32265997Spendry struct vnode *dvp; 32365997Spendry struct componentname *cnp; 32465997Spendry struct vnode **vpp; 32565997Spendry { 32665997Spendry int error; 32765997Spendry struct vattr va; 32865997Spendry struct proc *p = cnp->cn_proc; 32965997Spendry struct componentname cn; 33065997Spendry 33165997Spendry /* 33265997Spendry * policy: when creating the shadow directory in the 333*66034Spendry * upper layer, create it owned by the user who did 334*66034Spendry * the mount, group from parent directory, and mode 335*66034Spendry * 777 modified by umask (ie mostly identical to the 336*66034Spendry * mkdir syscall). (jsp, kb) 33765997Spendry */ 33865997Spendry 33965997Spendry /* 34065997Spendry * A new componentname structure must be faked up because 34165997Spendry * there is no way to know where the upper level cnp came 34265997Spendry * from or what it is being used for. This must duplicate 34365997Spendry * some of the work done by NDINIT, some of the work done 34465997Spendry * by namei, some of the work done by lookup and some of 34565997Spendry * the work done by VOP_LOOKUP when given a CREATE flag. 34665997Spendry * Conclusion: Horrible. 34765997Spendry * 34865997Spendry * The pathname buffer will be FREEed by VOP_MKDIR. 34965997Spendry */ 35065997Spendry cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK); 35166027Spendry bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen); 35266027Spendry cn.cn_pnbuf[cnp->cn_namelen] = '\0'; 35365997Spendry 35465997Spendry cn.cn_nameiop = CREATE; 35565997Spendry cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN); 35665997Spendry cn.cn_proc = cnp->cn_proc; 357*66034Spendry cn.cn_cred = um->um_cred; 35865997Spendry cn.cn_nameptr = cn.cn_pnbuf; 35965997Spendry cn.cn_namelen = cnp->cn_namelen; 36065997Spendry cn.cn_hash = cnp->cn_hash; 36165997Spendry cn.cn_consume = cnp->cn_consume; 36265997Spendry 36366027Spendry VREF(dvp); 36465997Spendry if (error = relookup(dvp, vpp, &cn)) 36565997Spendry return (error); 36666027Spendry vrele(dvp); 36765997Spendry 36865997Spendry if (*vpp) { 36965997Spendry VOP_ABORTOP(dvp, &cn); 37065997Spendry VOP_UNLOCK(dvp); 37165997Spendry vrele(*vpp); 37265997Spendry *vpp = NULLVP; 37365997Spendry return (EEXIST); 37465997Spendry } 37565997Spendry 37665997Spendry VATTR_NULL(&va); 37765997Spendry va.va_type = VDIR; 378*66034Spendry va.va_mode = um->um_cmode; 37965997Spendry 38065997Spendry /* LEASE_CHECK: dvp is locked */ 38165997Spendry LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE); 38265997Spendry 38365997Spendry VREF(dvp); 38465997Spendry error = VOP_MKDIR(dvp, vpp, &cn, &va); 38565997Spendry return (error); 38665997Spendry } 38765997Spendry 38865997Spendry /* 38965994Spendry * union_vn_create: creates and opens a new shadow file 39065994Spendry * on the upper union layer. this function is similar 39165994Spendry * in spirit to calling vn_open but it avoids calling namei(). 39265994Spendry * the problem with calling namei is that a) it locks too many 39365994Spendry * things, and b) it doesn't start at the "right" directory, 39465994Spendry * whereas relookup is told where to start. 39565994Spendry */ 39665994Spendry int 39765997Spendry union_vn_create(vpp, un, p) 39865994Spendry struct vnode **vpp; 39965994Spendry struct union_node *un; 40065994Spendry struct proc *p; 40165994Spendry { 40265994Spendry struct vnode *vp; 40365994Spendry struct ucred *cred = p->p_ucred; 40465994Spendry struct vattr vat; 40565994Spendry struct vattr *vap = &vat; 40665994Spendry int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); 40765994Spendry int error; 40866027Spendry int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; 40965994Spendry char *cp; 41065994Spendry struct componentname cn; 41165994Spendry 41265994Spendry *vpp = NULLVP; 41365994Spendry 414*66034Spendry /* 415*66034Spendry * Build a new componentname structure (for the same 416*66034Spendry * reasons outlines in union_mkshadow). 417*66034Spendry * The difference here is that the file is owned by 418*66034Spendry * the current user, rather than by the person who 419*66034Spendry * did the mount, since the current user needs to be 420*66034Spendry * able to write the file (that's why it is being 421*66034Spendry * copied in the first place). 422*66034Spendry */ 42365994Spendry cn.cn_namelen = strlen(un->un_path); 42465994Spendry cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK); 42565994Spendry bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); 42665994Spendry cn.cn_nameiop = CREATE; 42765994Spendry cn.cn_flags = (LOCKLEAF|LOCKPARENT|HASBUF|SAVENAME|ISLASTCN); 42865994Spendry cn.cn_proc = p; 42965994Spendry cn.cn_cred = p->p_ucred; 43065994Spendry cn.cn_nameptr = cn.cn_pnbuf; 43166027Spendry cn.cn_hash = un->un_hash; 43265994Spendry cn.cn_consume = 0; 43365994Spendry 43466027Spendry VREF(un->un_dirvp); 43565994Spendry if (error = relookup(un->un_dirvp, &vp, &cn)) 43665994Spendry return (error); 43766027Spendry vrele(un->un_dirvp); 43866027Spendry 43965994Spendry if (vp == NULLVP) { 440*66034Spendry /* 441*66034Spendry * Good - there was no race to create the file 442*66034Spendry * so go ahead and create it. The permissions 443*66034Spendry * on the file will be 0666 modified by the 444*66034Spendry * current user's umask. Access to the file, while 445*66034Spendry * it is unioned, will require access to the top *and* 446*66034Spendry * bottom files. Access when not unioned will simply 447*66034Spendry * require access to the top-level file. 448*66034Spendry * TODO: confirm choice of access permissions. 449*66034Spendry */ 45065994Spendry VATTR_NULL(vap); 45165994Spendry vap->va_type = VREG; 45265994Spendry vap->va_mode = cmode; 45365994Spendry LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE); 45465994Spendry if (error = VOP_CREATE(un->un_dirvp, &vp, 45565994Spendry &cn, vap)) 45665994Spendry return (error); 45765994Spendry } else { 45865994Spendry VOP_ABORTOP(un->un_dirvp, &cn); 45965994Spendry if (un->un_dirvp == vp) 46065994Spendry vrele(un->un_dirvp); 46165994Spendry else 46265994Spendry vput(vp); 46365994Spendry error = EEXIST; 46465994Spendry goto bad; 46565994Spendry } 46665994Spendry 46765994Spendry if (vp->v_type != VREG) { 46865994Spendry error = EOPNOTSUPP; 46965994Spendry goto bad; 47065994Spendry } 47165994Spendry 47265994Spendry VOP_UNLOCK(vp); /* XXX */ 47365994Spendry LEASE_CHECK(vp, p, cred, LEASE_WRITE); 47465994Spendry VOP_LOCK(vp); /* XXX */ 47565994Spendry VATTR_NULL(vap); 47665994Spendry vap->va_size = 0; 47765994Spendry if (error = VOP_SETATTR(vp, vap, cred, p)) 47865994Spendry goto bad; 47965994Spendry 48065994Spendry if (error = VOP_OPEN(vp, fmode, cred, p)) 48165994Spendry goto bad; 48265994Spendry 48365994Spendry vp->v_writecount++; 48465994Spendry *vpp = vp; 48565994Spendry return (0); 48665994Spendry bad: 48765994Spendry vput(vp); 48865994Spendry return (error); 48965994Spendry } 49066027Spendry 49166027Spendry int 49266028Spendry union_vn_close(vp, fmode, cred, p) 49366027Spendry struct vnode *vp; 49466027Spendry int fmode; 49566028Spendry struct ucred *cred; 49666028Spendry struct proc *p; 49766027Spendry { 49866027Spendry if (fmode & FWRITE) 49966027Spendry --vp->v_writecount; 50066027Spendry return (VOP_CLOSE(vp, fmode)); 50166027Spendry } 50266027Spendry 50366027Spendry void 50466027Spendry union_removed_upper(un) 50566027Spendry struct union_node *un; 50666027Spendry { 50766027Spendry vrele(un->un_uppervp); 50866027Spendry un->un_uppervp = NULLVP; 50966027Spendry } 51066028Spendry 51166028Spendry struct vnode * 51266028Spendry union_lowervp(vp) 51366028Spendry struct vnode *vp; 51466028Spendry { 51566028Spendry struct union_node *un = VTOUNION(vp); 51666028Spendry 51766028Spendry if (un->un_lowervp && (vp->v_type == un->un_lowervp->v_type)) { 51866028Spendry if (vget(un->un_lowervp, 0)) 51966028Spendry return (NULLVP); 52066028Spendry } 52166028Spendry 52266028Spendry return (un->un_lowervp); 52366028Spendry } 524