xref: /csrg-svn/sys/miscfs/union/union_vnops.c (revision 65935)
1*65935Spendry /*
2*65935Spendry  * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
3*65935Spendry  * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
4*65935Spendry  * All rights reserved.
5*65935Spendry  *
6*65935Spendry  * This code is derived from software contributed to Berkeley by
7*65935Spendry  * Jan-Simon Pendry and by John Heidemann of the UCLA Ficus project.
8*65935Spendry  *
9*65935Spendry  * %sccs.include.redist.c%
10*65935Spendry  *
11*65935Spendry  *	@(#)union_vnops.c	1.1 (Berkeley) 01/28/94
12*65935Spendry  */
13*65935Spendry 
14*65935Spendry #include <sys/param.h>
15*65935Spendry #include <sys/systm.h>
16*65935Spendry #include <sys/proc.h>
17*65935Spendry #include <sys/file.h>
18*65935Spendry #include <sys/filedesc.h>
19*65935Spendry #include <sys/time.h>
20*65935Spendry #include <sys/types.h>
21*65935Spendry #include <sys/vnode.h>
22*65935Spendry #include <sys/mount.h>
23*65935Spendry #include <sys/namei.h>
24*65935Spendry #include <sys/malloc.h>
25*65935Spendry #include <sys/buf.h>
26*65935Spendry #include "union.h"
27*65935Spendry 
28*65935Spendry 
29*65935Spendry int union_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
30*65935Spendry 
31*65935Spendry /*
32*65935Spendry  * This is the 10-Apr-92 bypass routine.
33*65935Spendry  *    This version has been optimized for speed, throwing away some
34*65935Spendry  * safety checks.  It should still always work, but it's not as
35*65935Spendry  * robust to programmer errors.
36*65935Spendry  *    Define SAFETY to include some error checking code.
37*65935Spendry  *
38*65935Spendry  * In general, we map all vnodes going down and unmap them on the way back.
39*65935Spendry  * As an exception to this, vnodes can be marked "unmapped" by setting
40*65935Spendry  * the Nth bit in the operation's vdesc_flags.
41*65935Spendry  *
42*65935Spendry  * Also, some BSD vnode operations have the side effect of vrele'ing
43*65935Spendry  * their arguments.  With stacking, the reference counts are held
44*65935Spendry  * by the upper node, not the lower one, so we must handle these
45*65935Spendry  * side-effects here.  This is not of concern in Sun-derived systems
46*65935Spendry  * since there are no such side-effects.
47*65935Spendry  *
48*65935Spendry  * This makes the following assumptions:
49*65935Spendry  * - only one returned vpp
50*65935Spendry  * - no INOUT vpp's (Sun's vop_open has one of these)
51*65935Spendry  * - the vnode operation vector of the first vnode should be used
52*65935Spendry  *   to determine what implementation of the op should be invoked
53*65935Spendry  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
54*65935Spendry  *   problems on rmdir'ing mount points and renaming?)
55*65935Spendry  */
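/*
 * To illustrate the mapping step (a sketch, not code from this file):
 * every vop argument structure begins with its a_desc pointer, and the
 * descriptor records the byte offset of each vnode argument, so this
 * generic code can find the vnode slots without knowing which
 * operation it was handed.  Using vop_access_args (declared further
 * down) as a hypothetical example:
 *
 *	struct vop_access_args *ap;
 *	struct vnode **vpp;
 *
 *	vpp = VOPARG_OFFSETTO(struct vnode **,
 *		ap->a_desc->vdesc_vp_offsets[0], ap);
 *
 * Now *vpp aliases ap->a_vp, which is how union_bypass substitutes
 * the upper or lower vnode in place before re-issuing the call.
 */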
56*65935Spendry int
57*65935Spendry union_bypass(ap)
58*65935Spendry 	struct vop_generic_args /* {
59*65935Spendry 		struct vnodeop_desc *a_desc;
60*65935Spendry 		<other random data follows, presumably>
61*65935Spendry 	} */ *ap;
62*65935Spendry {
63*65935Spendry 	struct vnode **this_vp_p;
64*65935Spendry 	int error;
65*65935Spendry 	struct vnode *old_vps[VDESC_MAX_VPS];
66*65935Spendry 	struct vnode **vps_p[VDESC_MAX_VPS];
67*65935Spendry 	struct vnode ***vppp;
68*65935Spendry 	struct vnodeop_desc *descp = ap->a_desc;
69*65935Spendry 	int reles, i;
70*65935Spendry 
71*65935Spendry 	if (union_bug_bypass)
72*65935Spendry 		printf ("union_bypass: %s\n", descp->vdesc_name);
73*65935Spendry 
74*65935Spendry #ifdef SAFETY
75*65935Spendry 	/*
76*65935Spendry 	 * We require at least one vp.
77*65935Spendry 	 */
78*65935Spendry 	if (descp->vdesc_vp_offsets == NULL ||
79*65935Spendry 	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
80*65935Spendry 		panic ("union_bypass: no vp's in map.\n");
81*65935Spendry #endif
82*65935Spendry 
83*65935Spendry 	/*
84*65935Spendry 	 * Map the vnodes going in.
85*65935Spendry 	 * Later, we'll invoke the operation based on
86*65935Spendry 	 * the first mapped vnode's operation vector.
87*65935Spendry 	 */
88*65935Spendry 	reles = descp->vdesc_flags;
89*65935Spendry 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
90*65935Spendry 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
91*65935Spendry 			break;   /* bail out at end of list */
92*65935Spendry 		vps_p[i] = this_vp_p =
93*65935Spendry 			VOPARG_OFFSETTO(struct vnode **, descp->vdesc_vp_offsets[i],ap);
94*65935Spendry 		/*
95*65935Spendry 		 * We're not guaranteed that any but the first vnode
96*65935Spendry 		 * are of our type.  Check for and don't map any
97*65935Spendry 		 * that aren't.  (We must always map first vp or vclean fails.)
98*65935Spendry 		 */
99*65935Spendry 		if (i && (*this_vp_p)->v_op != union_vnodeop_p) {
100*65935Spendry 			old_vps[i] = NULL;
101*65935Spendry 		} else {
102*65935Spendry 			old_vps[i] = *this_vp_p;
103*65935Spendry 			*(vps_p[i]) = OTHERVP(*this_vp_p);
104*65935Spendry 			/*
105*65935Spendry 			 * XXX - Several operations have the side effect
106*65935Spendry 			 * of vrele'ing their vp's.  We must account for
107*65935Spendry 			 * that.  (This should go away in the future.)
108*65935Spendry 			 */
109*65935Spendry 			if (reles & 1)
110*65935Spendry 				VREF(*this_vp_p);
111*65935Spendry 		}
112*65935Spendry 
113*65935Spendry 	}
114*65935Spendry 
115*65935Spendry 	/*
116*65935Spendry 	 * Call the operation on the lower layer
117*65935Spendry 	 * with the modified argument structure.
118*65935Spendry 	 */
119*65935Spendry 	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
120*65935Spendry 
121*65935Spendry 	/*
122*65935Spendry 	 * Maintain the illusion of call-by-value
123*65935Spendry 	 * by restoring vnodes in the argument structure
124*65935Spendry 	 * to their original value.
125*65935Spendry 	 */
126*65935Spendry 	reles = descp->vdesc_flags;
127*65935Spendry 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
128*65935Spendry 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
129*65935Spendry 			break;   /* bail out at end of list */
130*65935Spendry 		if (old_vps[i]) {
131*65935Spendry 			*(vps_p[i]) = old_vps[i];
132*65935Spendry 			if (reles & 1)
133*65935Spendry 				vrele(*(vps_p[i]));
134*65935Spendry 		}
135*65935Spendry 	}
136*65935Spendry 
137*65935Spendry 	/*
138*65935Spendry 	 * Map the possible out-going vpp
139*65935Spendry 	 * (Assumes that the lower layer always returns
140*65935Spendry 	 * a VREF'ed vpp unless it gets an error.)
141*65935Spendry 	 */
142*65935Spendry 	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
143*65935Spendry 	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
144*65935Spendry 	    !error) {
145*65935Spendry 		/*
146*65935Spendry 		 * XXX - even though some ops return vp's through vpp,
147*65935Spendry 		 * several ops actually vrele this before returning.
148*65935Spendry 		 * We must avoid these ops.
149*65935Spendry 		 * (This should go away when these ops are regularized.)
150*65935Spendry 		 */
151*65935Spendry 		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
152*65935Spendry 			goto out;
153*65935Spendry 		vppp = VOPARG_OFFSETTO(struct vnode***,
154*65935Spendry 				 descp->vdesc_vpp_offset,ap);
155*65935Spendry 		panic("union: failed to handle returned vnode");
156*65935Spendry 		error = union_allocvp(0, 0, 0, 0, 0, 0);
157*65935Spendry 	}
158*65935Spendry 
159*65935Spendry out:
160*65935Spendry 	return (error);
161*65935Spendry }
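/*
 * For example, an operation with no specific entry in the table at the
 * end of this file, say VOP_READLINK(vp, uio, cred) on a union vnode,
 * reaches union_bypass through vop_default_desc.  The a_vp slot in its
 * argument structure is rewritten to OTHERVP(vp) (the real upper or
 * lower vnode), the operation is re-issued with VCALL, and the union
 * vnode is put back afterwards to preserve call-by-value.
 */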
162*65935Spendry 
163*65935Spendry /*
164*65935Spendry  * Check access permission on the union vnode.
165*65935Spendry  * The access check being enforced is to check
166*65935Spendry  * against both the underlying vnode, and any
167*65935Spendry  * copied vnode.  This ensures that no additional
168*65935Spendry  * file permissions are given away simply because
169*65935Spendry  * the user caused an implicit file copy.
170*65935Spendry  */
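/*
 * For example, a file which is read-only to the user in the lower
 * layer must not become writable merely because a copy-up creates an
 * upper vnode the user owns; failing the check in the lower layer
 * keeps the original restriction in force.
 */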
171*65935Spendry int
172*65935Spendry union_access(ap)
173*65935Spendry 	struct vop_access_args /* {
174*65935Spendry 		struct vnodeop_desc *a_desc;
175*65935Spendry 		struct vnode *a_vp;
176*65935Spendry 		int a_mode;
177*65935Spendry 		struct ucred *a_cred;
178*65935Spendry 		struct proc *a_p;
179*65935Spendry 	} */ *ap;
180*65935Spendry {
181*65935Spendry 	struct union_node *un = VTOUNION(ap->a_vp);
182*65935Spendry 	struct vnode *vp;
183*65935Spendry 
184*65935Spendry 	if (vp = un->un_lowervp) {
185*65935Spendry 		int error;
186*65935Spendry 
187*65935Spendry 		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
188*65935Spendry 		if (error)
189*65935Spendry 			return (error);
190*65935Spendry 	}
191*65935Spendry 
192*65935Spendry 	if (vp = un->un_uppervp)
193*65935Spendry 		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
194*65935Spendry 
195*65935Spendry 	return (0);
196*65935Spendry }
197*65935Spendry 
198*65935Spendry static int
199*65935Spendry union_mkshadow(dvp, cnp, vpp)
200*65935Spendry 	struct vnode *dvp;
201*65935Spendry 	struct componentname *cnp;
202*65935Spendry 	struct vnode **vpp;
203*65935Spendry {
204*65935Spendry 	int error;
205*65935Spendry 	struct vattr va;
206*65935Spendry 	struct proc *p = cnp->cn_proc;
207*65935Spendry 	int lockparent = (cnp->cn_flags & LOCKPARENT);
208*65935Spendry 
209*65935Spendry 	/*
210*65935Spendry 	 * policy: when creating the shadow directory in the
211*65935Spendry 	 * upper layer, create it owned by the current user,
212*65935Spendry 	 * group from parent directory, and mode 777 modified
213*65935Spendry 	 * by umask (ie mostly identical to the mkdir syscall).
214*65935Spendry 	 * (jsp, kb)
215*65935Spendry 	 * TODO: create the directory owned by the user who
216*65935Spendry 	 * did the mount (um->um_cred).
217*65935Spendry 	 */
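	/*
	 * For example, assuming UN_DIRMODE is 0777 and the process
	 * umask (fd_cmask) is 022, the shadow directory is created
	 * with mode 0777 & ~022 = 0755, just as mkdir(2) would.
	 */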
218*65935Spendry 
219*65935Spendry 	VATTR_NULL(&va);
220*65935Spendry 	va.va_type = VDIR;
221*65935Spendry 	va.va_mode = UN_DIRMODE &~ p->p_fd->fd_cmask;
222*65935Spendry 	if (lockparent)
223*65935Spendry 		VOP_UNLOCK(dvp);
224*65935Spendry 	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
225*65935Spendry 	VOP_LOCK(dvp);
226*65935Spendry 	error = VOP_MKDIR(dvp, vpp, cnp, &va);
227*65935Spendry 	if (lockparent)
228*65935Spendry 		VOP_LOCK(dvp);
229*65935Spendry 	return (error);
230*65935Spendry }
231*65935Spendry 
232*65935Spendry static int
233*65935Spendry union_lookup1(dvp, vpp, cnp)
234*65935Spendry 	struct vnode *dvp;
235*65935Spendry 	struct vnode **vpp;
236*65935Spendry 	struct componentname *cnp;
237*65935Spendry {
238*65935Spendry 	int error;
239*65935Spendry 	struct vnode *tdvp;
240*65935Spendry 	struct mount *mp;
241*65935Spendry 
242*65935Spendry 	if (cnp->cn_flags & ISDOTDOT) {
243*65935Spendry 		for (;;) {
244*65935Spendry 			if ((dvp->v_flag & VROOT) == 0 ||
245*65935Spendry 			    (cnp->cn_flags & NOCROSSMOUNT))
246*65935Spendry 				break;
247*65935Spendry 
248*65935Spendry 			tdvp = dvp;
249*65935Spendry 			dvp = dvp->v_mount->mnt_vnodecovered;
250*65935Spendry 			vput(tdvp);
251*65935Spendry 			VREF(dvp);
252*65935Spendry 			VOP_LOCK(dvp);
253*65935Spendry 		}
254*65935Spendry 	}
255*65935Spendry 
256*65935Spendry 	error = VOP_LOOKUP(dvp, &tdvp, cnp);
257*65935Spendry 	if (error)
258*65935Spendry 		return (error);
259*65935Spendry 
260*65935Spendry 	dvp = tdvp;
261*65935Spendry 	while ((dvp->v_type == VDIR) && (mp = dvp->v_mountedhere) &&
262*65935Spendry 	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
263*65935Spendry 
264*65935Spendry 		if (mp->mnt_flag & MNT_MLOCK) {
265*65935Spendry 			mp->mnt_flag |= MNT_MWAIT;
266*65935Spendry 			sleep((caddr_t) mp, PVFS);
267*65935Spendry 			continue;
268*65935Spendry 		}
269*65935Spendry 
270*65935Spendry 		if (error = VFS_ROOT(mp, &tdvp)) {
271*65935Spendry 			vput(dvp);
272*65935Spendry 			return (error);
273*65935Spendry 		}
274*65935Spendry 
275*65935Spendry 		vput(dvp);
276*65935Spendry 		dvp = tdvp;
277*65935Spendry 	}
278*65935Spendry 
279*65935Spendry 	*vpp = dvp;
280*65935Spendry 	return (0);
281*65935Spendry }
282*65935Spendry 
283*65935Spendry int
284*65935Spendry union_lookup(ap)
285*65935Spendry 	struct vop_lookup_args /* {
286*65935Spendry 		struct vnodeop_desc *a_desc;
287*65935Spendry 		struct vnode *a_dvp;
288*65935Spendry 		struct vnode **a_vpp;
289*65935Spendry 		struct componentname *a_cnp;
290*65935Spendry 	} */ *ap;
291*65935Spendry {
292*65935Spendry 	int uerror, lerror;
293*65935Spendry 	struct vnode *uppervp, *lowervp;
294*65935Spendry 	struct vnode *upperdvp, *lowerdvp;
295*65935Spendry 	struct vnode *dvp = ap->a_dvp;
296*65935Spendry 	struct union_node *dun = VTOUNION(ap->a_dvp);
297*65935Spendry 	struct componentname *cnp = ap->a_cnp;
298*65935Spendry 	int lockparent = cnp->cn_flags & LOCKPARENT;
299*65935Spendry 
300*65935Spendry 	upperdvp = dun->un_uppervp;
301*65935Spendry 	lowerdvp = dun->un_lowervp;
302*65935Spendry 
303*65935Spendry 	/*
304*65935Spendry 	 * do the lookup in the upper level.
305*65935Spendry 	 * if that level consumes additional pathnames,
306*65935Spendry 	 * then assume that something special is going
307*65935Spendry 	 * on and just return that vnode.
308*65935Spendry 	 */
309*65935Spendry 	uppervp = 0;
310*65935Spendry 	if (upperdvp) {
311*65935Spendry 		uerror = union_lookup1(upperdvp, &uppervp, cnp);
312*65935Spendry 		if (cnp->cn_consume != 0) {
313*65935Spendry 			*ap->a_vpp = uppervp;
314*65935Spendry 			return (uerror);
315*65935Spendry 		}
316*65935Spendry 		if (!lockparent)
317*65935Spendry 			VOP_LOCK(upperdvp);
318*65935Spendry 	} else {
319*65935Spendry 		uerror = ENOENT;
320*65935Spendry 	}
321*65935Spendry 
322*65935Spendry 	/*
323*65935Spendry 	 * in a similar way to the upper layer, do the lookup
324*65935Spendry 	 * in the lower layer.   this time, if there is some
325*65935Spendry 	 * component magic going on, then vput whatever we got
326*65935Spendry 	 * back from the upper layer and return the lower vnode
327*65935Spendry 	 * instead.
328*65935Spendry 	 */
329*65935Spendry 	lowervp = 0;
330*65935Spendry 	if (lowerdvp) {
331*65935Spendry 		lerror = union_lookup1(lowerdvp, &lowervp, cnp);
332*65935Spendry 		if (cnp->cn_consume != 0) {
333*65935Spendry 			if (uppervp) {
334*65935Spendry 				vput(uppervp);
335*65935Spendry 				uppervp = 0;
336*65935Spendry 			}
337*65935Spendry 			*ap->a_vpp = lowervp;
338*65935Spendry 			return (lerror);
339*65935Spendry 		}
340*65935Spendry 		if (!lockparent)
341*65935Spendry 			VOP_LOCK(lowerdvp);
342*65935Spendry 	} else {
343*65935Spendry 		lerror = ENOENT;
344*65935Spendry 	}
345*65935Spendry 
346*65935Spendry 	/*
347*65935Spendry 	 * at this point, we have uerror and lerror indicating
348*65935Spendry 	 * possible errors with the lookups in the upper and lower
349*65935Spendry 	 * layers.  additionally, uppervp and lowervp are (locked)
350*65935Spendry 	 * references to existing vnodes in the upper and lower layers.
351*65935Spendry 	 *
352*65935Spendry 	 * there are now three cases to consider.
353*65935Spendry 	 * 1. if both layers returned an error, then return whatever
354*65935Spendry 	 *    error the upper layer generated.
355*65935Spendry 	 *
356*65935Spendry 	 * 2. if the top layer failed and the bottom layer succeeded
357*65935Spendry 	 *    then two subcases occur.
358*65935Spendry 	 *    a.  the bottom vnode is not a directory, in which
359*65935Spendry 	 *	  case just return a new union vnode referencing
360*65935Spendry 	 *	  an empty top layer and the existing bottom layer.
361*65935Spendry 	 *    b.  the bottom vnode is a directory, in which case
362*65935Spendry 	 *	  create a new directory in the top-level and
363*65935Spendry 	 *	  continue as in case 3.
364*65935Spendry 	 *
365*65935Spendry 	 * 3. if the top layer succeeded then return a new union
366*65935Spendry 	 *    vnode referencing whatever the top layer returned and
367*65935Spendry 	 *    whatever the bottom layer returned.
368*65935Spendry 	 */
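	/*
	 * Case 2a needs no explicit code below: when the bottom vnode
	 * is not a directory, uppervp is simply left null and the
	 * union_allocvp call at the end builds the union node with an
	 * empty top layer.
	 */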
369*65935Spendry 
370*65935Spendry 	/* case 1. */
371*65935Spendry 	if ((uerror != 0) && (lerror != 0)) {
372*65935Spendry 		*ap->a_vpp = 0;
373*65935Spendry 		return (uerror);
374*65935Spendry 	}
375*65935Spendry 
376*65935Spendry 	/* case 2. */
377*65935Spendry 	if (uerror != 0 /* && (lerror == 0) */ ) {
378*65935Spendry 		if (lowervp->v_type == VDIR) { /* case 2b. */
379*65935Spendry 			uerror = union_mkshadow(upperdvp, cnp, &uppervp);
380*65935Spendry 			if (uerror) {
381*65935Spendry 				if (lowervp) {
382*65935Spendry 					vput(lowervp);
383*65935Spendry 					lowervp = 0;
384*65935Spendry 				}
385*65935Spendry 				return (uerror);
386*65935Spendry 			}
387*65935Spendry 		}
388*65935Spendry 	}
389*65935Spendry 
390*65935Spendry 	return (union_allocvp(ap->a_vpp, dvp->v_mount, dvp, cnp,
391*65935Spendry 			      uppervp, lowervp));
392*65935Spendry }
393*65935Spendry 
394*65935Spendry /*
395*65935Spendry  * copyfile.  copy the vnode (fvp) to the vnode (tvp)
396*65935Spendry  * using a sequence of reads and writes.
397*65935Spendry  */
398*65935Spendry static int
399*65935Spendry union_copyfile(p, cred, fvp, tvp)
400*65935Spendry 	struct proc *p;
401*65935Spendry 	struct ucred *cred;
402*65935Spendry 	struct vnode *fvp;
403*65935Spendry 	struct vnode *tvp;
404*65935Spendry {
405*65935Spendry 	char *buf;
406*65935Spendry 	struct uio uio;
407*65935Spendry 	struct iovec iov;
408*65935Spendry 	int error = 0;
409*65935Spendry 	off_t offset;
410*65935Spendry 
411*65935Spendry 	/*
412*65935Spendry 	 * strategy:
413*65935Spendry 	 * allocate a buffer of size MAXBSIZE.
414*65935Spendry 	 * loop doing reads and writes, keeping track
415*65935Spendry 	 * of the current uio offset.
416*65935Spendry 	 * give up at the first sign of trouble.
417*65935Spendry 	 */
418*65935Spendry 
419*65935Spendry 	uio.uio_procp = p;
420*65935Spendry 	uio.uio_segflg = UIO_SYSSPACE;
421*65935Spendry 	offset = 0;
422*65935Spendry 
423*65935Spendry 	VOP_UNLOCK(fvp);				/* XXX */
424*65935Spendry 	LEASE_CHECK(fvp, p, cred, LEASE_READ);
425*65935Spendry 	VOP_LOCK(fvp);					/* XXX */
426*65935Spendry 	VOP_UNLOCK(tvp);				/* XXX */
427*65935Spendry 	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
428*65935Spendry 	VOP_LOCK(tvp);					/* XXX */
429*65935Spendry 
430*65935Spendry 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
431*65935Spendry 	do {
432*65935Spendry 		uio.uio_iov = &iov;
433*65935Spendry 		uio.uio_iovcnt = 1;
434*65935Spendry 		iov.iov_base = buf;
435*65935Spendry 		iov.iov_len = MAXBSIZE;
436*65935Spendry 		uio.uio_resid = iov.iov_len;
437*65935Spendry 		uio.uio_offset = offset;
438*65935Spendry 		uio.uio_rw = UIO_READ;
439*65935Spendry 		error = VOP_READ(fvp, &uio, 0, cred);
440*65935Spendry 
441*65935Spendry 		if (error == 0 && uio.uio_resid != MAXBSIZE) {	/* stop at EOF */
442*65935Spendry 			uio.uio_iov = &iov;
443*65935Spendry 			uio.uio_iovcnt = 1;
444*65935Spendry 			iov.iov_base = buf;
445*65935Spendry 			iov.iov_len = MAXBSIZE - uio.uio_resid;
446*65935Spendry 			uio.uio_rw = UIO_WRITE;
447*65935Spendry 			uio.uio_resid = iov.iov_len;
448*65935Spendry 			uio.uio_offset = offset;
449*65935Spendry 
450*65935Spendry 			do {
451*65935Spendry 				error = VOP_WRITE(tvp, &uio, 0, cred);
452*65935Spendry 			} while (error == 0 && uio.uio_resid > 0);
453*65935Spendry 			if (error == 0)
454*65935Spendry 				offset = uio.uio_offset;
455*65935Spendry 		}
456*65935Spendry 	} while ((uio.uio_resid == 0) && (error == 0));
457*65935Spendry 
458*65935Spendry 	free(buf, M_TEMP);
459*65935Spendry 	return (error);
460*65935Spendry }
461*65935Spendry 
462*65935Spendry int
463*65935Spendry union_open(ap)
464*65935Spendry 	struct vop_open_args /* {
465*65935Spendry 		struct vnodeop_desc *a_desc;
466*65935Spendry 		struct vnode *a_vp;
467*65935Spendry 		int a_mode;
468*65935Spendry 		struct ucred *a_cred;
469*65935Spendry 		struct proc *a_p;
470*65935Spendry 	} */ *ap;
471*65935Spendry {
472*65935Spendry 	struct union_node *un = VTOUNION(ap->a_vp);
473*65935Spendry 	int mode = ap->a_mode;
474*65935Spendry 	struct ucred *cred = ap->a_cred;
475*65935Spendry 	struct proc *p = ap->a_p;
476*65935Spendry 
477*65935Spendry 	/*
478*65935Spendry 	 * If there is an existing upper vp then simply open that.
479*65935Spendry 	 */
480*65935Spendry 	if (un->un_uppervp)
481*65935Spendry 		return (VOP_OPEN(un->un_uppervp, mode, cred, p));
482*65935Spendry 
483*65935Spendry 	/*
484*65935Spendry 	 * If the lower vnode is being opened for writing, then
485*65935Spendry 	 * copy the file contents to the upper vnode and open that,
486*65935Spendry 	 * otherwise can simply open the lower vnode.
487*65935Spendry 	 */
488*65935Spendry 	if ((ap->a_mode & FWRITE) && (un->un_lowervp->v_type == VREG)) {
489*65935Spendry 		int error;
490*65935Spendry 		struct nameidata nd;
491*65935Spendry 		struct filedesc *fdp = p->p_fd;
492*65935Spendry 		int fmode;
493*65935Spendry 		int cmode;
494*65935Spendry 
495*65935Spendry 		/*
496*65935Spendry 		 * Open the named file in the upper layer.  Note that
497*65935Spendry 		 * the file may have come into existence *since* the lookup
498*65935Spendry 		 * was done, since the upper layer may really be a
499*65935Spendry 		 * loopback mount of some other filesystem... so open
500*65935Spendry 		 * the file with exclusive create and barf if it already
501*65935Spendry 		 * exists.
502*65935Spendry 		 * XXX - perhaps should re-lookup the node (once more with
503*65935Spendry 		 * feeling) and simply open that.  Who knows.
504*65935Spendry 		 */
505*65935Spendry 		NDINIT(&nd, CREATE, 0, UIO_SYSSPACE, un->un_path, p);
506*65935Spendry 		fmode = (O_CREAT|O_TRUNC|O_EXCL);
507*65935Spendry 		cmode = UN_FILEMODE & ~fdp->fd_cmask;
508*65935Spendry 		error = vn_open(&nd, fmode, cmode);
509*65935Spendry 		if (error)
510*65935Spendry 			return (error);
511*65935Spendry 		un->un_uppervp = nd.ni_vp;
512*65935Spendry 		/*
513*65935Spendry 		 * Now, if the file is being opened with truncation, then
514*65935Spendry 		 * the (new) upper vnode is ready to fly, otherwise the
515*65935Spendry 		 * data from the lower vnode must be copied to the upper
516*65935Spendry 		 * layer first.  This only works for regular files (check
517*65935Spendry 		 * is made above).
518*65935Spendry 		 */
519*65935Spendry 		if ((mode & O_TRUNC) == 0) {
520*65935Spendry 			/* XXX - should not ignore errors from VOP_CLOSE */
521*65935Spendry 			error = VOP_OPEN(un->un_lowervp, FREAD, cred, p);
522*65935Spendry 			if (error == 0) {
523*65935Spendry 				error = union_copyfile(p, cred,
524*65935Spendry 					       un->un_lowervp, un->un_uppervp);
525*65935Spendry 				(void) VOP_CLOSE(un->un_lowervp, FREAD, cred, p);
526*65935Spendry 			}
527*65935Spendry 			(void) VOP_CLOSE(un->un_uppervp, FWRITE, cred, p);
528*65935Spendry 		}
529*65935Spendry 		if (error == 0)
530*65935Spendry 			error = VOP_OPEN(un->un_uppervp, FREAD, cred, p);
531*65935Spendry 		return (error);
532*65935Spendry 	}
533*65935Spendry 
534*65935Spendry 	return (VOP_OPEN(un->un_lowervp, mode, cred, p));
535*65935Spendry }
536*65935Spendry 
537*65935Spendry /*
538*65935Spendry  *  We handle getattr only to change the fsid.
539*65935Spendry  */
540*65935Spendry int
541*65935Spendry union_getattr(ap)
542*65935Spendry 	struct vop_getattr_args /* {
543*65935Spendry 		struct vnode *a_vp;
544*65935Spendry 		struct vattr *a_vap;
545*65935Spendry 		struct ucred *a_cred;
546*65935Spendry 		struct proc *a_p;
547*65935Spendry 	} */ *ap;
548*65935Spendry {
549*65935Spendry 	int error;
550*65935Spendry 
551*65935Spendry 	if (error = union_bypass(ap))
552*65935Spendry 		return (error);
553*65935Spendry 	/* Requires that arguments be restored. */
554*65935Spendry 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
555*65935Spendry 	return (0);
556*65935Spendry }
557*65935Spendry 
558*65935Spendry /*
559*65935Spendry  * union_readdir works in concert with getdirentries and
560*65935Spendry  * readdir(3) to provide a list of entries in the unioned
561*65935Spendry  * directories.  getdirentries is responsible for walking
562*65935Spendry  * down the union stack.  readdir(3) is responsible for
563*65935Spendry  * eliminating duplicate names from the returned data stream.
564*65935Spendry  */
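/*
 * A rough sketch of the userland side (hypothetical, not the actual
 * C library code): readdir(3) can remember each name it has already
 * returned for a union directory and drop later duplicates while
 * walking the entries getdirentries(2) collected from each layer:
 *
 *	if (already_seen(dp->d_name))
 *		continue;	(skip the occluded lower-layer entry)
 *	remember(dp->d_name);
 *	return (dp);
 *
 * where already_seen() and remember() stand in for whatever
 * per-directory duplicate table the library keeps.
 */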
565*65935Spendry int
566*65935Spendry union_readdir(ap)
567*65935Spendry 	struct vop_readdir_args /* {
568*65935Spendry 		struct vnodeop_desc *a_desc;
569*65935Spendry 		struct vnode *a_vp;
570*65935Spendry 		struct uio *a_uio;
571*65935Spendry 		struct ucred *a_cred;
572*65935Spendry 	} */ *ap;
573*65935Spendry {
574*65935Spendry 	struct union_node *un = VTOUNION(ap->a_vp);
575*65935Spendry 
576*65935Spendry 	if (un->un_uppervp)
577*65935Spendry 		return (union_bypass(ap));
578*65935Spendry 
579*65935Spendry 	return (0);
580*65935Spendry }
581*65935Spendry 
582*65935Spendry int
583*65935Spendry union_inactive(ap)
584*65935Spendry 	struct vop_inactive_args /* {
585*65935Spendry 		struct vnode *a_vp;
586*65935Spendry 	} */ *ap;
587*65935Spendry {
588*65935Spendry 
589*65935Spendry 	/*
590*65935Spendry 	 * Do nothing (and _don't_ bypass).
591*65935Spendry 	 * Wait to vrele lowervp until reclaim,
592*65935Spendry 	 * so that until then our union_node is in the
593*65935Spendry 	 * cache and reusable.
594*65935Spendry 	 *
595*65935Spendry 	 * NEEDSWORK: Someday, consider inactive'ing
596*65935Spendry 	 * the lowervp and then trying to reactivate it
597*65935Spendry 	 * with capabilities (v_id)
598*65935Spendry 	 * like they do in the name lookup cache code.
599*65935Spendry 	 * That's too much work for now.
600*65935Spendry 	 */
601*65935Spendry 	return (0);
602*65935Spendry }
603*65935Spendry 
604*65935Spendry int
605*65935Spendry union_reclaim(ap)
606*65935Spendry 	struct vop_reclaim_args /* {
607*65935Spendry 		struct vnode *a_vp;
608*65935Spendry 	} */ *ap;
609*65935Spendry {
610*65935Spendry 	struct vnode *vp = ap->a_vp;
611*65935Spendry 	struct union_node *un = VTOUNION(vp);
612*65935Spendry 	struct vnode *uppervp = un->un_uppervp;
613*65935Spendry 	struct vnode *lowervp = un->un_lowervp;
614*65935Spendry 	struct vnode *dirvp = un->un_dirvp;
615*65935Spendry 	char *path = un->un_path;
616*65935Spendry 
617*65935Spendry 	/*
618*65935Spendry 	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
619*65935Spendry 	 * so we can't call VOPs on ourself.
620*65935Spendry 	 */
621*65935Spendry 	/* After this assignment, this node will not be re-used. */
622*65935Spendry 	un->un_uppervp = 0;
623*65935Spendry 	un->un_lowervp = 0;
624*65935Spendry 	un->un_dirvp = 0;
625*65935Spendry 	un->un_path = NULL;
626*65935Spendry 	union_freevp(vp);
627*65935Spendry 	if (uppervp)
628*65935Spendry 		vrele(uppervp);
629*65935Spendry 	if (lowervp)
630*65935Spendry 		vrele(lowervp);
631*65935Spendry 	if (dirvp)
632*65935Spendry 		vrele(dirvp);
633*65935Spendry 	if (path)
634*65935Spendry 		free(path, M_TEMP);
635*65935Spendry 	return (0);
636*65935Spendry }
637*65935Spendry 
638*65935Spendry 
639*65935Spendry int
640*65935Spendry union_print(ap)
641*65935Spendry 	struct vop_print_args /* {
642*65935Spendry 		struct vnode *a_vp;
643*65935Spendry 	} */ *ap;
644*65935Spendry {
645*65935Spendry 	struct vnode *vp = ap->a_vp;
646*65935Spendry 
647*65935Spendry 	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
648*65935Spendry 			vp, UPPERVP(vp), LOWERVP(vp));
649*65935Spendry 	return (0);
650*65935Spendry }
651*65935Spendry 
652*65935Spendry 
653*65935Spendry /*
654*65935Spendry  * XXX - vop_strategy must be hand coded because it has no
655*65935Spendry  * vnode in its arguments.
656*65935Spendry  * This goes away with a merged VM/buffer cache.
657*65935Spendry  */
658*65935Spendry int
659*65935Spendry union_strategy(ap)
660*65935Spendry 	struct vop_strategy_args /* {
661*65935Spendry 		struct buf *a_bp;
662*65935Spendry 	} */ *ap;
663*65935Spendry {
664*65935Spendry 	struct buf *bp = ap->a_bp;
665*65935Spendry 	int error;
666*65935Spendry 	struct vnode *savedvp;
667*65935Spendry 
668*65935Spendry 	savedvp = bp->b_vp;
669*65935Spendry 	bp->b_vp = OTHERVP(bp->b_vp);
670*65935Spendry 
671*65935Spendry #ifdef DIAGNOSTIC
672*65935Spendry 	if (bp->b_vp == 0)
673*65935Spendry 		panic("union_strategy: nil vp");
674*65935Spendry 	if (((bp->b_flags & B_READ) == 0) &&
675*65935Spendry 	    (bp->b_vp == LOWERVP(savedvp)))
676*65935Spendry 		panic("union_strategy: writing to lowervp");
677*65935Spendry #endif
678*65935Spendry 
679*65935Spendry 	error = VOP_STRATEGY(bp);
680*65935Spendry 	bp->b_vp = savedvp;
681*65935Spendry 
682*65935Spendry 	return (error);
683*65935Spendry }
684*65935Spendry 
685*65935Spendry 
686*65935Spendry /*
687*65935Spendry  * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
688*65935Spendry  * vnode in its arguments.
689*65935Spendry  * This goes away with a merged VM/buffer cache.
690*65935Spendry  */
691*65935Spendry int
692*65935Spendry union_bwrite(ap)
693*65935Spendry 	struct vop_bwrite_args /* {
694*65935Spendry 		struct buf *a_bp;
695*65935Spendry 	} */ *ap;
696*65935Spendry {
697*65935Spendry 	struct buf *bp = ap->a_bp;
698*65935Spendry 	int error;
699*65935Spendry 	struct vnode *savedvp;
700*65935Spendry 
701*65935Spendry 	savedvp = bp->b_vp;
702*65935Spendry 	bp->b_vp = UPPERVP(bp->b_vp);
703*65935Spendry 
704*65935Spendry #ifdef DIAGNOSTIC
705*65935Spendry 	if (bp->b_vp == 0)
706*65935Spendry 		panic("union_bwrite: no upper vp");
707*65935Spendry #endif
708*65935Spendry 
709*65935Spendry 	error = VOP_BWRITE(bp);
710*65935Spendry 
711*65935Spendry 	bp->b_vp = savedvp;
712*65935Spendry 
713*65935Spendry 	return (error);
714*65935Spendry }
715*65935Spendry 
716*65935Spendry int
717*65935Spendry union_lock(ap)
718*65935Spendry 	struct vop_lock_args *ap;
719*65935Spendry {
720*65935Spendry 	struct union_node *un = VTOUNION(ap->a_vp);
721*65935Spendry 
722*65935Spendry #ifdef DIAGNOSTIC
723*65935Spendry 	if (un->un_pid == curproc->p_pid)
724*65935Spendry 		panic("union: locking against myself");
725*65935Spendry #endif
726*65935Spendry 	while (un->un_flags & UN_LOCKED) {
727*65935Spendry 		un->un_flags |= UN_WANT;
728*65935Spendry 		sleep((caddr_t) &un->un_flags, PINOD);
729*65935Spendry 	}
730*65935Spendry 	un->un_flags |= UN_LOCKED;
731*65935Spendry #ifdef DIAGNOSTIC
732*65935Spendry 	un->un_pid = curproc->p_pid;
733*65935Spendry #endif
734*65935Spendry 
735*65935Spendry 	if (un->un_lowervp && !VOP_ISLOCKED(un->un_lowervp))
736*65935Spendry 		VOP_LOCK(un->un_lowervp);
737*65935Spendry 	if (un->un_uppervp && !VOP_ISLOCKED(un->un_uppervp))
738*65935Spendry 		VOP_LOCK(un->un_uppervp);
739*65935Spendry }
740*65935Spendry 
741*65935Spendry int
742*65935Spendry union_unlock(ap)
743*65935Spendry 	struct vop_unlock_args *ap;
744*65935Spendry {
745*65935Spendry 	struct union_node *un = VTOUNION(ap->a_vp);
746*65935Spendry 
747*65935Spendry #ifdef DIAGNOSTIC
748*65935Spendry 	if (un->un_pid != curproc->p_pid)
749*65935Spendry 		panic("union: unlocking other process's union node");
750*65935Spendry 	if ((un->un_flags & UN_LOCKED) == 0)
751*65935Spendry 		panic("union: unlock unlocked node");
752*65935Spendry #endif
753*65935Spendry 
754*65935Spendry 	if (un->un_uppervp && VOP_ISLOCKED(un->un_uppervp))
755*65935Spendry 		VOP_UNLOCK(un->un_uppervp);
756*65935Spendry 	if (un->un_lowervp && VOP_ISLOCKED(un->un_lowervp))
757*65935Spendry 		VOP_UNLOCK(un->un_lowervp);
758*65935Spendry 
759*65935Spendry 	un->un_flags &= ~UN_LOCKED;
760*65935Spendry 	if (un->un_flags & UN_WANT) {
761*65935Spendry 		un->un_flags &= ~UN_WANT;
762*65935Spendry 		wakeup((caddr_t) &un->un_flags);
763*65935Spendry 	}
764*65935Spendry 
765*65935Spendry #ifdef DIAGNOSTIC
766*65935Spendry 	un->un_pid = 0;
767*65935Spendry #endif
768*65935Spendry }
769*65935Spendry 
770*65935Spendry /*
771*65935Spendry  * Global vfs data structures
772*65935Spendry  */
773*65935Spendry int (**union_vnodeop_p)();
774*65935Spendry struct vnodeopv_entry_desc union_vnodeop_entries[] = {
775*65935Spendry 	{ &vop_default_desc, union_bypass },
776*65935Spendry 
	{ &vop_lookup_desc, union_lookup },
	{ &vop_open_desc, union_open },
	{ &vop_access_desc, union_access },
	{ &vop_readdir_desc, union_readdir },

777*65935Spendry 	{ &vop_getattr_desc, union_getattr },
778*65935Spendry 	{ &vop_inactive_desc, union_inactive },
779*65935Spendry 	{ &vop_reclaim_desc, union_reclaim },
780*65935Spendry 	{ &vop_print_desc, union_print },
781*65935Spendry 
782*65935Spendry 	{ &vop_strategy_desc, union_strategy },
783*65935Spendry 	{ &vop_bwrite_desc, union_bwrite },
784*65935Spendry 
785*65935Spendry 	{ &vop_lock_desc, union_lock },
786*65935Spendry 	{ &vop_unlock_desc, union_unlock },
787*65935Spendry 
788*65935Spendry 	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
789*65935Spendry };
790*65935Spendry struct vnodeopv_desc union_vnodeop_opv_desc =
791*65935Spendry 	{ &union_vnodeop_p, union_vnodeop_entries };
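/*
 * This descriptor is not used directly here; at system start-up the
 * generic VFS code (vfs_opv_init() in kern/vfs_init.c, assuming the
 * usual 4.4BSD configuration) walks the table of vnodeopv_desc
 * structures, allocates union_vnodeop_p and fills each vdesc_offset
 * slot from the entries above, with union_bypass standing in for
 * every operation not listed explicitly.
 */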
792