165935Spendry /*
269364Spendry  * Copyright (c) 1994, 1995 The Regents of the University of California.
369364Spendry  * Copyright (c) 1994, 1995 Jan-Simon Pendry.
465935Spendry  * All rights reserved.
565935Spendry  *
665935Spendry  * This code is derived from software donated to Berkeley by
765935Spendry  * Jan-Simon Pendry.
865935Spendry  *
965935Spendry  * %sccs.include.redist.c%
1065935Spendry  *
11*69586Spendry  *	@(#)union_vfsops.c	8.20 (Berkeley) 05/20/95
1265935Spendry  */
1365935Spendry 
1465935Spendry /*
1565994Spendry  * Union Layer
1665935Spendry  */
1765935Spendry 
1865935Spendry #include <sys/param.h>
1965935Spendry #include <sys/systm.h>
2065935Spendry #include <sys/time.h>
2165935Spendry #include <sys/types.h>
2265935Spendry #include <sys/proc.h>
2365935Spendry #include <sys/vnode.h>
2465935Spendry #include <sys/mount.h>
2565935Spendry #include <sys/namei.h>
2665935Spendry #include <sys/malloc.h>
2765997Spendry #include <sys/filedesc.h>
2866053Spendry #include <sys/queue.h>
2966055Spendry #include <miscfs/union/union.h>
3065935Spendry 
3165935Spendry /*
3265935Spendry  * Mount union filesystem
3365935Spendry  */
3465935Spendry int
union_mount(mp,path,data,ndp,p)3565935Spendry union_mount(mp, path, data, ndp, p)
3665935Spendry 	struct mount *mp;
3765935Spendry 	char *path;
3865935Spendry 	caddr_t data;
3965935Spendry 	struct nameidata *ndp;
4065935Spendry 	struct proc *p;
4165935Spendry {
4265935Spendry 	int error = 0;
4365935Spendry 	struct union_args args;
4466034Spendry 	struct vnode *lowerrootvp = NULLVP;
4566034Spendry 	struct vnode *upperrootvp = NULLVP;
4667575Spendry 	struct union_mount *um = 0;
4766034Spendry 	struct ucred *cred = 0;
4866188Spendry 	struct ucred *scred;
4966152Spendry 	struct vattr va;
5066034Spendry 	char *cp;
5166034Spendry 	int len;
5265935Spendry 	u_int size;
5365935Spendry 
5465935Spendry #ifdef UNION_DIAGNOSTIC
5565935Spendry 	printf("union_mount(mp = %x)\n", mp);
5665935Spendry #endif
5765935Spendry 
5865935Spendry 	/*
5965935Spendry 	 * Update is a no-op
6065935Spendry 	 */
6165994Spendry 	if (mp->mnt_flag & MNT_UPDATE) {
6265994Spendry 		/*
6365994Spendry 		 * Need to provide.
6465994Spendry 		 * 1. a way to convert between rdonly and rdwr mounts.
6565994Spendry 		 * 2. support for nfs exports.
6665994Spendry 		 */
6766034Spendry 		error = EOPNOTSUPP;
6866034Spendry 		goto bad;
6965994Spendry 	}
7065935Spendry 
7165935Spendry 	/*
7265935Spendry 	 * Get argument
7365935Spendry 	 */
7465935Spendry 	if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
7566034Spendry 		goto bad;
7665935Spendry 
7765935Spendry 	lowerrootvp = mp->mnt_vnodecovered;
7865935Spendry 	VREF(lowerrootvp);
7965935Spendry 
8065935Spendry 	/*
8167575Spendry 	 * Find upper node.
8265935Spendry 	 */
8365935Spendry 	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
8465935Spendry 	       UIO_USERSPACE, args.target, p);
8566188Spendry 
8666034Spendry 	if (error = namei(ndp))
8766034Spendry 		goto bad;
8866034Spendry 
8965935Spendry 	upperrootvp = ndp->ni_vp;
9065935Spendry 	vrele(ndp->ni_dvp);
9165935Spendry 	ndp->ni_dvp = NULL;
9265935Spendry 
9365935Spendry 	if (upperrootvp->v_type != VDIR) {
9466034Spendry 		error = EINVAL;
9566034Spendry 		goto bad;
9665935Spendry 	}
9765935Spendry 
9865935Spendry 	um = (struct union_mount *) malloc(sizeof(struct union_mount),
9965935Spendry 				M_UFSMNT, M_WAITOK);	/* XXX */
10065935Spendry 
10165935Spendry 	/*
10265935Spendry 	 * Keep a held reference to the target vnodes.
10365935Spendry 	 * They are vrele'd in union_unmount.
10466034Spendry 	 *
10566034Spendry 	 * Depending on the _BELOW flag, the filesystems are
10666034Spendry 	 * viewed in a different order.  In effect, this is the
10766034Spendry 	 * same as providing a mount under option to the mount syscall.
10865935Spendry 	 */
10966034Spendry 
11066152Spendry 	um->um_op = args.mntflags & UNMNT_OPMASK;
11166152Spendry 	switch (um->um_op) {
11266034Spendry 	case UNMNT_ABOVE:
11366034Spendry 		um->um_lowervp = lowerrootvp;
11466034Spendry 		um->um_uppervp = upperrootvp;
11566034Spendry 		break;
11666034Spendry 
11766034Spendry 	case UNMNT_BELOW:
11866034Spendry 		um->um_lowervp = upperrootvp;
11966034Spendry 		um->um_uppervp = lowerrootvp;
12066034Spendry 		break;
12166034Spendry 
12266034Spendry 	case UNMNT_REPLACE:
12366034Spendry 		vrele(lowerrootvp);
12466034Spendry 		lowerrootvp = NULLVP;
12566034Spendry 		um->um_uppervp = upperrootvp;
12666034Spendry 		um->um_lowervp = lowerrootvp;
12766034Spendry 		break;
12866034Spendry 
12966034Spendry 	default:
13066034Spendry 		error = EINVAL;
13166034Spendry 		goto bad;
13266034Spendry 	}
13366034Spendry 
13467575Spendry 	/*
13567575Spendry 	 * Unless the mount is readonly, ensure that the top layer
13667575Spendry 	 * supports whiteout operations
13767575Spendry 	 */
13867575Spendry 	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
13967575Spendry 		error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
14067575Spendry 		if (error)
14167575Spendry 			goto bad;
14267575Spendry 	}
14367575Spendry 
14467575Spendry 	um->um_cred = p->p_ucred;
14567575Spendry 	crhold(um->um_cred);
14665997Spendry 	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
14765935Spendry 
14865997Spendry 	/*
14965997Spendry 	 * Depending on what you think the MNT_LOCAL flag might mean,
15065997Spendry 	 * you may want the && to be || on the conditional below.
15165997Spendry 	 * At the moment it has been defined that the filesystem is
15265997Spendry 	 * only local if it is all local, ie the MNT_LOCAL flag implies
15365997Spendry 	 * that the entire namespace is local.  If you think the MNT_LOCAL
15465997Spendry 	 * flag implies that some of the files might be stored locally
15565997Spendry 	 * then you will want to change the conditional.
15665997Spendry 	 */
15766152Spendry 	if (um->um_op == UNMNT_ABOVE) {
15866152Spendry 		if (((um->um_lowervp == NULLVP) ||
15966152Spendry 		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
16066152Spendry 		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
16166152Spendry 			mp->mnt_flag |= MNT_LOCAL;
16266152Spendry 	}
16365997Spendry 
16465994Spendry 	/*
16565994Spendry 	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
16665994Spendry 	 * of lookup() which explicitly checks the flag, rather than asking
16765994Spendry 	 * the filesystem for it's own opinion.  This means, that an update
16865994Spendry 	 * mount of the underlying filesystem to go from rdonly to rdwr
16965994Spendry 	 * will leave the unioned view as read-only.
17065994Spendry 	 */
17166034Spendry 	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
17266175Spendry 
17365935Spendry 	mp->mnt_data = (qaddr_t) um;
17468625Smckusick 	vfs_getnewfsid(mp);
17565935Spendry 
17665935Spendry 	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
17765935Spendry 	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
17866034Spendry 
17966152Spendry 	switch (um->um_op) {
18066034Spendry 	case UNMNT_ABOVE:
18167229Spendry 		cp = "<above>:";
18266034Spendry 		break;
18366034Spendry 	case UNMNT_BELOW:
18467229Spendry 		cp = "<below>:";
18566034Spendry 		break;
18666034Spendry 	case UNMNT_REPLACE:
18766311Spendry 		cp = "";
18866034Spendry 		break;
18966034Spendry 	}
19066034Spendry 	len = strlen(cp);
19166034Spendry 	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
19266034Spendry 
19366034Spendry 	cp = mp->mnt_stat.f_mntfromname + len;
19466034Spendry 	len = MNAMELEN - len;
19566034Spendry 
19666034Spendry 	(void) copyinstr(args.target, cp, len - 1, &size);
19766034Spendry 	bzero(cp + size, len - size);
19866034Spendry 
19965935Spendry #ifdef UNION_DIAGNOSTIC
20066034Spendry 	printf("union_mount: from %s, on %s\n",
20165935Spendry 		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
20265935Spendry #endif
20365935Spendry 	return (0);
20466034Spendry 
20566034Spendry bad:
20667575Spendry 	if (um)
20767575Spendry 		free(um, M_UFSMNT);
20866034Spendry 	if (cred)
20966034Spendry 		crfree(cred);
21066034Spendry 	if (upperrootvp)
21166034Spendry 		vrele(upperrootvp);
21266034Spendry 	if (lowerrootvp)
21366034Spendry 		vrele(lowerrootvp);
21466034Spendry 	return (error);
21565935Spendry }
21665935Spendry 
/*
 * VFS start.
 *
 * The union layer has no start-up work of its own: the start
 * routine of each underlying filesystem will already have been
 * called when that filesystem was mounted.  All three arguments
 * are ignored and the call always succeeds.
 */
int
union_start(mp, flags, p)
	struct mount *mp;
	int flags;
	struct proc *p;
{
	/* Nothing to do. */
	return (0);
}
23165935Spendry 
23265935Spendry /*
23365935Spendry  * Free reference to union layer
23465935Spendry  */
23565935Spendry int
union_unmount(mp,mntflags,p)23665935Spendry union_unmount(mp, mntflags, p)
23765935Spendry 	struct mount *mp;
23865935Spendry 	int mntflags;
23965935Spendry 	struct proc *p;
24065935Spendry {
24165935Spendry 	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
24265935Spendry 	struct vnode *um_rootvp;
24365935Spendry 	int error;
24467416Spendry 	int freeing;
24565935Spendry 	int flags = 0;
24665935Spendry 
24765935Spendry #ifdef UNION_DIAGNOSTIC
24865935Spendry 	printf("union_unmount(mp = %x)\n", mp);
24965935Spendry #endif
25065935Spendry 
25169341Smckusick 	if (mntflags & MNT_FORCE)
25265935Spendry 		flags |= FORCECLOSE;
25365935Spendry 
25465935Spendry 	if (error = union_root(mp, &um_rootvp))
25565935Spendry 		return (error);
25667416Spendry 
25767416Spendry 	/*
25867416Spendry 	 * Keep flushing vnodes from the mount list.
25967416Spendry 	 * This is needed because of the un_pvp held
26067416Spendry 	 * reference to the parent vnode.
26167416Spendry 	 * If more vnodes have been freed on a given pass,
26267416Spendry 	 * the try again.  The loop will iterate at most
26367416Spendry 	 * (d) times, where (d) is the maximum tree depth
26467416Spendry 	 * in the filesystem.
26567416Spendry 	 */
26667416Spendry 	for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
26767416Spendry 		struct vnode *vp;
26867416Spendry 		int n;
26967416Spendry 
27067416Spendry 		/* count #vnodes held on mount list */
27167416Spendry 		for (n = 0, vp = mp->mnt_vnodelist.lh_first;
27267416Spendry 				vp != NULLVP;
27367416Spendry 				vp = vp->v_mntvnodes.le_next)
27467416Spendry 			n++;
27567416Spendry 
27667416Spendry 		/* if this is unchanged then stop */
27767416Spendry 		if (n == freeing)
27867416Spendry 			break;
27967416Spendry 
28067416Spendry 		/* otherwise try once more time */
28167416Spendry 		freeing = n;
28267416Spendry 	}
28367416Spendry 
28467416Spendry 	/* At this point the root vnode should have a single reference */
28565965Spendry 	if (um_rootvp->v_usecount > 1) {
28665965Spendry 		vput(um_rootvp);
28765935Spendry 		return (EBUSY);
28865965Spendry 	}
28965935Spendry 
29065935Spendry #ifdef UNION_DIAGNOSTIC
29167416Spendry 	vprint("union root", um_rootvp);
29265935Spendry #endif
29365935Spendry 	/*
29465935Spendry 	 * Discard references to upper and lower target vnodes.
29565935Spendry 	 */
29666034Spendry 	if (um->um_lowervp)
29766034Spendry 		vrele(um->um_lowervp);
29865935Spendry 	vrele(um->um_uppervp);
29965935Spendry 	crfree(um->um_cred);
30065935Spendry 	/*
30165935Spendry 	 * Release reference on underlying root vnode
30265935Spendry 	 */
30365965Spendry 	vput(um_rootvp);
30465935Spendry 	/*
30565935Spendry 	 * And blow it away for future re-use
30665935Spendry 	 */
30769364Spendry 	vgone(um_rootvp);
30865935Spendry 	/*
30965935Spendry 	 * Finally, throw away the union_mount structure
31065935Spendry 	 */
31165935Spendry 	free(mp->mnt_data, M_UFSMNT);	/* XXX */
31265935Spendry 	mp->mnt_data = 0;
31365997Spendry 	return (0);
31465935Spendry }
31565935Spendry 
31665935Spendry int
union_root(mp,vpp)31765935Spendry union_root(mp, vpp)
31865935Spendry 	struct mount *mp;
31965935Spendry 	struct vnode **vpp;
32065935Spendry {
32169447Smckusick 	struct proc *p = curproc;	/* XXX */
32265935Spendry 	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
32365935Spendry 	int error;
32466152Spendry 	int loselock;
32565935Spendry 
32665935Spendry 	/*
32765935Spendry 	 * Return locked reference to root.
32865935Spendry 	 */
32965965Spendry 	VREF(um->um_uppervp);
33066152Spendry 	if ((um->um_op == UNMNT_BELOW) &&
33166152Spendry 	     VOP_ISLOCKED(um->um_uppervp)) {
33266152Spendry 		loselock = 1;
33366152Spendry 	} else {
33469447Smckusick 		vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
33566152Spendry 		loselock = 0;
33666152Spendry 	}
33766034Spendry 	if (um->um_lowervp)
33866034Spendry 		VREF(um->um_lowervp);
33965990Spendry 	error = union_allocvp(vpp, mp,
34065990Spendry 			      (struct vnode *) 0,
34165990Spendry 			      (struct vnode *) 0,
34265935Spendry 			      (struct componentname *) 0,
34365935Spendry 			      um->um_uppervp,
34468078Spendry 			      um->um_lowervp,
34568078Spendry 			      1);
34665965Spendry 
34765965Spendry 	if (error) {
348*69586Spendry 		if (loselock)
349*69586Spendry 			vrele(um->um_uppervp);
350*69586Spendry 		else
351*69586Spendry 			vput(um->um_uppervp);
35266034Spendry 		if (um->um_lowervp)
35366034Spendry 			vrele(um->um_lowervp);
35465965Spendry 	} else {
35566152Spendry 		if (loselock)
35666152Spendry 			VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
35765965Spendry 	}
35865935Spendry 
35965935Spendry 	return (error);
36065935Spendry }
36165935Spendry 
36265935Spendry int
union_statfs(mp,sbp,p)36365935Spendry union_statfs(mp, sbp, p)
36465935Spendry 	struct mount *mp;
36565935Spendry 	struct statfs *sbp;
36665935Spendry 	struct proc *p;
36765935Spendry {
36865935Spendry 	int error;
36965935Spendry 	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
37065935Spendry 	struct statfs mstat;
37165935Spendry 	int lbsize;
37265935Spendry 
37365935Spendry #ifdef UNION_DIAGNOSTIC
37465935Spendry 	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
37565935Spendry 			um->um_lowervp,
37665935Spendry 	       		um->um_uppervp);
37765935Spendry #endif
37865935Spendry 
37965935Spendry 	bzero(&mstat, sizeof(mstat));
38065935Spendry 
38166034Spendry 	if (um->um_lowervp) {
38266034Spendry 		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
38366034Spendry 		if (error)
38466034Spendry 			return (error);
38566034Spendry 	}
38665935Spendry 
38765935Spendry 	/* now copy across the "interesting" information and fake the rest */
38865935Spendry #if 0
38965935Spendry 	sbp->f_type = mstat.f_type;
39065935Spendry 	sbp->f_flags = mstat.f_flags;
39165935Spendry 	sbp->f_bsize = mstat.f_bsize;
39265935Spendry 	sbp->f_iosize = mstat.f_iosize;
39365935Spendry #endif
39465935Spendry 	lbsize = mstat.f_bsize;
39565935Spendry 	sbp->f_blocks = mstat.f_blocks;
39665935Spendry 	sbp->f_bfree = mstat.f_bfree;
39765935Spendry 	sbp->f_bavail = mstat.f_bavail;
39865935Spendry 	sbp->f_files = mstat.f_files;
39965935Spendry 	sbp->f_ffree = mstat.f_ffree;
40065935Spendry 
40165935Spendry 	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
40265935Spendry 	if (error)
40365935Spendry 		return (error);
40465935Spendry 
40565935Spendry 	sbp->f_flags = mstat.f_flags;
40665935Spendry 	sbp->f_bsize = mstat.f_bsize;
40765935Spendry 	sbp->f_iosize = mstat.f_iosize;
40865935Spendry 
40965935Spendry 	/*
41065935Spendry 	 * if the lower and upper blocksizes differ, then frig the
41165935Spendry 	 * block counts so that the sizes reported by df make some
41265935Spendry 	 * kind of sense.  none of this makes sense though.
41365935Spendry 	 */
41465935Spendry 
41568108Spendry 	if (mstat.f_bsize != lbsize)
41665935Spendry 		sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize;
41768108Spendry 
41868108Spendry 	/*
41968108Spendry 	 * The "total" fields count total resources in all layers,
42068108Spendry 	 * the "free" fields count only those resources which are
42168108Spendry 	 * free in the upper layer (since only the upper layer
42268108Spendry 	 * is writeable).
42368108Spendry 	 */
42465935Spendry 	sbp->f_blocks += mstat.f_blocks;
42568108Spendry 	sbp->f_bfree = mstat.f_bfree;
42668108Spendry 	sbp->f_bavail = mstat.f_bavail;
42765935Spendry 	sbp->f_files += mstat.f_files;
42868108Spendry 	sbp->f_ffree = mstat.f_ffree;
42965935Spendry 
43065935Spendry 	if (sbp != &mp->mnt_stat) {
43168625Smckusick 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
43265935Spendry 		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
43365935Spendry 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
43465935Spendry 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
43565935Spendry 	}
43665935Spendry 	return (0);
43765935Spendry }
43865935Spendry 
43968625Smckusick /*
44068625Smckusick  * XXX - Assumes no data cached at union layer.
44168625Smckusick  */
44268625Smckusick #define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \
44368625Smckusick 	    struct proc *)))nullop)
44465935Spendry 
44568625Smckusick #define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \
44668625Smckusick 	    struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp)
44768625Smckusick int union_init __P((struct vfsconf *));
44868625Smckusick #define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
44968625Smckusick 	    struct proc *)))eopnotsupp)
45068625Smckusick #define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
45168625Smckusick 	    size_t, struct proc *)))eopnotsupp)
45268625Smckusick #define union_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
45368625Smckusick 	    eopnotsupp)
45468625Smckusick #define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
45565935Spendry 
45665935Spendry struct vfsops union_vfsops = {
45765935Spendry 	union_mount,
45865935Spendry 	union_start,
45965935Spendry 	union_unmount,
46065935Spendry 	union_root,
46165935Spendry 	union_quotactl,
46265935Spendry 	union_statfs,
46365935Spendry 	union_sync,
46465935Spendry 	union_vget,
46565935Spendry 	union_fhtovp,
46665935Spendry 	union_vptofh,
46765935Spendry 	union_init,
46868625Smckusick 	union_sysctl,
46965935Spendry };
470