/*
 * Copyright (c) 1992 The Regents of the University of California
 * All rights reserved.
 *
 * This code is derived from the null layer of
 * John Heidemann from the UCLA Ficus project and
 * Jan-Simon Pendry's loopback file system.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)null_vnops.c	1.6 (Berkeley) 07/11/92
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Null Layer
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects:  it is implemented using
 * a bypass operation, and its "null-nodes" stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  All but vop_getattr, _inactive, _reclaim,
 * and _print are bypassed.
 *
 * Vop_getattr is not bypassed so that we can change the fsid being
 * returned.  Vop_{inactive,reclaim} are not bypassed so that
 * they can handle freeing null-layer specific data.
 * Vop_print is not bypassed so that it can report null-layer
 * debugging information.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 *
 * NEEDSWORK: Describe methods to invoke operations on the lower layer
 * (bypass vs. VOP).
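 * (Until that section is written, the two styles can be compared
 * within this file: null_bypass() below maps the vnode arguments
 * and re-invokes the operation on the lower vnode via VCALL(),
 * while null_bmap() calls the lower layer directly with
 * VOP_BMAP(NULLVPTOLOWERVP(ap->a_vp), ...).)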
 *
 *
 * CREATING NEW FILESYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to rename
 * all the variables consistently.
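 * For example, a hypothetical "foo" layer might be started with
 * something like
 *
 *	sed -e 's/null_/foo_/g' -e 's/NULL_/FOO_/g' \
 *	    null_vnops.c > foo_vnops.c
 *
 * (a rough sketch only; the layer's include file, mount code, and
 * node-management routines need the same renaming).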
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <nullfs/null.h>


int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */

/*
 * This is the 10-Apr-92 bypass routine.
 *    This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *    Define SAFETY to include some error checking code.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(ap)
	struct vop_generic_args *ap;
{
	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
	register struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (null_bug_bypass)
		printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef SAFETY
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic ("null_bypass: no vp's in map.\n");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		vps_p[i] = this_vp_p =
			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type; a vp slot may also be empty (e.g.
		 * the target of a rename).  Check for and don't map
		 * any that aren't.  (Must map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULL ||
		    (*this_vp_p)->v_op != null_vnodeop_p)) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & 1)
				VREF(*this_vp_p);
		}

	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & 1)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops return a vp through vpp,
		 * several of them actually vrele it before returning.
		 * We must not map the vpp for those ops.
		 * (This should go away.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE) {
#ifdef NULLFS_DIAGNOSTIC
			vppp = VOPARG_OFFSETTO(struct vnode***,
					 descp->vdesc_vpp_offset,ap);
			printf("null_bypass (%s), lowervpp->usecount = %d\n",
			    descp->vdesc_name, (**vppp)->v_usecount);
#endif
			return (error);
		}
		vppp = VOPARG_OFFSETTO(struct vnode***,
				 descp->vdesc_vpp_offset,ap);
		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
	}

	return (error);
}


/*
 *  We handle getattr to change the fsid.
 */
int
null_getattr(ap)
	struct vop_getattr_args *ap;
{
	int error;
	if (error = null_bypass(ap))
		return error;
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return 0;
}

/*
 * XXX - Ideally inactive would not release the lowervp,
 * so the null_node could stay in the cache and be reused.
 * Unfortunately, this currently causes "locking against self"
 * problems in the UFS, so for now AVOID_CACHING hacks
 * around the bug.
 */
/* #define AVOID_CACHING */

int
null_inactive (ap)
	struct vop_inactive_args *ap;
{
#ifdef AVOID_CACHING
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	xp->null_lowervp = NULL;
	remque(xp);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vp->v_type = VBAD;   /* The node is clean (no reclaim needed). */
	vrele (lowervp);
#else
#ifdef DIAGNOSTIC  /* NEEDSWORK: goes away */
	if (VOP_ISLOCKED(NULLVPTOLOWERVP(ap->a_vp))) {
		panic ("null_inactive: inactive's lowervp is locked.");
	}
#endif
	/*
	 * Remember we're inactive so we
	 * don't send locks through.
	 */
	VTONULL(ap->a_vp)->null_isinactive = 1;
	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our null_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return 0;
#endif
}

int
null_reclaim (ap)
	struct vop_reclaim_args *ap;
{
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

#ifdef AVOID_CACHING
	return 0;
#else
	/*
	 * Note: at this point, vp->v_op == dead_vnodeop_p,
	 * so we can't call VOPs on ourself.
	 */
#ifdef DIAGNOSTIC
	/* XXX - this is only a bug if the vnode is locked by ourselves */
	if (lowervp->v_usecount == 1 && VOP_ISLOCKED(lowervp)) {
		panic("null_reclaim: lowervp is locked but must go away.");
	}
#endif
	/* After this assignment, this node will not be re-used. */
	xp->null_lowervp = NULL;
	remque(xp);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vrele (lowervp);
	return 0;
#endif
}

int
null_bmap (ap)
	struct vop_bmap_args *ap;
{
#ifdef NULLFS_DIAGNOSTIC
	printf("null_bmap(ap->a_vp = %x->%x)\n", ap->a_vp, NULLVPTOLOWERVP(ap->a_vp));
#endif

	return VOP_BMAP(NULLVPTOLOWERVP(ap->a_vp), ap->a_bn, ap->a_vpp, ap->a_bnp);
}

int
null_strategy (ap)
	struct vop_strategy_args *ap;
{
	int error;
	struct vnode *savedvp;

#ifdef NULLFS_DIAGNOSTIC
	printf("null_strategy(vp = %x->%x)\n", ap->a_bp->b_vp, NULLVPTOLOWERVP(ap->a_bp->b_vp));
#endif

	/* Substitute the lower vnode for the duration of the call. */
	savedvp = ap->a_bp->b_vp;
	ap->a_bp->b_vp = NULLVPTOLOWERVP(savedvp);

	error = VOP_STRATEGY(ap->a_bp);

	ap->a_bp->b_vp = savedvp;

	return error;
}


int
null_print (ap)
	struct vop_print_args *ap;
{
	register struct vnode *vp = ap->a_vp;
	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
	return 0;
}

#if 0
int
null_lock(ap)
	struct vop_lock_args *ap;
{
	if (VTONULL(ap->a_vp)->null_isinactive)
		return 0;
	else return null_bypass(ap);
}

int
null_unlock(ap)
	struct vop_unlock_args *ap;
{
	if (VTONULL(ap->a_vp)->null_isinactive)
		return 0;
	else return null_bypass(ap);
}
#endif

/*
 * Global vfs data structures
 */
/*
 * NEEDSWORK: strategy, bmap are hand coded currently.  They should
 * go away with a merged buffer/block cache.
 *
 */
int (**null_vnodeop_p)();
struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc, null_bypass },

	{ &vop_getattr_desc, null_getattr },
	{ &vop_inactive_desc, null_inactive },
	{ &vop_reclaim_desc, null_reclaim },
	{ &vop_print_desc, null_print },
#if 0
	{ &vop_lock_desc, null_lock },
	{ &vop_unlock_desc, null_unlock },
#endif

	{ &vop_bmap_desc, null_bmap },
	{ &vop_strategy_desc, null_strategy },

	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };
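
/*
 * NOTE: in the 4.4BSD VFS scheme, this description presumably takes
 * effect only once it is listed in the kernel's table of vnodeopv_desc
 * pointers (vfs_opv_descs[]), which vfs_opv_init() walks at boot time
 * to allocate and fill in null_vnodeop_p; see vfs_init.c.
 */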