miscfs/nullfs/null_vnops.c

54753Sjohnh/*
*63245Sbostic * Copyright (c) 1992, 1993
*63245Sbostic *	The Regents of the University of California.  All rights reserved.
54753Sjohnh *
54951Sheideman * This code is derived from software contributed to Berkeley by
54951Sheideman * John Heidemann of the UCLA Ficus project.
54753Sjohnh *
54753Sjohnh * %sccs.include.redist.c%
54753Sjohnh *
*63245Sbostic *	@(#)null_vnops.c	8.1 (Berkeley) 06/10/93
54766Sjohnh *
54766Sjohnh * Ancestors:
54753Sjohnh *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
54766Sjohnh *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
54766Sjohnh *	...and...
54766Sjohnh *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
54753Sjohnh */
54753Sjohnh
54753Sjohnh/*
54766Sjohnh * Null Layer
54766Sjohnh *
54951Sheideman * (See mount_null(8) for more information.)
54951Sheideman *
54766Sjohnh * The null layer duplicates a portion of the file system
54766Sjohnh * name space under a new name.  In this respect, it is
54766Sjohnh * similar to the loopback file system.  It differs from
54766Sjohnh * the loopback fs in two respects:  it is implemented using
54951Sheideman * stackable layers techniques, and its "null-node"s stack above
54766Sjohnh * all lower-layer vnodes, not just over directory vnodes.
54766Sjohnh *
54951Sheideman * The null layer has two purposes.  First, it serves as a demonstration
54951Sheideman * of layering by providing a layer which does nothing.  (It actually
54951Sheideman * does everything the loopback file system does, which is slightly
54951Sheideman * more than nothing.)  Second, the null layer can serve as a prototype
54951Sheideman * layer.  Since it provides all necessary layer framework,
54951Sheideman * new file system layers can be created very easily by starting
54951Sheideman * with a null layer.
54951Sheideman *
54951Sheideman * The remainder of this man page examines the null layer as a basis
54951Sheideman * for constructing new layers.
54951Sheideman *
54951Sheideman *
54951Sheideman * INSTANTIATING NEW NULL LAYERS
54951Sheideman *
54951Sheideman * New null layers are created with mount_null(8).
54951Sheideman * Mount_null(8) takes two arguments, the pathname
54951Sheideman * of the lower vfs (target-pn) and the pathname where the null
54951Sheideman * layer will appear in the namespace (alias-pn).  After
54951Sheideman * the null layer is put into place, the contents
54951Sheideman * of target-pn subtree will be aliased under alias-pn.
54951Sheideman *
54951Sheideman *
54951Sheideman * OPERATION OF A NULL LAYER
54951Sheideman *
54766Sjohnh * The null layer is the minimum file system layer,
54766Sjohnh * simply bypassing all possible operations to the lower layer
54951Sheideman * for processing there.  The majority of its activity centers
54951Sheideman * on the bypass routine, through which nearly all vnode operations
54951Sheideman * pass.
54766Sjohnh *
54951Sheideman * The bypass routine accepts arbitrary vnode operations for
54951Sheideman * handling by the lower layer.  It begins by examining vnode
54951Sheideman * operation arguments and replacing any null-nodes by their
54951Sheideman * lower-layer equivalents.  It then invokes the operation
54951Sheideman * on the lower layer.  Finally, it replaces the null-nodes
54951Sheideman * in the arguments and, if a vnode is returned by the operation,
54951Sheideman * stacks a null-node on top of the returned vnode.
54951Sheideman *
54951Sheideman * Although bypass handles most operations,
54951Sheideman * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
54951Sheideman * Vop_getattr must change the fsid being returned.
54951Sheideman * Vop_inactive and vop_reclaim are not bypassed so that
54766Sjohnh * they can handle freeing null-layer specific data.
54951Sheideman * Vop_print is not bypassed to avoid excessive debugging
54951Sheideman * information.
54766Sjohnh *
54893Sheideman *
54951Sheideman * INSTANTIATING VNODE STACKS
54893Sheideman *
54951Sheideman * Mounting associates the null layer with a lower layer,
54951Sheideman * in effect stacking two VFSes.  Vnode stacks are instead
54951Sheideman * created on demand as files are accessed.
54893Sheideman *
54951Sheideman * The initial mount creates a single vnode stack for the
54951Sheideman * root of the new null layer.  All other vnode stacks
54951Sheideman * are created as a result of vnode operations on
54951Sheideman * this or other null vnode stacks.
54893Sheideman *
54951Sheideman * New vnode stacks come into existence as a result of
54951Sheideman * an operation which returns a vnode.
54951Sheideman * The bypass routine stacks a null-node above the new
54951Sheideman * vnode before returning it to the caller.
54893Sheideman *
54951Sheideman * For example, imagine mounting a null layer with
54951Sheideman * "mount_null /usr/include /dev/layer/null".
55025Smckusick * Changing directory to /dev/layer/null will assign
54951Sheideman * the root null-node (which was created when the null layer was mounted).
54951Sheideman * Now consider opening "sys".  A vop_lookup would be
54951Sheideman * done on the root null-node.  This operation would bypass through
54951Sheideman * to the lower layer which would return a vnode representing
54951Sheideman * the UFS "sys".  Null_bypass then builds a null-node
54951Sheideman * aliasing the UFS "sys" and returns this to the caller.
54951Sheideman * Later operations on the null-node "sys" will repeat this
54951Sheideman * process when constructing other vnode stacks.
54951Sheideman *
54951Sheideman *
54951Sheideman * CREATING OTHER FILE SYSTEM LAYERS
54951Sheideman *
54893Sheideman * One of the easiest ways to construct new file system layers is to make
54893Sheideman * a copy of the null layer, rename all files and variables, and
54893Sheideman * then begin modifying the copy.  Sed can be used to easily rename
54893Sheideman * all variables.
54893Sheideman *
54951Sheideman * The umap layer is an example of a layer descended from the
54951Sheideman * null layer.
54951Sheideman *
54951Sheideman *
54951Sheideman * INVOKING OPERATIONS ON LOWER LAYERS
54951Sheideman *
54951Sheideman * There are two techniques to invoke operations on a lower layer
54951Sheideman * when the operation cannot be completely bypassed.  Each method
54951Sheideman * is appropriate in different situations.  In both cases,
54951Sheideman * it is the responsibility of the aliasing layer to make
54951Sheideman * the operation arguments "correct" for the lower layer
54951Sheideman * by mapping any vnode arguments to the lower layer.
54951Sheideman *
54951Sheideman * The first approach is to call the aliasing layer's bypass routine.
54951Sheideman * This method is most suitable when you wish to invoke the operation
54951Sheideman * currently being handled on the lower layer.  It has the advantage
55025Smckusick * that the bypass routine already must do argument mapping.
54951Sheideman * An example of this is null_getattr in the null layer.
54951Sheideman *
54951Sheideman * A second approach is to directly invoke vnode operations on
54951Sheideman * the lower layer with the VOP_OPERATIONNAME interface.
54951Sheideman * The advantage of this method is that it is easy to invoke
54951Sheideman * arbitrary operations on the lower layer.  The disadvantage
54951Sheideman * is that vnode arguments must be manually mapped.
54951Sheideman *
54753Sjohnh */
54753Sjohnh
54753Sjohnh#include <sys/param.h>
54753Sjohnh#include <sys/systm.h>
54753Sjohnh#include <sys/proc.h>
54753Sjohnh#include <sys/time.h>
54753Sjohnh#include <sys/types.h>
54753Sjohnh#include <sys/vnode.h>
54753Sjohnh#include <sys/mount.h>
54753Sjohnh#include <sys/namei.h>
54753Sjohnh#include <sys/malloc.h>
54753Sjohnh#include <sys/buf.h>
55025Smckusick#include <miscfs/nullfs/null.h>
54753Sjohnh
54753Sjohnh
/*
 * Debugging switch: when non-zero, null_bypass printf's the name of
 * every operation it passes down.
 */
int null_bug_bypass = 0;
54753Sjohnh
54753Sjohnh/*
54766Sjohnh * This is the 10-Apr-92 bypass routine.
54766Sjohnh *    This version has been optimized for speed, throwing away some
54766Sjohnh * safety checks.  It should still always work, but it's not as
54766Sjohnh * robust to programmer errors.
54766Sjohnh *    Define SAFETY to include some error checking code.
54766Sjohnh *
54766Sjohnh * In general, we map all vnodes going down and unmap them on the way back.
54766Sjohnh * As an exception to this, vnodes can be marked "unmapped" by setting
54766Sjohnh * the Nth bit in operation's vdesc_flags.
54766Sjohnh *
54766Sjohnh * Also, some BSD vnode operations have the side effect of vrele'ing
54766Sjohnh * their arguments.  With stacking, the reference counts are held
54766Sjohnh * by the upper node, not the lower one, so we must handle these
54766Sjohnh * side-effects here.  This is not of concern in Sun-derived systems
54766Sjohnh * since there are no such side-effects.
54766Sjohnh *
54766Sjohnh * This makes the following assumptions:
54766Sjohnh * - only one returned vpp
54766Sjohnh * - no INOUT vpp's (Sun's vop_open has one of these)
54766Sjohnh * - the vnode operation vector of the first vnode should be used
54766Sjohnh *   to determine what implementation of the op should be invoked
54766Sjohnh * - all mapped vnodes are of our vnode-type (NEEDSWORK:
54766Sjohnh *   problems on rmdir'ing mount points and renaming?)
54766Sjohnh */
54766Sjohnhint
54766Sjohnhnull_bypass(ap)
55025Smckusick	struct vop_generic_args /* {
55025Smckusick		struct vnodeop_desc *a_desc;
55025Smckusick		<other random data follows, presumably>
55025Smckusick	} */ *ap;
54753Sjohnh{
54893Sheideman	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
54893Sheideman	register struct vnode **this_vp_p;
54753Sjohnh	int error;
54766Sjohnh	struct vnode *old_vps[VDESC_MAX_VPS];
54766Sjohnh	struct vnode **vps_p[VDESC_MAX_VPS];
54766Sjohnh	struct vnode ***vppp;
54766Sjohnh	struct vnodeop_desc *descp = ap->a_desc;
54893Sheideman	int reles, i;
54753Sjohnh
54766Sjohnh	if (null_bug_bypass)
54766Sjohnh		printf ("null_bypass: %s\n", descp->vdesc_name);
54753Sjohnh
54766Sjohnh#ifdef SAFETY
54753Sjohnh	/*
54766Sjohnh	 * We require at least one vp.
54753Sjohnh	 */
54938Sheideman	if (descp->vdesc_vp_offsets == NULL ||
54938Sheideman	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
54766Sjohnh		panic ("null_bypass: no vp's in map.\n");
54753Sjohnh#endif
54753Sjohnh
54753Sjohnh	/*
54766Sjohnh	 * Map the vnodes going in.
54766Sjohnh	 * Later, we'll invoke the operation based on
54766Sjohnh	 * the first mapped vnode's operation vector.
54753Sjohnh	 */
54893Sheideman	reles = descp->vdesc_flags;
54938Sheideman	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
54938Sheideman		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
54766Sjohnh			break;   /* bail out at end of list */
54766Sjohnh		vps_p[i] = this_vp_p =
54766Sjohnh			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
54893Sheideman		/*
54893Sheideman		 * We're not guaranteed that any but the first vnode
54893Sheideman		 * are of our type.  Check for and don't map any
54951Sheideman		 * that aren't.  (We must always map first vp or vclean fails.)
54893Sheideman		 */
54944Sheideman		if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
54893Sheideman			old_vps[i] = NULL;
54893Sheideman		} else {
54893Sheideman			old_vps[i] = *this_vp_p;
54893Sheideman			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
54938Sheideman			/*
54938Sheideman			 * XXX - Several operations have the side effect
54938Sheideman			 * of vrele'ing their vp's.  We must account for
54938Sheideman			 * that.  (This should go away in the future.)
54938Sheideman			 */
54893Sheideman			if (reles & 1)
54893Sheideman				VREF(*this_vp_p);
54938Sheideman		}
54766Sjohnh
54938Sheideman	}
54753Sjohnh
54753Sjohnh	/*
54766Sjohnh	 * Call the operation on the lower layer
54766Sjohnh	 * with the modified argument structure.
54753Sjohnh	 */
54766Sjohnh	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
54753Sjohnh
54753Sjohnh	/*
54766Sjohnh	 * Maintain the illusion of call-by-value
54766Sjohnh	 * by restoring vnodes in the argument structure
54766Sjohnh	 * to their original value.
54753Sjohnh	 */
54893Sheideman	reles = descp->vdesc_flags;
54938Sheideman	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
54938Sheideman		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
54766Sjohnh			break;   /* bail out at end of list */
54893Sheideman		if (old_vps[i]) {
54893Sheideman			*(vps_p[i]) = old_vps[i];
54893Sheideman			if (reles & 1)
54893Sheideman				vrele(*(vps_p[i]));
54938Sheideman		}
54938Sheideman	}
54766Sjohnh
54753Sjohnh	/*
54938Sheideman	 * Map the possible out-going vpp
54938Sheideman	 * (Assumes that the lower layer always returns
54938Sheideman	 * a VREF'ed vpp unless it gets an error.)
54753Sjohnh	 */
54766Sjohnh	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
54766Sjohnh	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
54766Sjohnh	    !error) {
54938Sheideman		/*
54938Sheideman		 * XXX - even though some ops have vpp returned vp's,
54938Sheideman		 * several ops actually vrele this before returning.
54938Sheideman		 * We must avoid these ops.
54951Sheideman		 * (This should go away when these ops are regularized.)
54938Sheideman		 */
54951Sheideman		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
54951Sheideman			goto out;
54938Sheideman		vppp = VOPARG_OFFSETTO(struct vnode***,
54766Sjohnh				 descp->vdesc_vpp_offset,ap);
54893Sheideman		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
54938Sheideman	}
54753Sjohnh
54951Sheideman out:
54766Sjohnh	return (error);
54753Sjohnh}
54753Sjohnh
54753Sjohnh
54753Sjohnh/*
54951Sheideman *  We handle getattr only to change the fsid.
54753Sjohnh */
54766Sjohnhint
54766Sjohnhnull_getattr(ap)
55025Smckusick	struct vop_getattr_args /* {
55025Smckusick		struct vnode *a_vp;
55025Smckusick		struct vattr *a_vap;
55025Smckusick		struct ucred *a_cred;
55025Smckusick		struct proc *a_p;
55025Smckusick	} */ *ap;
54753Sjohnh{
54753Sjohnh	int error;
54938Sheideman	if (error = null_bypass(ap))
55025Smckusick		return (error);
54766Sjohnh	/* Requires that arguments be restored. */
54766Sjohnh	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
55025Smckusick	return (0);
54766Sjohnh}
54753Sjohnh
54753Sjohnh
/*
 * Inactive is deliberately a no-op and is _not_ bypassed.
 *
 * The reference on the lower vnode is kept until reclaim, so that
 * until then our null_node stays in the cache and can be reused.
 *
 * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
 * trying to reactivate it with capabilities (v_id), like the name
 * lookup cache code does.  That's too much work for now.
 *
 * Always returns 0.
 */
int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
54753Sjohnh
54938Sheidemanint
55025Smckusicknull_reclaim(ap)
55025Smckusick	struct vop_reclaim_args /* {
55025Smckusick		struct vnode *a_vp;
55025Smckusick	} */ *ap;
54753Sjohnh{
54938Sheideman	struct vnode *vp = ap->a_vp;
54938Sheideman	struct null_node *xp = VTONULL(vp);
54938Sheideman	struct vnode *lowervp = xp->null_lowervp;
54938Sheideman
54938Sheideman	/*
54951Sheideman	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
54938Sheideman	 * so we can't call VOPs on ourself.
54938Sheideman	 */
54938Sheideman	/* After this assignment, this node will not be re-used. */
54938Sheideman	xp->null_lowervp = NULL;
54938Sheideman	remque(xp);
54938Sheideman	FREE(vp->v_data, M_TEMP);
54938Sheideman	vp->v_data = NULL;
54938Sheideman	vrele (lowervp);
55025Smckusick	return (0);
54753Sjohnh}
54753Sjohnh
54951Sheideman
54938Sheidemanint
55025Smckusicknull_print(ap)
55025Smckusick	struct vop_print_args /* {
55025Smckusick		struct vnode *a_vp;
55025Smckusick	} */ *ap;
54753Sjohnh{
54951Sheideman	register struct vnode *vp = ap->a_vp;
54951Sheideman	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
55025Smckusick	return (0);
54753Sjohnh}
54753Sjohnh
54951Sheideman
54951Sheideman/*
54951Sheideman * XXX - vop_strategy must be hand coded because it has no
54951Sheideman * vnode in its arguments.
54951Sheideman * This goes away with a merged VM/buffer cache.
54951Sheideman */
54938Sheidemanint
55025Smckusicknull_strategy(ap)
55025Smckusick	struct vop_strategy_args /* {
55025Smckusick		struct buf *a_bp;
55025Smckusick	} */ *ap;
54753Sjohnh{
54951Sheideman	struct buf *bp = ap->a_bp;
54753Sjohnh	int error;
54766Sjohnh	struct vnode *savedvp;
54753Sjohnh
54951Sheideman	savedvp = bp->b_vp;
54951Sheideman	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
54753Sjohnh
54951Sheideman	error = VOP_STRATEGY(bp);
54753Sjohnh
54951Sheideman	bp->b_vp = savedvp;
54753Sjohnh
55025Smckusick	return (error);
54753Sjohnh}
54753Sjohnh
54766Sjohnh
54951Sheideman/*
54951Sheideman * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
54951Sheideman * vnode in its arguments.
54951Sheideman * This goes away with a merged VM/buffer cache.
54951Sheideman */
54766Sjohnhint
55025Smckusicknull_bwrite(ap)
55025Smckusick	struct vop_bwrite_args /* {
55025Smckusick		struct buf *a_bp;
55025Smckusick	} */ *ap;
54753Sjohnh{
54951Sheideman	struct buf *bp = ap->a_bp;
54951Sheideman	int error;
54951Sheideman	struct vnode *savedvp;
54753Sjohnh
54951Sheideman	savedvp = bp->b_vp;
54951Sheideman	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
54753Sjohnh
54951Sheideman	error = VOP_BWRITE(bp);
54951Sheideman
54951Sheideman	bp->b_vp = savedvp;
54951Sheideman
55025Smckusick	return (error);
54944Sheideman}
54944Sheideman
54753Sjohnh/*
54766Sjohnh * Global vfs data structures
54753Sjohnh */
54766Sjohnhint (**null_vnodeop_p)();
54893Sheidemanstruct vnodeopv_entry_desc null_vnodeop_entries[] = {
54766Sjohnh	{ &vop_default_desc, null_bypass },
54753Sjohnh
54766Sjohnh	{ &vop_getattr_desc, null_getattr },
54766Sjohnh	{ &vop_inactive_desc, null_inactive },
54766Sjohnh	{ &vop_reclaim_desc, null_reclaim },
54766Sjohnh	{ &vop_print_desc, null_print },
54753Sjohnh
54766Sjohnh	{ &vop_strategy_desc, null_strategy },
54951Sheideman	{ &vop_bwrite_desc, null_bwrite },
54753Sjohnh
54753Sjohnh	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
54753Sjohnh};
54893Sheidemanstruct vnodeopv_desc null_vnodeop_opv_desc =
54893Sheideman	{ &null_vnodeop_p, null_vnodeop_entries };