xref: /csrg-svn/sys/miscfs/nullfs/null_vnops.c (revision 54944)
1 /*
2  * Copyright (c) 1992 The Regents of the University of California
3  * All rights reserved.
4  *
5  * This code is derived from the null layer of
6  * John Heidemann from the UCLA Ficus project and
7  * Jan-Simon Pendry's loopback file system.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)null_vnops.c	1.6 (Berkeley) 07/11/92
12  *
13  * Ancestors:
14  *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
15  *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
16  *	...and...
17  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
18  */
19 
20 /*
21  * Null Layer
22  *
23  * The null layer duplicates a portion of the file system
24  * name space under a new name.  In this respect, it is
25  * similar to the loopback file system.  It differs from
26  * the loopback fs in two respects:  it is implemented using
 * a bypass operation, and its "null-node"s stack above
28  * all lower-layer vnodes, not just over directory vnodes.
29  *
30  * The null layer is the minimum file system layer,
31  * simply bypassing all possible operations to the lower layer
32  * for processing there.  All but vop_getattr, _inactive, _reclaim,
33  * and _print are bypassed.
34  *
35  * Vop_getattr is not bypassed so that we can change the fsid being
36  * returned.  Vop_{inactive,reclaim} are bypassed so that
37  * they can handle freeing null-layer specific data.
38  * Vop_print is not bypassed for debugging.
39  *
40  *
41  * INVOKING OPERATIONS ON LOWER LAYERS
42  *
43  *
44  * NEEDSWORK: Describe methods to invoke operations on the lower layer
45  * (bypass vs. VOP).
46  *
47  *
48  * CREATING NEW FILESYSTEM LAYERS
49  *
50  * One of the easiest ways to construct new file system layers is to make
51  * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
53  * all variables.
54  *
55  */
56 
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/proc.h>
60 #include <sys/time.h>
61 #include <sys/types.h>
62 #include <sys/vnode.h>
63 #include <sys/mount.h>
64 #include <sys/namei.h>
65 #include <sys/malloc.h>
66 #include <sys/buf.h>
67 #include <nullfs/null.h>
68 
69 
/* for debugging: when nonzero, null_bypass printf's each operation it forwards */
int null_bug_bypass = 0;
71 
72 /*
73  * This is the 10-Apr-92 bypass routine.
74  *    This version has been optimized for speed, throwing away some
75  * safety checks.  It should still always work, but it's not as
76  * robust to programmer errors.
77  *    Define SAFETY to include some error checking code.
78  *
79  * In general, we map all vnodes going down and unmap them on the way back.
80  * As an exception to this, vnodes can be marked "unmapped" by setting
81  * the Nth bit in operation's vdesc_flags.
82  *
83  * Also, some BSD vnode operations have the side effect of vrele'ing
84  * their arguments.  With stacking, the reference counts are held
85  * by the upper node, not the lower one, so we must handle these
86  * side-effects here.  This is not of concern in Sun-derived systems
87  * since there are no such side-effects.
88  *
89  * This makes the following assumptions:
90  * - only one returned vpp
91  * - no INOUT vpp's (Sun's vop_open has one of these)
92  * - the vnode operation vector of the first vnode should be used
93  *   to determine what implementation of the op should be invoked
94  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
95  *   problems on rmdir'ing mount points and renaming?)
96  */
97 int
98 null_bypass(ap)
99 	struct vop_generic_args *ap;
100 {
101 	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
102 	register struct vnode **this_vp_p;
103 	int error;
104 	struct vnode *old_vps[VDESC_MAX_VPS];
105 	struct vnode **vps_p[VDESC_MAX_VPS];
106 	struct vnode ***vppp;
107 	struct vnodeop_desc *descp = ap->a_desc;
108 	int reles, i;
109 
110 	if (null_bug_bypass)
111 		printf ("null_bypass: %s\n", descp->vdesc_name);
112 
113 #ifdef SAFETY
114 	/*
115 	 * We require at least one vp.
116 	 */
117 	if (descp->vdesc_vp_offsets == NULL ||
118 	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
119 		panic ("null_bypass: no vp's in map.\n");
120 #endif
121 
122 	/*
123 	 * Map the vnodes going in.
124 	 * Later, we'll invoke the operation based on
125 	 * the first mapped vnode's operation vector.
126 	 */
127 	reles = descp->vdesc_flags;
128 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
129 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
130 			break;   /* bail out at end of list */
131 		vps_p[i] = this_vp_p =
132 			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
133 		/*
134 		 * We're not guaranteed that any but the first vnode
135 		 * are of our type.  Check for and don't map any
136 		 * that aren't.  (Must map first vp or vclean fails.)
137 		 */
138 		if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
139 			old_vps[i] = NULL;
140 		} else {
141 			old_vps[i] = *this_vp_p;
142 			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
143 			/*
144 			 * XXX - Several operations have the side effect
145 			 * of vrele'ing their vp's.  We must account for
146 			 * that.  (This should go away in the future.)
147 			 */
148 			if (reles & 1)
149 				VREF(*this_vp_p);
150 		}
151 
152 	}
153 
154 	/*
155 	 * Call the operation on the lower layer
156 	 * with the modified argument structure.
157 	 */
158 	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
159 
160 	/*
161 	 * Maintain the illusion of call-by-value
162 	 * by restoring vnodes in the argument structure
163 	 * to their original value.
164 	 */
165 	reles = descp->vdesc_flags;
166 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
167 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
168 			break;   /* bail out at end of list */
169 		if (old_vps[i]) {
170 			*(vps_p[i]) = old_vps[i];
171 			if (reles & 1)
172 				vrele(*(vps_p[i]));
173 		}
174 	}
175 
176 	/*
177 	 * Map the possible out-going vpp
178 	 * (Assumes that the lower layer always returns
179 	 * a VREF'ed vpp unless it gets an error.)
180 	 */
181 	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
182 	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
183 	    !error) {
184 		/*
185 		 * XXX - even though some ops have vpp returned vp's,
186 		 * several ops actually vrele this before returning.
187 		 * We must avoid these ops.
188 		 * (This should go away.)
189 		 */
190 		if (descp->vdesc_flags & VDESC_VPP_WILLRELE) {
191 #ifdef NULLFS_DIAGNOSTIC
192 			printf("null_bypass (%s), lowervpp->usecount = %d\n", vdesc->vdesc_name, (**vppp)->v_usecount);
193 #endif
194 			return (error);
195 		}
196 		vppp = VOPARG_OFFSETTO(struct vnode***,
197 				 descp->vdesc_vpp_offset,ap);
198 		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
199 	}
200 
201 	return (error);
202 }
203 
204 
205 /*
206  *  We handle getattr to change the fsid.
207  */
208 int
209 null_getattr(ap)
210 	struct vop_getattr_args *ap;
211 {
212 	int error;
213 	if (error = null_bypass(ap))
214 		return error;
215 	/* Requires that arguments be restored. */
216 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
217 	return 0;
218 }
219 
/*
 * XXX - Ideally inactive does not release the lowervp
 * so the null_node can stay around in the cache and be reused.
 * Unfortunately, this currently causes "locking against self"
 * problems in the UFS, so currently AVOID_CACHING hacks
 * around the bug.
 */
/* #define AVOID_CACHING */

/*
 * Called when the last active reference to the null vnode goes away.
 * With AVOID_CACHING: fully dismantle the node now (unhash, free the
 * private data, mark the vnode clean, and drop the lower vnode).
 * Without it (the compiled-in path): do nothing except flag the node
 * inactive, so it stays in the cache and can be reactivated; the
 * lower vnode is released later, in null_reclaim.
 */
int
null_inactive (ap)
	struct vop_inactive_args *ap;
{
#ifdef AVOID_CACHING
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	/* Detach from the lower vnode and the null-node hash chain. */
	xp->null_lowervp = NULL;
	remque(xp);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vp->v_type = VBAD;   /* The node is clean (no reclaim needed). */
	vrele (lowervp);
#else
#ifdef DIAGNOSTIC  /* NEEDSWORK: goes away */
	/* An inactive vnode's lower half should never still be locked. */
	if (VOP_ISLOCKED(NULLVPTOLOWERVP(ap->a_vp))) {
		panic ("null_inactive: inactive's lowervp is locked.");
	};
#endif
	/*
	 * Remember we're inactive so we
	 * don't send locks through.
	 */
	VTONULL(ap->a_vp)->null_isinactive = 1;
	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our null_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return 0;
#endif
}
269 
270 int
271 null_reclaim (ap)
272 	struct vop_reclaim_args *ap;
273 {
274 	struct vnode *vp = ap->a_vp;
275 	struct null_node *xp = VTONULL(vp);
276 	struct vnode *lowervp = xp->null_lowervp;
277 
278 #ifdef AVOID_CACHING
279 	return 0;
280 #else
281 	/*
282 	 * Note: at this point, vp->v_op == dead_vnodeop_p,
283 	 * so we can't call VOPs on ourself.
284 	 */
285 	/* After this assignment, this node will not be re-used. */
286 #ifdef DIAGNOSTIC
287 	/* XXX - this is only a bug if it's locked by ourselves */
288 	if (lowervp->v_usecount == 1 && VOP_ISLOCKED(lowervp)) {
289 		panic("null_reclaim: lowervp is locked but must go away.");
290 	};
291 #endif
292 	xp->null_lowervp = NULL;
293 	remque(xp);
294 	FREE(vp->v_data, M_TEMP);
295 	vp->v_data = NULL;
296 	vrele (lowervp);
297 	return 0;
298 #endif
299 }
300 
301 int
302 null_bmap (ap)
303 	struct vop_bmap_args *ap;
304 {
305 #ifdef NULLFS_DIAGNOSTIC
306 	printf("null_bmap(ap->a_vp = %x->%x)\n", ap->a_vp, NULLVPTOLOWERVP(ap->a_vp));
307 #endif
308 
309 	return VOP_BMAP(NULLVPTOLOWERVP(ap->a_vp), ap->a_bn, ap->a_vpp, ap->a_bnp);
310 }
311 
312 int
313 null_strategy (ap)
314 	struct vop_strategy_args *ap;
315 {
316 	int error;
317 	struct vnode *savedvp;
318 
319 #ifdef NULLFS_DIAGNOSTIC
320 	printf("null_strategy(vp = %x->%x)\n", ap->a_bp->b_vp, NULLVPTOLOWERVP(ap->a_bp->b_vp));
321 #endif
322 
323 	savedvp = ap->a_bp->b_vp;
324 
325 	error = VOP_STRATEGY(ap->a_bp);
326 
327 	ap->a_bp->b_vp = savedvp;
328 
329 	return error;
330 }
331 
332 
333 int
334 null_print (ap)
335 	struct vop_print_args *ap;
336 {
337 	register struct vnode *vp = ap->a_vp;
338 	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
339 	return 0;
340 }
341 
342 #if 0
343 int
344 null_lock(ap)
345 	struct vop_lock_args *ap;
346 {
347 	if (VTONULL(ap->a_vp)->null_isinactive)
348 		return 0;
349 	else return null_bypass(ap);
350 }
351 
352 int
353 null_unlock(ap)
354 	struct vop_lock_args *ap;
355 {
356 	if (VTONULL(ap->a_vp)->null_isinactive)
357 		return 0;
358 	else return null_bypass(ap);
359 }
360 #endif
361 
362 /*
363  * Global vfs data structures
364  */
365 /*
366  * NEEDSWORK: strategy,bmap are hand coded currently.  They should
367  * go away with a merged buffer/block cache.
368  *
369  */
/* Filled in from the entry table below at vfs initialization time. */
int (**null_vnodeop_p)();
struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc, null_bypass },	/* every op not listed below */

	{ &vop_getattr_desc, null_getattr },	/* rewrites the fsid */
	{ &vop_inactive_desc, null_inactive },	/* defers lowervp release */
	{ &vop_reclaim_desc, null_reclaim },	/* frees null-layer data */
	{ &vop_print_desc, null_print },	/* debugging output */
#if 0
	{ &vop_lock_desc, null_lock },
	{ &vop_unlock_desc, null_unlock },
#endif

	/* hand-coded until a merged buffer/block cache exists */
	{ &vop_bmap_desc, null_bmap },
	{ &vop_strategy_desc, null_strategy },

	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };
390