/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)null_vnops.c	8.1 (Berkeley) 06/10/93
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Null Layer
 *
 * (See mount_null(8) for more information.)
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects:  it is implemented using
 * a stackable layers technique, and its "null-node"s stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer has two purposes.  First, it serves as a demonstration
 * of layering by providing a layer which does nothing.  (It actually
 * does everything the loopback file system does, which is slightly
 * more than nothing.)  Second, the null layer can serve as a prototype
 * layer.  Since it provides all necessary layer framework,
 * new file system layers can be created very easily by starting
 * with a null layer.
 *
 * The remainder of this man page examines the null layer as a basis
 * for constructing new layers.
 *
 *
 * INSTANTIATING NEW NULL LAYERS
 *
 * New null layers are created with mount_null(8).
 * Mount_null(8) takes two arguments, the pathname
 * of the lower vfs (target-pn) and the pathname where the null
 * layer will appear in the namespace (alias-pn).  After
 * the null layer is put into place, the contents
 * of the target-pn subtree will be aliased under alias-pn.
 *
 *
 * OPERATION OF A NULL LAYER
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  The majority of its activity centers
 * on the bypass routine, through which nearly all vnode operations
 * pass.
 *
 * The bypass routine accepts arbitrary vnode operations for
 * handling by the lower layer.  It begins by examining vnode
 * operation arguments and replacing any null-nodes by their
 * lower-layer equivalents.  It then invokes the operation
 * on the lower layer.  Finally, it replaces the null-nodes
 * in the arguments and, if a vnode is returned by the operation,
 * stacks a null-node on top of the returned vnode.
 *
 * Although bypass handles most operations,
 * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
 * Vop_getattr must change the fsid being returned.
 * Vop_inactive and vop_reclaim are not bypassed so that
 * they can handle freeing null-layer specific data.
 * Vop_print is not bypassed to avoid excessive debugging
 * information.
 *
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the null layer with a lower layer,
 * effectively stacking two VFSes.  Vnode stacks are instead
 * created on demand as files are accessed.
 *
 * The initial mount creates a single vnode stack for the
 * root of the new null layer.  All other vnode stacks
 * are created as a result of vnode operations on
 * this or other null vnode stacks.
 *
 * New vnode stacks come into existence as a result of
 * an operation which returns a vnode.
 * The bypass routine stacks a null-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with
 * "mount_null /usr/include /dev/layer/null".
 * Changing directory to /dev/layer/null will assign
 * the root null-node (which was created when the null layer was mounted).
 * Now consider opening "sys".  A vop_lookup would be
 * done on the root null-node.  This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
13854951Sheideman * 13954753Sjohnh */ 14054753Sjohnh 14154753Sjohnh #include <sys/param.h> 14254753Sjohnh #include <sys/systm.h> 14354753Sjohnh #include <sys/proc.h> 14454753Sjohnh #include <sys/time.h> 14554753Sjohnh #include <sys/types.h> 14654753Sjohnh #include <sys/vnode.h> 14754753Sjohnh #include <sys/mount.h> 14854753Sjohnh #include <sys/namei.h> 14954753Sjohnh #include <sys/malloc.h> 15054753Sjohnh #include <sys/buf.h> 15155025Smckusick #include <miscfs/nullfs/null.h> 15254753Sjohnh 15354753Sjohnh 15454766Sjohnh int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ 15554753Sjohnh 15654753Sjohnh /* 15754766Sjohnh * This is the 10-Apr-92 bypass routine. 15854766Sjohnh * This version has been optimized for speed, throwing away some 15954766Sjohnh * safety checks. It should still always work, but it's not as 16054766Sjohnh * robust to programmer errors. 16154766Sjohnh * Define SAFETY to include some error checking code. 16254766Sjohnh * 16354766Sjohnh * In general, we map all vnodes going down and unmap them on the way back. 16454766Sjohnh * As an exception to this, vnodes can be marked "unmapped" by setting 16554766Sjohnh * the Nth bit in operation's vdesc_flags. 16654766Sjohnh * 16754766Sjohnh * Also, some BSD vnode operations have the side effect of vrele'ing 16854766Sjohnh * their arguments. With stacking, the reference counts are held 16954766Sjohnh * by the upper node, not the lower one, so we must handle these 17054766Sjohnh * side-effects here. This is not of concern in Sun-derived systems 17154766Sjohnh * since there are no such side-effects. 
17254766Sjohnh * 17354766Sjohnh * This makes the following assumptions: 17454766Sjohnh * - only one returned vpp 17554766Sjohnh * - no INOUT vpp's (Sun's vop_open has one of these) 17654766Sjohnh * - the vnode operation vector of the first vnode should be used 17754766Sjohnh * to determine what implementation of the op should be invoked 17854766Sjohnh * - all mapped vnodes are of our vnode-type (NEEDSWORK: 17954766Sjohnh * problems on rmdir'ing mount points and renaming?) 18054766Sjohnh */ 18154766Sjohnh int 18254766Sjohnh null_bypass(ap) 18355025Smckusick struct vop_generic_args /* { 18455025Smckusick struct vnodeop_desc *a_desc; 18555025Smckusick <other random data follows, presumably> 18655025Smckusick } */ *ap; 18754753Sjohnh { 18854893Sheideman extern int (**null_vnodeop_p)(); /* not extern, really "forward" */ 18954893Sheideman register struct vnode **this_vp_p; 19054753Sjohnh int error; 19154766Sjohnh struct vnode *old_vps[VDESC_MAX_VPS]; 19254766Sjohnh struct vnode **vps_p[VDESC_MAX_VPS]; 19354766Sjohnh struct vnode ***vppp; 19454766Sjohnh struct vnodeop_desc *descp = ap->a_desc; 19554893Sheideman int reles, i; 19654753Sjohnh 19754766Sjohnh if (null_bug_bypass) 19854766Sjohnh printf ("null_bypass: %s\n", descp->vdesc_name); 19954753Sjohnh 20054766Sjohnh #ifdef SAFETY 20154753Sjohnh /* 20254766Sjohnh * We require at least one vp. 20354753Sjohnh */ 20454938Sheideman if (descp->vdesc_vp_offsets == NULL || 20554938Sheideman descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) 20654766Sjohnh panic ("null_bypass: no vp's in map.\n"); 20754753Sjohnh #endif 20854753Sjohnh 20954753Sjohnh /* 21054766Sjohnh * Map the vnodes going in. 21154766Sjohnh * Later, we'll invoke the operation based on 21254766Sjohnh * the first mapped vnode's operation vector. 
21354753Sjohnh */ 21454893Sheideman reles = descp->vdesc_flags; 21554938Sheideman for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 21654938Sheideman if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 21754766Sjohnh break; /* bail out at end of list */ 21854766Sjohnh vps_p[i] = this_vp_p = 21954766Sjohnh VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); 22054893Sheideman /* 22154893Sheideman * We're not guaranteed that any but the first vnode 22254893Sheideman * are of our type. Check for and don't map any 22354951Sheideman * that aren't. (We must always map first vp or vclean fails.) 22454893Sheideman */ 22554944Sheideman if (i && (*this_vp_p)->v_op != null_vnodeop_p) { 22654893Sheideman old_vps[i] = NULL; 22754893Sheideman } else { 22854893Sheideman old_vps[i] = *this_vp_p; 22954893Sheideman *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); 23054938Sheideman /* 23154938Sheideman * XXX - Several operations have the side effect 23254938Sheideman * of vrele'ing their vp's. We must account for 23354938Sheideman * that. (This should go away in the future.) 23454938Sheideman */ 23554893Sheideman if (reles & 1) 23654893Sheideman VREF(*this_vp_p); 23754938Sheideman } 23854766Sjohnh 23954938Sheideman } 24054753Sjohnh 24154753Sjohnh /* 24254766Sjohnh * Call the operation on the lower layer 24354766Sjohnh * with the modified argument structure. 24454753Sjohnh */ 24554766Sjohnh error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); 24654753Sjohnh 24754753Sjohnh /* 24854766Sjohnh * Maintain the illusion of call-by-value 24954766Sjohnh * by restoring vnodes in the argument structure 25054766Sjohnh * to their original value. 
25154753Sjohnh */ 25254893Sheideman reles = descp->vdesc_flags; 25354938Sheideman for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 25454938Sheideman if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 25554766Sjohnh break; /* bail out at end of list */ 25654893Sheideman if (old_vps[i]) { 25754893Sheideman *(vps_p[i]) = old_vps[i]; 25854893Sheideman if (reles & 1) 25954893Sheideman vrele(*(vps_p[i])); 26054938Sheideman } 26154938Sheideman } 26254766Sjohnh 26354753Sjohnh /* 26454938Sheideman * Map the possible out-going vpp 26554938Sheideman * (Assumes that the lower layer always returns 26654938Sheideman * a VREF'ed vpp unless it gets an error.) 26754753Sjohnh */ 26854766Sjohnh if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && 26954766Sjohnh !(descp->vdesc_flags & VDESC_NOMAP_VPP) && 27054766Sjohnh !error) { 27154938Sheideman /* 27254938Sheideman * XXX - even though some ops have vpp returned vp's, 27354938Sheideman * several ops actually vrele this before returning. 27454938Sheideman * We must avoid these ops. 27554951Sheideman * (This should go away when these ops are regularized.) 27654938Sheideman */ 27754951Sheideman if (descp->vdesc_flags & VDESC_VPP_WILLRELE) 27854951Sheideman goto out; 27954938Sheideman vppp = VOPARG_OFFSETTO(struct vnode***, 28054766Sjohnh descp->vdesc_vpp_offset,ap); 28154893Sheideman error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); 28254938Sheideman } 28354753Sjohnh 28454951Sheideman out: 28554766Sjohnh return (error); 28654753Sjohnh } 28754753Sjohnh 28854753Sjohnh 28954753Sjohnh /* 29054951Sheideman * We handle getattr only to change the fsid. 
29154753Sjohnh */ 29254766Sjohnh int 29354766Sjohnh null_getattr(ap) 29455025Smckusick struct vop_getattr_args /* { 29555025Smckusick struct vnode *a_vp; 29655025Smckusick struct vattr *a_vap; 29755025Smckusick struct ucred *a_cred; 29855025Smckusick struct proc *a_p; 29955025Smckusick } */ *ap; 30054753Sjohnh { 30154753Sjohnh int error; 30254938Sheideman if (error = null_bypass(ap)) 30355025Smckusick return (error); 30454766Sjohnh /* Requires that arguments be restored. */ 30554766Sjohnh ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 30655025Smckusick return (0); 30754766Sjohnh } 30854753Sjohnh 30954753Sjohnh 31054766Sjohnh int 31155025Smckusick null_inactive(ap) 31255025Smckusick struct vop_inactive_args /* { 31355025Smckusick struct vnode *a_vp; 31455025Smckusick } */ *ap; 31554753Sjohnh { 31654766Sjohnh /* 31754766Sjohnh * Do nothing (and _don't_ bypass). 31854766Sjohnh * Wait to vrele lowervp until reclaim, 31954766Sjohnh * so that until then our null_node is in the 32054766Sjohnh * cache and reusable. 32154766Sjohnh * 32254766Sjohnh * NEEDSWORK: Someday, consider inactive'ing 32354766Sjohnh * the lowervp and then trying to reactivate it 32454951Sheideman * with capabilities (v_id) 32554766Sjohnh * like they do in the name lookup cache code. 32654766Sjohnh * That's too much work for now. 32754766Sjohnh */ 32855025Smckusick return (0); 32954753Sjohnh } 33054753Sjohnh 33154938Sheideman int 33255025Smckusick null_reclaim(ap) 33355025Smckusick struct vop_reclaim_args /* { 33455025Smckusick struct vnode *a_vp; 33555025Smckusick } */ *ap; 33654753Sjohnh { 33754938Sheideman struct vnode *vp = ap->a_vp; 33854938Sheideman struct null_node *xp = VTONULL(vp); 33954938Sheideman struct vnode *lowervp = xp->null_lowervp; 34054938Sheideman 34154938Sheideman /* 34254951Sheideman * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, 34354938Sheideman * so we can't call VOPs on ourself. 
34454938Sheideman */ 34554938Sheideman /* After this assignment, this node will not be re-used. */ 34654938Sheideman xp->null_lowervp = NULL; 34754938Sheideman remque(xp); 34854938Sheideman FREE(vp->v_data, M_TEMP); 34954938Sheideman vp->v_data = NULL; 35054938Sheideman vrele (lowervp); 35155025Smckusick return (0); 35254753Sjohnh } 35354753Sjohnh 35454951Sheideman 35554938Sheideman int 35655025Smckusick null_print(ap) 35755025Smckusick struct vop_print_args /* { 35855025Smckusick struct vnode *a_vp; 35955025Smckusick } */ *ap; 36054753Sjohnh { 36154951Sheideman register struct vnode *vp = ap->a_vp; 36254951Sheideman printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp)); 36355025Smckusick return (0); 36454753Sjohnh } 36554753Sjohnh 36654951Sheideman 36754951Sheideman /* 36854951Sheideman * XXX - vop_strategy must be hand coded because it has no 36954951Sheideman * vnode in its arguments. 37054951Sheideman * This goes away with a merged VM/buffer cache. 37154951Sheideman */ 37254938Sheideman int 37355025Smckusick null_strategy(ap) 37455025Smckusick struct vop_strategy_args /* { 37555025Smckusick struct buf *a_bp; 37655025Smckusick } */ *ap; 37754753Sjohnh { 37854951Sheideman struct buf *bp = ap->a_bp; 37954753Sjohnh int error; 38054766Sjohnh struct vnode *savedvp; 38154753Sjohnh 38254951Sheideman savedvp = bp->b_vp; 38354951Sheideman bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 38454753Sjohnh 38554951Sheideman error = VOP_STRATEGY(bp); 38654753Sjohnh 38754951Sheideman bp->b_vp = savedvp; 38854753Sjohnh 38955025Smckusick return (error); 39054753Sjohnh } 39154753Sjohnh 39254766Sjohnh 39354951Sheideman /* 39454951Sheideman * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no 39554951Sheideman * vnode in its arguments. 39654951Sheideman * This goes away with a merged VM/buffer cache. 
39754951Sheideman */ 39854766Sjohnh int 39955025Smckusick null_bwrite(ap) 40055025Smckusick struct vop_bwrite_args /* { 40155025Smckusick struct buf *a_bp; 40255025Smckusick } */ *ap; 40354753Sjohnh { 40454951Sheideman struct buf *bp = ap->a_bp; 40554951Sheideman int error; 40654951Sheideman struct vnode *savedvp; 40754753Sjohnh 40854951Sheideman savedvp = bp->b_vp; 40954951Sheideman bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 41054753Sjohnh 41154951Sheideman error = VOP_BWRITE(bp); 41254951Sheideman 41354951Sheideman bp->b_vp = savedvp; 41454951Sheideman 41555025Smckusick return (error); 41654944Sheideman } 41754944Sheideman 41854753Sjohnh /* 41954766Sjohnh * Global vfs data structures 42054753Sjohnh */ 42154766Sjohnh int (**null_vnodeop_p)(); 42254893Sheideman struct vnodeopv_entry_desc null_vnodeop_entries[] = { 42354766Sjohnh { &vop_default_desc, null_bypass }, 42454753Sjohnh 42554766Sjohnh { &vop_getattr_desc, null_getattr }, 42654766Sjohnh { &vop_inactive_desc, null_inactive }, 42754766Sjohnh { &vop_reclaim_desc, null_reclaim }, 42854766Sjohnh { &vop_print_desc, null_print }, 42954753Sjohnh 43054766Sjohnh { &vop_strategy_desc, null_strategy }, 43154951Sheideman { &vop_bwrite_desc, null_bwrite }, 43254753Sjohnh 43354753Sjohnh { (struct vnodeop_desc*)NULL, (int(*)())NULL } 43454753Sjohnh }; 43554893Sheideman struct vnodeopv_desc null_vnodeop_opv_desc = 43654893Sheideman { &null_vnodeop_p, null_vnodeop_entries }; 437