/*
 * Copyright (c) 1992 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from the null layer of
 * John Heidemann from the UCLA Ficus project and
 * Jan-Simon Pendry's loopback file system.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)null_vnops.c	1.6 (Berkeley) 07/11/92
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Null Layer
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects: it is implemented using
 * a bypass operation, and its null-nodes stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  All but vop_getattr, _inactive, _reclaim,
 * and _print are bypassed.
 *
 * Vop_getattr is not bypassed so that we can change the fsid being
 * returned.  Vop_{inactive,reclaim} are not bypassed so that
 * they can handle freeing null-layer specific data.
 * Vop_print is not bypassed, to aid debugging.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques for invoking an operation on the layer
 * below the null layer.  The first is to pass the operation's
 * argument structure, unchanged, to the bypass routine: null_bypass
 * maps the vnode arguments to the lower layer and invokes the
 * operation there (null_getattr below works this way).  The second
 * is to map the vnode arguments by hand and invoke the lower-layer
 * operation directly through the VOP interface (null_bmap and
 * null_strategy below work this way).  Both techniques are sketched
 * in the example following the includes below.
 *
 *
 * CREATING NEW FILESYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <nullfs/null.h>
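
/*
 * An illustrative sketch (disabled, hence the "#if 0") of the two
 * techniques described above, using getattr as the example operation.
 * The a_cred and a_p members of vop_getattr_args are assumed here;
 * only a_vp and a_vap are used elsewhere in this file.
 */
#if 0
/*
 * Technique 1: hand the unmodified argument structure to the
 * bypass routine, which maps the vnode arguments and invokes
 * the same operation on the lower layer.
 */
int
null_example_bypassed_getattr(ap)
	struct vop_getattr_args *ap;
{
	return null_bypass(ap);
}

/*
 * Technique 2: map the vnode argument by hand and invoke the
 * lower-layer operation directly through the VOP interface.
 */
int
null_example_direct_getattr(ap)
	struct vop_getattr_args *ap;
{
	return VOP_GETATTR(NULLVPTOLOWERVP(ap->a_vp), ap->a_vap,
		ap->a_cred, ap->a_p);
}
#endif
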
int null_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 * Define SAFETY to include some error checking code.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(ap)
	struct vop_generic_args *ap;
{
	extern int (**null_vnodeop_p)();	/* not extern, really "forward" */
	register struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (null_bug_bypass)
		printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef SAFETY
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic ("null_bypass: no vp's in map.\n");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p =
			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (Must map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & 1)
				VREF(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & 1)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp.
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		vppp = VOPARG_OFFSETTO(struct vnode***,
				 descp->vdesc_vpp_offset,ap);
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE) {
#ifdef NULLFS_DIAGNOSTIC
			printf("null_bypass (%s), lowervpp->usecount = %d\n",
				descp->vdesc_name, (**vppp)->v_usecount);
#endif
			return (error);
		}
		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
	}

	return (error);
}
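
/*
 * For reference, a sketch of the descriptor machinery the bypass
 * routine relies on, inferred from its use above.  The real
 * declarations live in sys/vnode.h and may differ in detail: each
 * operation carries a vnodeop_desc recording, as byte offsets into
 * its argument structure, where the vnode arguments sit, and
 * VOPARG_OFFSETTO turns such an offset back into a typed pointer.
 */
#if 0
struct vnodeop_desc {
	int	vdesc_offset;		/* index into the op vector */
	char	*vdesc_name;		/* readable name for debugging */
	int	vdesc_flags;		/* VDESC_* flags, incl. per-vp rele bits */
	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
	int	vdesc_vpp_offset;	/* offset of the returned vpp, if any */
	/* ... */
};

/* Add a byte offset to an argument-structure pointer. */
#define VOPARG_OFFSETTO(s_type, s_offset, struct_p) \
	((s_type)(((char *)(struct_p)) + (s_offset)))
#endif
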
/*
 * We handle getattr to change the fsid.
 */
int
null_getattr(ap)
	struct vop_getattr_args *ap;
{
	int error;
	if (error = null_bypass(ap))
		return error;
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return 0;
}
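
/*
 * The routines below manipulate the null_node hanging off each
 * null-layer vnode.  For reference, a sketch of its assumed shape;
 * the real declaration is in nullfs/null.h and may differ: v_data
 * points at the null_node, which sits on a hash chain (hence the
 * remque calls below) and holds the VREF'ed lower vnode that
 * NULLVPTOLOWERVP returns.
 */
#if 0
struct null_node {
	struct null_node	*null_forw;	/* hash chain */
	struct null_node	*null_back;
	struct vnode		*null_lowervp;	/* VREF'ed lower vnode */
	struct vnode		*null_vnode;	/* back pointer to our vnode */
	int			null_isinactive; /* see null_inactive below */
};

#define	VTONULL(vp)	((struct null_node *)(vp)->v_data)
#define	NULLVPTOLOWERVP(vp)	(VTONULL(vp)->null_lowervp)
#endif
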
/*
 * XXX - Ideally, inactive would not release the lowervp,
 * so the null_node could stay in the cache and be reused.
 * Unfortunately, this currently causes "locking against self"
 * problems in the UFS, so for now AVOID_CACHING hacks
 * around the bug.
 */
/* #define AVOID_CACHING */

int
null_inactive (ap)
	struct vop_inactive_args *ap;
{
#ifdef AVOID_CACHING
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	xp->null_lowervp = NULL;
	remque(xp);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vp->v_type = VBAD;	/* The node is clean (no reclaim needed). */
	vrele (lowervp);
	return 0;
#else
#ifdef DIAGNOSTIC	/* NEEDSWORK: goes away */
	if (VOP_ISLOCKED(NULLVPTOLOWERVP(ap->a_vp)))
		panic ("null_inactive: inactive's lowervp is locked.");
#endif
	/*
	 * Remember we're inactive so we
	 * don't send locks through.
	 */
	VTONULL(ap->a_vp)->null_isinactive = 1;
	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our null_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return 0;
#endif
}

int
null_reclaim (ap)
	struct vop_reclaim_args *ap;
{
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

#ifdef AVOID_CACHING
	return 0;
#else
	/*
	 * Note: at this point, vp->v_op == dead_vnodeop_p,
	 * so we can't call VOPs on ourself.
	 */
#ifdef DIAGNOSTIC
	/* XXX - this is only a bug if it's locked by ourselves */
	if (lowervp->v_usecount == 1 && VOP_ISLOCKED(lowervp))
		panic("null_reclaim: lowervp is locked but must go away.");
#endif
	/* After this assignment, this node will not be re-used. */
	xp->null_lowervp = NULL;
	remque(xp);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vrele (lowervp);
	return 0;
#endif
}

int
null_bmap (ap)
	struct vop_bmap_args *ap;
{
#ifdef NULLFS_DIAGNOSTIC
	printf("null_bmap(ap->a_vp = %x->%x)\n", ap->a_vp, NULLVPTOLOWERVP(ap->a_vp));
#endif

	return VOP_BMAP(NULLVPTOLOWERVP(ap->a_vp), ap->a_bn, ap->a_vpp, ap->a_bnp);
}

int
null_strategy (ap)
	struct vop_strategy_args *ap;
{
	int error;
	struct vnode *savedvp;

#ifdef NULLFS_DIAGNOSTIC
	printf("null_strategy(vp = %x->%x)\n", ap->a_bp->b_vp, NULLVPTOLOWERVP(ap->a_bp->b_vp));
#endif

	/*
	 * Point the buffer at the lower vnode for the duration of
	 * the I/O, then restore it.  (Without the remapping, the
	 * VOP_STRATEGY below would re-enter this routine.)
	 */
	savedvp = ap->a_bp->b_vp;
	ap->a_bp->b_vp = NULLVPTOLOWERVP(savedvp);

	error = VOP_STRATEGY(ap->a_bp);

	ap->a_bp->b_vp = savedvp;

	return error;
}
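
/*
 * null_bypass above hands any vnode returned by the lower layer to
 * null_node_create.  Below is a rough sketch of what that routine
 * (which lives with the null layer's subroutines, not in this file)
 * is expected to do; null_node_find and null_node_alloc are assumed
 * helper names, and reference-count bookkeeping is elided.
 */
#if 0
int
null_node_create(mp, lowervp, vpp)
	struct mount *mp;
	struct vnode *lowervp;
	struct vnode **vpp;
{
	struct vnode *aliasvp;
	int error;

	if (aliasvp = null_node_find(mp, lowervp)) {
		/*
		 * An alias for lowervp is still cached on the hash
		 * chain: reuse it, so the same lower vnode always
		 * maps to the same upper vnode.
		 */
	} else {
		/* No cached alias: hang a new null_node off a new vnode. */
		if (error = null_node_alloc(mp, lowervp, &aliasvp))
			return (error);
	}
	*vpp = aliasvp;
	return (0);
}
#endif
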
int
null_print (ap)
	struct vop_print_args *ap;
{
	register struct vnode *vp = ap->a_vp;
	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
	return 0;
}

#if 0
int
null_lock(ap)
	struct vop_lock_args *ap;
{
	if (VTONULL(ap->a_vp)->null_isinactive)
		return 0;
	else return null_bypass(ap);
}

int
null_unlock(ap)
	struct vop_unlock_args *ap;
{
	if (VTONULL(ap->a_vp)->null_isinactive)
		return 0;
	else return null_bypass(ap);
}
#endif

/*
 * Global vfs data structures
 */
/*
 * NEEDSWORK: strategy and bmap are hand coded currently.  They should
 * go away with a merged buffer/block cache.
 */
int (**null_vnodeop_p)();
struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc, null_bypass },

	{ &vop_getattr_desc, null_getattr },
	{ &vop_inactive_desc, null_inactive },
	{ &vop_reclaim_desc, null_reclaim },
	{ &vop_print_desc, null_print },
#if 0
	{ &vop_lock_desc, null_lock },
	{ &vop_unlock_desc, null_unlock },
#endif

	{ &vop_bmap_desc, null_bmap },
	{ &vop_strategy_desc, null_strategy },

	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };
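
/*
 * For completeness, a sketch of how this opv_desc is expected to be
 * wired into the kernel.  This assumes the convention used by the
 * other file systems in vfs_init.c (names there may differ): listing
 * the descriptor in the global table lets vfs_opv_init() allocate
 * and fill in null_vnodeop_p at boot time.
 */
#if 0
/* In vfs_init.c: */
extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern struct vnodeopv_desc null_vnodeop_opv_desc;

struct vnodeopv_desc *vfs_opv_descs[] = {
	&ffs_vnodeop_opv_desc,
	/* ...other file systems and layers... */
	&null_vnodeop_opv_desc,
	NULL
};
#endif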