/*
 * Copyright (c) 1992 The Regents of the University of California
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)null_vnops.c	1.7 (Berkeley) 07/11/92
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Null Layer
 *
 * (See mount_null(8) for more information.)
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects:  it is implemented using
 * stackable layers techniques, and its "null-node"s stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer has two purposes.  First, it serves as a demonstration
 * of layering by providing a layer which does nothing.  (It actually
 * does everything the loopback file system does, which is slightly
 * more than nothing.)  Second, the null layer can serve as a prototype
 * layer.  Since it provides all necessary layer framework,
 * new file system layers can be created very easily by starting
 * with a null layer.
 *
 * The remainder of this comment examines the null layer as a basis
 * for constructing new layers.
 *
 *
 * INSTANTIATING NEW NULL LAYERS
 *
 * New null layers are created with mount_null(8).
 * Mount_null(8) takes two arguments, the pathname
 * of the lower vfs (target-pn) and the pathname where the null
 * layer will appear in the namespace (alias-pn).  After
 * the null layer is put into place, the contents
 * of the target-pn subtree will be aliased under alias-pn.
 *
 *
 * OPERATION OF A NULL LAYER
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  The majority of its activity centers
 * on the bypass routine, through which nearly all vnode operations
 * pass.
 *
 * The bypass routine accepts arbitrary vnode operations for
 * handling by the lower layer.  It begins by examining vnode
 * operation arguments and replacing any null-nodes by their
 * lower-layer equivalents.  It then invokes the operation
 * on the lower layer.  Finally, it replaces the null-nodes
 * in the arguments and, if a vnode is returned by the operation,
 * stacks a null-node on top of the returned vnode.
 *
 * Although bypass handles most operations,
 * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
 * Vop_getattr must change the fsid being returned.
 * Vop_inactive and vop_reclaim are not bypassed so that
 * they can handle freeing null-layer specific data.
 * Vop_print is not bypassed to avoid excessive debugging
 * information.
 *
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the null layer with a lower layer,
 * in effect stacking two VFSes.  Vnode stacks are instead
 * created on demand as files are accessed.
 *
 * The initial mount creates a single vnode stack for the
 * root of the new null layer.  All other vnode stacks
 * are created as a result of vnode operations on
 * this or other null vnode stacks.
 *
 * New vnode stacks come into existence as a result of
 * an operation which returns a vnode.
 * The bypass routine stacks a null-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with
 * "mount_null /usr/include /dev/layer/null".
 * Changing directory to /dev/layer/null will assign
 * the root null-node (which was created when the null layer was mounted).
 * Now consider opening "sys".  A vop_lookup would be
 * done on the root null-node.  This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
138*54951Sheideman * 13954753Sjohnh */ 14054753Sjohnh 14154753Sjohnh #include <sys/param.h> 14254753Sjohnh #include <sys/systm.h> 14354753Sjohnh #include <sys/proc.h> 14454753Sjohnh #include <sys/time.h> 14554753Sjohnh #include <sys/types.h> 14654753Sjohnh #include <sys/vnode.h> 14754753Sjohnh #include <sys/mount.h> 14854753Sjohnh #include <sys/namei.h> 14954753Sjohnh #include <sys/malloc.h> 15054753Sjohnh #include <sys/buf.h> 15154893Sheideman #include <nullfs/null.h> 15254753Sjohnh 15354753Sjohnh 15454766Sjohnh int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ 15554753Sjohnh 15654753Sjohnh /* 15754766Sjohnh * This is the 10-Apr-92 bypass routine. 15854766Sjohnh * This version has been optimized for speed, throwing away some 15954766Sjohnh * safety checks. It should still always work, but it's not as 16054766Sjohnh * robust to programmer errors. 16154766Sjohnh * Define SAFETY to include some error checking code. 16254766Sjohnh * 16354766Sjohnh * In general, we map all vnodes going down and unmap them on the way back. 16454766Sjohnh * As an exception to this, vnodes can be marked "unmapped" by setting 16554766Sjohnh * the Nth bit in operation's vdesc_flags. 16654766Sjohnh * 16754766Sjohnh * Also, some BSD vnode operations have the side effect of vrele'ing 16854766Sjohnh * their arguments. With stacking, the reference counts are held 16954766Sjohnh * by the upper node, not the lower one, so we must handle these 17054766Sjohnh * side-effects here. This is not of concern in Sun-derived systems 17154766Sjohnh * since there are no such side-effects. 
17254766Sjohnh * 17354766Sjohnh * This makes the following assumptions: 17454766Sjohnh * - only one returned vpp 17554766Sjohnh * - no INOUT vpp's (Sun's vop_open has one of these) 17654766Sjohnh * - the vnode operation vector of the first vnode should be used 17754766Sjohnh * to determine what implementation of the op should be invoked 17854766Sjohnh * - all mapped vnodes are of our vnode-type (NEEDSWORK: 17954766Sjohnh * problems on rmdir'ing mount points and renaming?) 18054766Sjohnh */ 18154766Sjohnh int 18254766Sjohnh null_bypass(ap) 18354893Sheideman struct vop_generic_args *ap; 18454753Sjohnh { 18554893Sheideman extern int (**null_vnodeop_p)(); /* not extern, really "forward" */ 18654893Sheideman register struct vnode **this_vp_p; 18754753Sjohnh int error; 18854766Sjohnh struct vnode *old_vps[VDESC_MAX_VPS]; 18954766Sjohnh struct vnode **vps_p[VDESC_MAX_VPS]; 19054766Sjohnh struct vnode ***vppp; 19154766Sjohnh struct vnodeop_desc *descp = ap->a_desc; 19254893Sheideman int reles, i; 19354753Sjohnh 19454766Sjohnh if (null_bug_bypass) 19554766Sjohnh printf ("null_bypass: %s\n", descp->vdesc_name); 19654753Sjohnh 19754766Sjohnh #ifdef SAFETY 19854753Sjohnh /* 19954766Sjohnh * We require at least one vp. 20054753Sjohnh */ 20154938Sheideman if (descp->vdesc_vp_offsets == NULL || 20254938Sheideman descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) 20354766Sjohnh panic ("null_bypass: no vp's in map.\n"); 20454753Sjohnh #endif 20554753Sjohnh 20654753Sjohnh /* 20754766Sjohnh * Map the vnodes going in. 20854766Sjohnh * Later, we'll invoke the operation based on 20954766Sjohnh * the first mapped vnode's operation vector. 
21054753Sjohnh */ 21154893Sheideman reles = descp->vdesc_flags; 21254938Sheideman for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 21354938Sheideman if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 21454766Sjohnh break; /* bail out at end of list */ 21554766Sjohnh vps_p[i] = this_vp_p = 21654766Sjohnh VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); 21754893Sheideman /* 21854893Sheideman * We're not guaranteed that any but the first vnode 21954893Sheideman * are of our type. Check for and don't map any 220*54951Sheideman * that aren't. (We must always map first vp or vclean fails.) 22154893Sheideman */ 22254944Sheideman if (i && (*this_vp_p)->v_op != null_vnodeop_p) { 22354893Sheideman old_vps[i] = NULL; 22454893Sheideman } else { 22554893Sheideman old_vps[i] = *this_vp_p; 22654893Sheideman *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); 22754938Sheideman /* 22854938Sheideman * XXX - Several operations have the side effect 22954938Sheideman * of vrele'ing their vp's. We must account for 23054938Sheideman * that. (This should go away in the future.) 23154938Sheideman */ 23254893Sheideman if (reles & 1) 23354893Sheideman VREF(*this_vp_p); 23454938Sheideman } 23554766Sjohnh 23654938Sheideman } 23754753Sjohnh 23854753Sjohnh /* 23954766Sjohnh * Call the operation on the lower layer 24054766Sjohnh * with the modified argument structure. 24154753Sjohnh */ 24254766Sjohnh error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); 24354753Sjohnh 24454753Sjohnh /* 24554766Sjohnh * Maintain the illusion of call-by-value 24654766Sjohnh * by restoring vnodes in the argument structure 24754766Sjohnh * to their original value. 
24854753Sjohnh */ 24954893Sheideman reles = descp->vdesc_flags; 25054938Sheideman for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 25154938Sheideman if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 25254766Sjohnh break; /* bail out at end of list */ 25354893Sheideman if (old_vps[i]) { 25454893Sheideman *(vps_p[i]) = old_vps[i]; 25554893Sheideman if (reles & 1) 25654893Sheideman vrele(*(vps_p[i])); 25754938Sheideman } 25854938Sheideman } 25954766Sjohnh 26054753Sjohnh /* 26154938Sheideman * Map the possible out-going vpp 26254938Sheideman * (Assumes that the lower layer always returns 26354938Sheideman * a VREF'ed vpp unless it gets an error.) 26454753Sjohnh */ 26554766Sjohnh if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && 26654766Sjohnh !(descp->vdesc_flags & VDESC_NOMAP_VPP) && 26754766Sjohnh !error) { 26854938Sheideman /* 26954938Sheideman * XXX - even though some ops have vpp returned vp's, 27054938Sheideman * several ops actually vrele this before returning. 27154938Sheideman * We must avoid these ops. 272*54951Sheideman * (This should go away when these ops are regularized.) 27354938Sheideman */ 274*54951Sheideman if (descp->vdesc_flags & VDESC_VPP_WILLRELE) 275*54951Sheideman goto out; 27654938Sheideman vppp = VOPARG_OFFSETTO(struct vnode***, 27754766Sjohnh descp->vdesc_vpp_offset,ap); 27854893Sheideman error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); 27954938Sheideman } 28054753Sjohnh 281*54951Sheideman out: 28254766Sjohnh return (error); 28354753Sjohnh } 28454753Sjohnh 28554753Sjohnh 28654753Sjohnh /* 287*54951Sheideman * We handle getattr only to change the fsid. 28854753Sjohnh */ 28954766Sjohnh int 29054766Sjohnh null_getattr(ap) 29154893Sheideman struct vop_getattr_args *ap; 29254753Sjohnh { 29354753Sjohnh int error; 29454938Sheideman if (error = null_bypass(ap)) 29554766Sjohnh return error; 29654766Sjohnh /* Requires that arguments be restored. 
*/ 29754766Sjohnh ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 29854766Sjohnh return 0; 29954766Sjohnh } 30054753Sjohnh 30154753Sjohnh 30254766Sjohnh int 30354754Sjohnh null_inactive (ap) 30454753Sjohnh struct vop_inactive_args *ap; 30554753Sjohnh { 30654766Sjohnh /* 30754766Sjohnh * Do nothing (and _don't_ bypass). 30854766Sjohnh * Wait to vrele lowervp until reclaim, 30954766Sjohnh * so that until then our null_node is in the 31054766Sjohnh * cache and reusable. 31154766Sjohnh * 31254766Sjohnh * NEEDSWORK: Someday, consider inactive'ing 31354766Sjohnh * the lowervp and then trying to reactivate it 314*54951Sheideman * with capabilities (v_id) 31554766Sjohnh * like they do in the name lookup cache code. 31654766Sjohnh * That's too much work for now. 31754766Sjohnh */ 31854766Sjohnh return 0; 31954753Sjohnh } 32054753Sjohnh 32154938Sheideman int 32254754Sjohnh null_reclaim (ap) 32354753Sjohnh struct vop_reclaim_args *ap; 32454753Sjohnh { 32554938Sheideman struct vnode *vp = ap->a_vp; 32654938Sheideman struct null_node *xp = VTONULL(vp); 32754938Sheideman struct vnode *lowervp = xp->null_lowervp; 32854938Sheideman 32954938Sheideman /* 330*54951Sheideman * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, 33154938Sheideman * so we can't call VOPs on ourself. 33254938Sheideman */ 33354938Sheideman /* After this assignment, this node will not be re-used. 
*/ 33454938Sheideman xp->null_lowervp = NULL; 33554938Sheideman remque(xp); 33654938Sheideman FREE(vp->v_data, M_TEMP); 33754938Sheideman vp->v_data = NULL; 33854938Sheideman vrele (lowervp); 33954938Sheideman return 0; 34054753Sjohnh } 34154753Sjohnh 342*54951Sheideman 34354938Sheideman int 344*54951Sheideman null_print (ap) 345*54951Sheideman struct vop_print_args *ap; 34654753Sjohnh { 347*54951Sheideman register struct vnode *vp = ap->a_vp; 348*54951Sheideman printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp)); 349*54951Sheideman return 0; 35054753Sjohnh } 35154753Sjohnh 352*54951Sheideman 353*54951Sheideman /* 354*54951Sheideman * XXX - vop_strategy must be hand coded because it has no 355*54951Sheideman * vnode in its arguments. 356*54951Sheideman * This goes away with a merged VM/buffer cache. 357*54951Sheideman */ 35854938Sheideman int 35954754Sjohnh null_strategy (ap) 36054753Sjohnh struct vop_strategy_args *ap; 36154753Sjohnh { 362*54951Sheideman struct buf *bp = ap->a_bp; 36354753Sjohnh int error; 36454766Sjohnh struct vnode *savedvp; 36554753Sjohnh 366*54951Sheideman savedvp = bp->b_vp; 367*54951Sheideman bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 36854753Sjohnh 369*54951Sheideman error = VOP_STRATEGY(bp); 37054753Sjohnh 371*54951Sheideman bp->b_vp = savedvp; 37254753Sjohnh 37354766Sjohnh return error; 37454753Sjohnh } 37554753Sjohnh 37654766Sjohnh 377*54951Sheideman /* 378*54951Sheideman * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no 379*54951Sheideman * vnode in its arguments. 380*54951Sheideman * This goes away with a merged VM/buffer cache. 
381*54951Sheideman */ 38254766Sjohnh int 383*54951Sheideman null_bwrite (ap) 384*54951Sheideman struct vop_bwrite_args *ap; 38554753Sjohnh { 386*54951Sheideman struct buf *bp = ap->a_bp; 387*54951Sheideman int error; 388*54951Sheideman struct vnode *savedvp; 38954753Sjohnh 390*54951Sheideman savedvp = bp->b_vp; 391*54951Sheideman bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 39254753Sjohnh 393*54951Sheideman error = VOP_BWRITE(bp); 394*54951Sheideman 395*54951Sheideman bp->b_vp = savedvp; 396*54951Sheideman 397*54951Sheideman return error; 39854944Sheideman } 39954944Sheideman 400*54951Sheideman 40154753Sjohnh /* 40254766Sjohnh * Global vfs data structures 40354753Sjohnh */ 40454766Sjohnh int (**null_vnodeop_p)(); 40554893Sheideman struct vnodeopv_entry_desc null_vnodeop_entries[] = { 40654766Sjohnh { &vop_default_desc, null_bypass }, 40754753Sjohnh 40854766Sjohnh { &vop_getattr_desc, null_getattr }, 40954766Sjohnh { &vop_inactive_desc, null_inactive }, 41054766Sjohnh { &vop_reclaim_desc, null_reclaim }, 41154766Sjohnh { &vop_print_desc, null_print }, 41254753Sjohnh 41354766Sjohnh { &vop_strategy_desc, null_strategy }, 414*54951Sheideman { &vop_bwrite_desc, null_bwrite }, 41554753Sjohnh 41654753Sjohnh { (struct vnodeop_desc*)NULL, (int(*)())NULL } 41754753Sjohnh }; 41854893Sheideman struct vnodeopv_desc null_vnodeop_opv_desc = 41954893Sheideman { &null_vnodeop_p, null_vnodeop_entries }; 420