xref: /netbsd-src/sys/fs/union/union_vnops.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: union_vnops.c,v 1.49 2011/11/21 18:29:22 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.49 2011/11/21 18:29:22 hannken Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95 
/*
 * Prototypes for the union file system's vnode operations
 * implemented in this file, plus the internal lookup helper.
 */
96 int union_lookup(void *);
97 int union_create(void *);
98 int union_whiteout(void *);
99 int union_mknod(void *);
100 int union_open(void *);
101 int union_close(void *);
102 int union_access(void *);
103 int union_getattr(void *);
104 int union_setattr(void *);
105 int union_read(void *);
106 int union_write(void *);
107 int union_ioctl(void *);
108 int union_poll(void *);
109 int union_revoke(void *);
110 int union_mmap(void *);
111 int union_fsync(void *);
112 int union_seek(void *);
113 int union_remove(void *);
114 int union_link(void *);
115 int union_rename(void *);
116 int union_mkdir(void *);
117 int union_rmdir(void *);
118 int union_symlink(void *);
119 int union_readdir(void *);
120 int union_readlink(void *);
121 int union_abortop(void *);
122 int union_inactive(void *);
123 int union_reclaim(void *);
124 int union_lock(void *);
125 int union_unlock(void *);
126 int union_bmap(void *);
127 int union_print(void *);
128 int union_islocked(void *);
129 int union_pathconf(void *);
130 int union_advlock(void *);
131 int union_strategy(void *);
132 int union_bwrite(void *);
133 int union_getpages(void *);
134 int union_putpages(void *);
135 int union_kqfilter(void *);
136 
137 static int union_lookup1(struct vnode *, struct vnode **,
138 			      struct vnode **, struct componentname *);
139 
140 
141 /*
142  * Global vfs data structures
143  */
/* Operation vector: maps each vnode op to its union fs implementation. */
144 int (**union_vnodeop_p)(void *);
145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146 	{ &vop_default_desc, vn_default_error },
147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
148 	{ &vop_create_desc, union_create },		/* create */
149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
151 	{ &vop_open_desc, union_open },			/* open */
152 	{ &vop_close_desc, union_close },		/* close */
153 	{ &vop_access_desc, union_access },		/* access */
154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
156 	{ &vop_read_desc, union_read },			/* read */
157 	{ &vop_write_desc, union_write },		/* write */
158 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
159 	{ &vop_poll_desc, union_poll },			/* select */
160 	{ &vop_revoke_desc, union_revoke },		/* revoke */
161 	{ &vop_mmap_desc, union_mmap },			/* mmap */
162 	{ &vop_fsync_desc, union_fsync },		/* fsync */
163 	{ &vop_seek_desc, union_seek },			/* seek */
164 	{ &vop_remove_desc, union_remove },		/* remove */
165 	{ &vop_link_desc, union_link },			/* link */
166 	{ &vop_rename_desc, union_rename },		/* rename */
167 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
168 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
169 	{ &vop_symlink_desc, union_symlink },		/* symlink */
170 	{ &vop_readdir_desc, union_readdir },		/* readdir */
171 	{ &vop_readlink_desc, union_readlink },		/* readlink */
172 	{ &vop_abortop_desc, union_abortop },		/* abortop */
173 	{ &vop_inactive_desc, union_inactive },		/* inactive */
174 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
175 	{ &vop_lock_desc, union_lock },			/* lock */
176 	{ &vop_unlock_desc, union_unlock },		/* unlock */
177 	{ &vop_bmap_desc, union_bmap },			/* bmap */
178 	{ &vop_strategy_desc, union_strategy },		/* strategy */
179 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
180 	{ &vop_print_desc, union_print },		/* print */
181 	{ &vop_islocked_desc, union_islocked },		/* islocked */
182 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
183 	{ &vop_advlock_desc, union_advlock },		/* advlock */
184 	{ &vop_getpages_desc, union_getpages },		/* getpages */
185 	{ &vop_putpages_desc, union_putpages },		/* putpages */
186 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
187 	{ NULL, NULL }
188 };
189 const struct vnodeopv_desc union_vnodeop_opv_desc =
190 	{ &union_vnodeop_p, union_vnodeop_entries };
191 
/*
 * NODE_IS_SPECIAL: true for device, socket and fifo vnodes.  Such
 * nodes are handled in place in the lower layer (e.g. written
 * without a copy-up; see union_write/union_setattr below).
 */
192 #define NODE_IS_SPECIAL(vp) \
193 	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
194 	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
195 
/*
 * union_lookup1: look up cnp in the directory *dvpp for one layer of
 * the union, treating udvp (that layer's root as seen by the union
 * mount) as the barrier for ".." traversal.  Handles stepping back
 * across mount points on ".." and stepping down into filesystems
 * mounted on the result.  On success *vpp holds the resulting vnode.
 */
196 static int
197 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
198 	struct componentname *cnp)
199 {
200 	int error;
201 	struct vnode *tdvp;
202 	struct vnode *dvp;
203 	struct mount *mp;
204 
205 	dvp = *dvpp;
206 
207 	/*
208 	 * If stepping up the directory tree, check for going
209 	 * back across the mount point, in which case do what
210 	 * lookup would do by stepping back down the mount
211 	 * hierarchy.
212 	 */
213 	if (cnp->cn_flags & ISDOTDOT) {
214 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
215 			/*
216 			 * Don't do the NOCROSSMOUNT check
217 			 * at this level.  By definition,
218 			 * union fs deals with namespaces, not
219 			 * filesystems.
220 			 */
221 			tdvp = dvp;
222 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			/* Unlock the old dir only after picking up the
			 * covered vnode, then lock the new one. */
223 			VOP_UNLOCK(tdvp);
224 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
225 		}
226 	}
227 
228         error = VOP_LOOKUP(dvp, &tdvp, cnp);
229 	if (error)
230 		return (error);
231 
232 	dvp = tdvp;
233 
234 	/*
235 	 * Lastly check if the current node is a mount point in
236 	 * which case walk up the mount hierarchy making sure not to
237 	 * bump into the root of the mount tree (ie. dvp != udvp).
238 	 */
239 	while (dvp != udvp && (dvp->v_type == VDIR) &&
240 	       (mp = dvp->v_mountedhere)) {
		/* vfs_busy failure: mount is going away; retest the
		 * loop condition rather than giving up. */
241 		if (vfs_busy(mp, NULL))
242 			continue;
243 		vput(dvp);
244 		error = VFS_ROOT(mp, &tdvp);
245 		vfs_unbusy(mp, false, NULL);
246 		if (error) {
247 			return (error);
248 		}
249 		dvp = tdvp;
250 	}
251 
252 	*vpp = dvp;
253 	return (0);
254 }
255 
/*
 * union_lookup: look the component up in both layers and return (via
 * union_allocvp) a union node covering whatever was found.  May
 * restart from `start' after creating a shadow directory in the
 * upper layer.
 */
256 int
257 union_lookup(void *v)
258 {
259 	struct vop_lookup_args /* {
260 		struct vnodeop_desc *a_desc;
261 		struct vnode *a_dvp;
262 		struct vnode **a_vpp;
263 		struct componentname *a_cnp;
264 	} */ *ap = v;
265 	int error;
266 	int uerror, lerror;
267 	struct vnode *uppervp, *lowervp;
268 	struct vnode *upperdvp, *lowerdvp;
269 	struct vnode *dvp = ap->a_dvp;
270 	struct union_node *dun = VTOUNION(dvp);
271 	struct componentname *cnp = ap->a_cnp;
272 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
273 	kauth_cred_t saved_cred = NULL;
274 	int iswhiteout;
275 	struct vattr va;
276 
277 #ifdef notyet
	/* Disabled: "..." would name the lower layer's directory. */
278 	if (cnp->cn_namelen == 3 &&
279 			cnp->cn_nameptr[2] == '.' &&
280 			cnp->cn_nameptr[1] == '.' &&
281 			cnp->cn_nameptr[0] == '.') {
282 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
283 		if (dvp == NULLVP)
284 			return (ENOENT);
285 		vref(dvp);
286 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
287 		return (0);
288 	}
289 #endif
290 
	/* Refuse delete/rename preparation on a read-only union. */
291 	if ((cnp->cn_flags & ISLASTCN) &&
292 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
293 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
294 		return (EROFS);
295 
	/* Restart point after a shadow directory create below. */
296 start:
297 	upperdvp = dun->un_uppervp;
298 	lowerdvp = dun->un_lowervp;
299 	uppervp = NULLVP;
300 	lowervp = NULLVP;
301 	iswhiteout = 0;
302 
303 	/*
304 	 * do the lookup in the upper level.
305 	 * if that level consumes additional pathnames,
306 	 * then assume that something special is going
307 	 * on and just return that vnode.
308 	 */
309 	if (upperdvp != NULLVP) {
310 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
311 					&uppervp, cnp);
312 		if (cnp->cn_consume != 0) {
313 			*ap->a_vpp = uppervp;
314 			return (uerror);
315 		}
316 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
317 			if (cnp->cn_flags & ISWHITEOUT) {
318 				iswhiteout = 1;
319 			} else if (lowerdvp != NULLVP) {
				/* lerror is scratch here; the lower-layer
				 * lookup below sets it for real. */
320 				lerror = VOP_GETATTR(upperdvp, &va,
321 					cnp->cn_cred);
322 				if (lerror == 0 && (va.va_flags & OPAQUE))
323 					iswhiteout = 1;
324 			}
325 		}
326 	} else {
327 		uerror = ENOENT;
328 	}
329 
330 	/*
331 	 * in a similar way to the upper layer, do the lookup
332 	 * in the lower layer.   this time, if there is some
333 	 * component magic going on, then vput whatever we got
334 	 * back from the upper layer and return the lower vnode
335 	 * instead.
336 	 */
337 	if (lowerdvp != NULLVP && !iswhiteout) {
338 		int nameiop;
339 
340 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
341 
342 		/*
343 		 * Only do a LOOKUP on the bottom node, since
344 		 * we won't be making changes to it anyway.
345 		 */
346 		nameiop = cnp->cn_nameiop;
347 		cnp->cn_nameiop = LOOKUP;
348 		if (um->um_op == UNMNT_BELOW) {
349 			saved_cred = cnp->cn_cred;
350 			cnp->cn_cred = um->um_cred;
351 		}
352 
353 		/*
354 		 * we shouldn't have to worry about locking interactions
355 		 * between the lower layer and our union layer (w.r.t.
356 		 * `..' processing) because we don't futz with lowervp
357 		 * locks in the union-node instantiation code path.
358 		 */
359 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
360 				&lowervp, cnp);
361 		if (um->um_op == UNMNT_BELOW)
362 			cnp->cn_cred = saved_cred;
363 		cnp->cn_nameiop = nameiop;
364 
365 		if (lowervp != lowerdvp)
366 			VOP_UNLOCK(lowerdvp);
367 
368 		if (cnp->cn_consume != 0) {
			/* Lower layer consumed extra components: drop
			 * whatever the upper lookup produced. */
369 			if (uppervp != NULLVP) {
370 				if (uppervp == upperdvp)
371 					vrele(uppervp);
372 				else
373 					vput(uppervp);
374 				uppervp = NULLVP;
375 			}
376 			*ap->a_vpp = lowervp;
377 			return (lerror);
378 		}
379 	} else {
380 		lerror = ENOENT;
381 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
382 			lowervp = LOWERVP(dun->un_pvp);
383 			if (lowervp != NULLVP) {
384 				vref(lowervp);
385 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
386 				lerror = 0;
387 			}
388 		}
389 	}
390 
391 	/*
392 	 * EJUSTRETURN is used by underlying filesystems to indicate that
393 	 * a directory modification op was started successfully.
394 	 * This will only happen in the upper layer, since
395 	 * the lower layer only does LOOKUPs.
396 	 * If this union is mounted read-only, bounce it now.
397 	 */
398 
399 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
400 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
401 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
402 		uerror = EROFS;
403 
404 	/*
405 	 * at this point, we have uerror and lerror indicating
406 	 * possible errors with the lookups in the upper and lower
407 	 * layers.  additionally, uppervp and lowervp are (locked)
408 	 * references to existing vnodes in the upper and lower layers.
409 	 *
410 	 * there are now three cases to consider.
411 	 * 1. if both layers returned an error, then return whatever
412 	 *    error the upper layer generated.
413 	 *
414 	 * 2. if the top layer failed and the bottom layer succeeded
415 	 *    then two subcases occur.
416 	 *    a.  the bottom vnode is not a directory, in which
417 	 *	  case just return a new union vnode referencing
418 	 *	  an empty top layer and the existing bottom layer.
419 	 *    b.  the bottom vnode is a directory, in which case
420 	 *	  create a new directory in the top-level and
421 	 *	  continue as in case 3.
422 	 *
423 	 * 3. if the top layer succeeded then return a new union
424 	 *    vnode referencing whatever the new top layer and
425 	 *    whatever the bottom layer returned.
426 	 */
427 
428 	*ap->a_vpp = NULLVP;
429 
430 
431 	/* case 1. */
432 	if ((uerror != 0) && (lerror != 0)) {
433 		return (uerror);
434 	}
435 
436 	/* case 2. */
437 	if (uerror != 0 /* && (lerror == 0) */ ) {
438 		if (lowervp->v_type == VDIR) { /* case 2b. */
439 			/*
440 			 * We may be racing another process to make the
441 			 * upper-level shadow directory.  Be careful with
442 			 * locks/etc!
443 			 * If we have to create a shadow directory and want
444 			 * to commit the node we have to restart the lookup
445 			 * to get the componentname right.
446 			 */
447 			if (upperdvp) {
448 				VOP_UNLOCK(upperdvp);
449 				uerror = union_mkshadow(um, upperdvp, cnp,
450 				    &uppervp);
451 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
452 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
453 					vput(uppervp);
454 					if (lowervp != NULLVP)
455 						vput(lowervp);
456 					goto start;
457 				}
458 			}
459 			if (uerror) {
460 				if (lowervp != NULLVP) {
461 					vput(lowervp);
462 					lowervp = NULLVP;
463 				}
464 				return (uerror);
465 			}
466 		}
467 	}
468 
469 	if (lowervp != NULLVP)
470 		VOP_UNLOCK(lowervp);
471 
472 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
473 			      uppervp, lowervp, 1);
474 
	/* On failure union_allocvp did not take over our references:
	 * uppervp is still locked, lowervp was unlocked above. */
475 	if (error) {
476 		if (uppervp != NULLVP)
477 			vput(uppervp);
478 		if (lowervp != NULLVP)
479 			vrele(lowervp);
480 	}
481 
482 	return (error);
483 }
484 
/*
 * union_create: create a file in the upper layer (the only writable
 * one) and wrap the result in a union node.  Fails with EROFS when
 * no upper directory exists.
 */
485 int
486 union_create(void *v)
487 {
488 	struct vop_create_args /* {
489 		struct vnode *a_dvp;
490 		struct vnode **a_vpp;
491 		struct componentname *a_cnp;
492 		struct vattr *a_vap;
493 	} */ *ap = v;
494 	struct union_node *un = VTOUNION(ap->a_dvp);
495 	struct vnode *dvp = un->un_uppervp;
496 	struct componentname *cnp = ap->a_cnp;
497 
498 	if (dvp != NULLVP) {
499 		int error;
500 		struct vnode *vp;
501 		struct mount *mp;
502 
		/* NOTE(review): take our own reference, then UN_KLOCK
		 * apparently makes the vput() of the union dvp keep the
		 * upper vnode's lock for us -- confirm in union_subr.c. */
503 		vref(dvp);
504 		un->un_flags |= UN_KLOCK;
505 		mp = ap->a_dvp->v_mount;
506 		vput(ap->a_dvp);
507 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
508 		if (error)
509 			return (error);
510 
511 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
512 				NULLVP, 1);
513 		if (error)
514 			vput(vp);
515 		return (error);
516 	}
517 
518 	vput(ap->a_dvp);
519 	return (EROFS);
520 }
521 
522 int
523 union_whiteout(void *v)
524 {
525 	struct vop_whiteout_args /* {
526 		struct vnode *a_dvp;
527 		struct componentname *a_cnp;
528 		int a_flags;
529 	} */ *ap = v;
530 	struct union_node *un = VTOUNION(ap->a_dvp);
531 	struct componentname *cnp = ap->a_cnp;
532 
533 	if (un->un_uppervp == NULLVP)
534 		return (EOPNOTSUPP);
535 
536 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
537 }
538 
/*
 * union_mknod: create a device/fifo node in the upper layer and wrap
 * it in a union node; EROFS when there is no upper directory.
 * Mirrors union_create() including the UN_KLOCK lock hand-off.
 */
539 int
540 union_mknod(void *v)
541 {
542 	struct vop_mknod_args /* {
543 		struct vnode *a_dvp;
544 		struct vnode **a_vpp;
545 		struct componentname *a_cnp;
546 		struct vattr *a_vap;
547 	} */ *ap = v;
548 	struct union_node *un = VTOUNION(ap->a_dvp);
549 	struct vnode *dvp = un->un_uppervp;
550 	struct componentname *cnp = ap->a_cnp;
551 
552 	if (dvp != NULLVP) {
553 		int error;
554 		struct vnode *vp;
555 		struct mount *mp;
556 
		/* Keep a private reference; UN_KLOCK + vput() hands the
		 * upper vnode's lock over to us (cf. union_create). */
557 		vref(dvp);
558 		un->un_flags |= UN_KLOCK;
559 		mp = ap->a_dvp->v_mount;
560 		vput(ap->a_dvp);
561 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
562 		if (error)
563 			return (error);
564 
565 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
566 				      cnp, vp, NULLVP, 1);
567 		if (error)
568 		    vput(vp);
569 		return (error);
570 	}
571 
572 	vput(ap->a_dvp);
573 	return (EROFS);
574 }
575 
/*
 * union_open: open the upper vnode if one exists; otherwise open the
 * lower vnode, copying a regular file up first when opened for write.
 * Lower-layer opens are counted in un_openl (paired with union_close).
 */
576 int
577 union_open(void *v)
578 {
579 	struct vop_open_args /* {
580 		struct vnodeop_desc *a_desc;
581 		struct vnode *a_vp;
582 		int a_mode;
583 		kauth_cred_t a_cred;
584 	} */ *ap = v;
585 	struct union_node *un = VTOUNION(ap->a_vp);
586 	struct vnode *tvp;
587 	int mode = ap->a_mode;
588 	kauth_cred_t cred = ap->a_cred;
589 	struct lwp *l = curlwp;
590 	int error;
591 
592 	/*
593 	 * If there is an existing upper vp then simply open that.
594 	 */
595 	tvp = un->un_uppervp;
596 	if (tvp == NULLVP) {
597 		/*
598 		 * If the lower vnode is being opened for writing, then
599 		 * copy the file contents to the upper vnode and open that,
600 		 * otherwise can simply open the lower vnode.
601 		 */
602 		tvp = un->un_lowervp;
603 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* O_TRUNC: no point copying data that is about
			 * to be discarded. */
604 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
605 			if (error == 0)
606 				error = VOP_OPEN(un->un_uppervp, mode, cred);
607 			return (error);
608 		}
609 
610 		/*
611 		 * Just open the lower vnode, but check for nodev mount flag
612 		 */
613 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
614 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
615 			return ENXIO;
616 		un->un_openl++;
617 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
618 		error = VOP_OPEN(tvp, mode, cred);
619 		VOP_UNLOCK(tvp);
620 
621 		return (error);
622 	}
623 	/*
624 	 * Just open the upper vnode, checking for nodev mount flag first
625 	 */
626 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
627 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
628 		return ENXIO;
629 
630 	error = VOP_OPEN(tvp, mode, cred);
631 
632 	return (error);
633 }
634 
/*
 * union_close: close whichever layer was opened.  If there is no
 * upper vnode the open must have gone to the lower layer, so balance
 * the un_openl count taken in union_open() and lock the lower vnode
 * around the close.
 */
635 int
636 union_close(void *v)
637 {
638 	struct vop_close_args /* {
639 		struct vnode *a_vp;
640 		int  a_fflag;
641 		kauth_cred_t a_cred;
642 	} */ *ap = v;
643 	struct union_node *un = VTOUNION(ap->a_vp);
644 	struct vnode *vp;
645 	int error;
646 	bool do_lock;
647 
648 	vp = un->un_uppervp;
649 	if (vp != NULLVP) {
650 		do_lock = false;
651 	} else {
652 		KASSERT(un->un_openl > 0);
653 		--un->un_openl;
654 		vp = un->un_lowervp;
655 		do_lock = true;
656 	}
657 
658 	KASSERT(vp != NULLVP);
	/* Redirect the op to the chosen underlying vnode. */
659 	ap->a_vp = vp;
660 	if (do_lock)
661 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
662 	error = VCALL(vp, VOFFSET(vop_close), ap);
663 	if (do_lock)
664 		VOP_UNLOCK(vp);
665 
666 	return error;
667 }
668 
669 /*
670  * Check access permission on the union vnode.
671  * The access check being enforced is to check
672  * against both the underlying vnode, and any
673  * copied vnode.  This ensures that no additional
674  * file permissions are given away simply because
675  * the user caused an implicit file copy.
676  */
677 int
678 union_access(void *v)
679 {
680 	struct vop_access_args /* {
681 		struct vnodeop_desc *a_desc;
682 		struct vnode *a_vp;
683 		int a_mode;
684 		kauth_cred_t a_cred;
685 	} */ *ap = v;
686 	struct vnode *vp = ap->a_vp;
687 	struct union_node *un = VTOUNION(vp);
688 	int error = EACCES;
689 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
690 
691 	/*
692 	 * Disallow write attempts on read-only file systems;
693 	 * unless the file is a socket, fifo, or a block or
694 	 * character device resident on the file system.
695 	 */
696 	if (ap->a_mode & VWRITE) {
697 		switch (vp->v_type) {
698 		case VDIR:
699 		case VLNK:
700 		case VREG:
701 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
702 				return (EROFS);
703 			break;
704 		case VBAD:
705 		case VBLK:
706 		case VCHR:
707 		case VSOCK:
708 		case VFIFO:
709 		case VNON:
710 		default:
711 			break;
712 		}
713 	}
714 
715 
	/* Upper layer present: its answer is authoritative. */
716 	if ((vp = un->un_uppervp) != NULLVP) {
717 		ap->a_vp = vp;
718 		return (VCALL(vp, VOFFSET(vop_access), ap));
719 	}
720 
	/* Lower layer only: for UNMNT_BELOW mounts also re-check with
	 * the mount's credentials, so both must grant access. */
721 	if ((vp = un->un_lowervp) != NULLVP) {
722 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
723 		ap->a_vp = vp;
724 		error = VCALL(vp, VOFFSET(vop_access), ap);
725 		if (error == 0) {
726 			if (um->um_op == UNMNT_BELOW) {
727 				ap->a_cred = um->um_cred;
728 				error = VCALL(vp, VOFFSET(vop_access), ap);
729 			}
730 		}
731 		VOP_UNLOCK(vp);
732 		if (error)
733 			return (error);
734 	}
735 
736 	return (error);
737 }
738 
739 /*
740  * We handle getattr only to change the fsid and
741  * track object sizes
742  */
743 int
744 union_getattr(void *v)
745 {
746 	struct vop_getattr_args /* {
747 		struct vnode *a_vp;
748 		struct vattr *a_vap;
749 		kauth_cred_t a_cred;
750 	} */ *ap = v;
751 	int error;
752 	struct union_node *un = VTOUNION(ap->a_vp);
753 	struct vnode *vp = un->un_uppervp;
754 	struct vattr *vap;
755 	struct vattr va;
756 
757 
758 	/*
759 	 * Some programs walk the filesystem hierarchy by counting
760 	 * links to directories to avoid stat'ing all the time.
761 	 * This means the link count on directories needs to be "correct".
762 	 * The only way to do that is to call getattr on both layers
763 	 * and fix up the link count.  The link count will not necessarily
764 	 * be accurate but will be large enough to defeat the tree walkers.
765 	 *
766 	 * To make life more interesting, some filesystems don't keep
767 	 * track of link counts in the expected way, and return a
768 	 * link count of `1' for those directories; if either of the
769 	 * component directories returns a link count of `1', we return a 1.
770 	 */
771 
772 	vap = ap->a_vap;
773 
774 	vp = un->un_uppervp;
775 	if (vp != NULLVP) {
776 		error = VOP_GETATTR(vp, vap, ap->a_cred);
777 		if (error)
778 			return (error);
		/* NOTE(review): no mutex_exit here -- union_newsize()
		 * is expected to release un_lock (the read/write paths
		 * below pair mutex_enter with it the same way). */
779 		mutex_enter(&un->un_lock);
780 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
781 	}
782 
	/* Decide whether the lower layer needs a getattr too: always
	 * when there is no upper vnode; for directories both layers
	 * are queried (into a scratch vattr) to fix up link counts. */
783 	if (vp == NULLVP) {
784 		vp = un->un_lowervp;
785 	} else if (vp->v_type == VDIR) {
786 		vp = un->un_lowervp;
787 		if (vp != NULLVP)
788 			vap = &va;
789 	} else {
790 		vp = NULLVP;
791 	}
792 
793 	if (vp != NULLVP) {
794 		if (vp == un->un_lowervp)
795 			vn_lock(vp, LK_SHARED | LK_RETRY);
796 		error = VOP_GETATTR(vp, vap, ap->a_cred);
797 		if (vp == un->un_lowervp)
798 			VOP_UNLOCK(vp);
799 		if (error)
800 			return (error);
801 		mutex_enter(&un->un_lock);
802 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
803 	}
804 
805 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
806 		/*
807 		 * Link count manipulation:
808 		 *	- If both return "2", return 2 (no subdirs)
809 		 *	- If one or the other return "1", return "1" (ENOCLUE)
810 		 */
811 		if ((ap->a_vap->va_nlink == 2) &&
812 		    (vap->va_nlink == 2))
813 			;
814 		else if (ap->a_vap->va_nlink != 1) {
815 			if (vap->va_nlink == 1)
816 				ap->a_vap->va_nlink = 1;
817 			else
818 				ap->a_vap->va_nlink += vap->va_nlink;
819 		}
820 	}
	/* Report the union mount's fsid, not the underlying layer's. */
821 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
822 	return (0);
823 }
824 
/*
 * union_setattr: apply attribute changes to the upper layer, copying
 * a regular lower file up first (handles open with O_TRUNC|O_CREAT).
 * Special files in the lower layer tolerate a size-only truncate to
 * zero; everything else without an upper vnode is EROFS.
 */
825 int
826 union_setattr(void *v)
827 {
828 	struct vop_setattr_args /* {
829 		struct vnode *a_vp;
830 		struct vattr *a_vap;
831 		kauth_cred_t a_cred;
832 	} */ *ap = v;
833 	struct vattr *vap = ap->a_vap;
834 	struct vnode *vp = ap->a_vp;
835 	struct union_node *un = VTOUNION(vp);
836 	bool size_only;		/* All but va_size are VNOVAL. */
837 	int error;
838 
839 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
840 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
841 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
842 
843 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
844 		return (EROFS);
845 	if (vap->va_size != VNOVAL) {
846  		switch (vp->v_type) {
847  		case VDIR:
848  			return (EISDIR);
849  		case VCHR:
850  		case VBLK:
851  		case VSOCK:
852  		case VFIFO:
853 			break;
854 		case VREG:
855 		case VLNK:
856  		default:
857 			/*
858 			 * Disallow write attempts if the filesystem is
859 			 * mounted read-only.
860 			 */
861 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
862 				return (EROFS);
863 		}
864 	}
865 
866 	/*
867 	 * Handle case of truncating lower object to zero size,
868 	 * by creating a zero length upper object.  This is to
869 	 * handle the case of open with O_TRUNC and O_CREAT.
870 	 */
871 	if ((un->un_uppervp == NULLVP) &&
872 	    /* assert(un->un_lowervp != NULLVP) */
873 	    (un->un_lowervp->v_type == VREG)) {
		/* Copy data only when not truncating to zero. */
874 		error = union_copyup(un, (vap->va_size != 0),
875 						ap->a_cred, curlwp);
876 		if (error)
877 			return (error);
878 	}
879 
880 	/*
881 	 * Try to set attributes in upper layer, ignore size change to zero
882 	 * for devices to handle O_TRUNC and return read-only filesystem error
883 	 * otherwise.
884 	 */
885 	if (un->un_uppervp != NULLVP) {
886 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
887 		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* union_newsize() consumes un_lock (see the
			 * read/write paths). */
888 			mutex_enter(&un->un_lock);
889 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
890 		}
891 	} else {
892 		KASSERT(un->un_lowervp != NULLVP);
893 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
894 			if (size_only &&
895 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
896 				error = 0;
897 			else
898 				error = EROFS;
899 		} else {
900 			error = EROFS;
901 		}
902 	}
903 
904 	return (error);
905 }
906 
/*
 * union_read: read from the backing vnode (upper preferred); the
 * lower vnode must be locked around the call.  Afterwards propagate
 * any apparent size growth into the union node's cached sizes.
 */
907 int
908 union_read(void *v)
909 {
910 	struct vop_read_args /* {
911 		struct vnode *a_vp;
912 		struct uio *a_uio;
913 		int  a_ioflag;
914 		kauth_cred_t a_cred;
915 	} */ *ap = v;
916 	int error;
917 	struct vnode *vp = OTHERVP(ap->a_vp);
918 	int dolock = (vp == LOWERVP(ap->a_vp));
919 
920 	if (dolock)
921 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
922 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
923 	if (dolock)
924 		VOP_UNLOCK(vp);
925 
926 	/*
927 	 * XXX
928 	 * perhaps the size of the underlying object has changed under
929 	 * our feet.  take advantage of the offset information present
930 	 * in the uio structure.
931 	 */
932 	if (error == 0) {
933 		struct union_node *un = VTOUNION(ap->a_vp);
934 		off_t cur = ap->a_uio->uio_offset;
935 		off_t usz = VNOVAL, lsz = VNOVAL;
936 
		/* un_lock is either consumed by union_newsize() or
		 * released explicitly below. */
937 		mutex_enter(&un->un_lock);
938 		if (vp == un->un_uppervp) {
939 			if (cur > un->un_uppersz)
940 				usz = cur;
941 		} else {
942 			if (cur > un->un_lowersz)
943 				lsz = cur;
944 		}
945 
946 		if (usz != VNOVAL || lsz != VNOVAL)
947 			union_newsize(ap->a_vp, usz, lsz);
948 		else
949 			mutex_exit(&un->un_lock);
950 	}
951 
952 	return (error);
953 }
954 
955 int
956 union_write(void *v)
957 {
958 	struct vop_read_args /* {
959 		struct vnode *a_vp;
960 		struct uio *a_uio;
961 		int  a_ioflag;
962 		kauth_cred_t a_cred;
963 	} */ *ap = v;
964 	int error;
965 	struct vnode *vp;
966 	struct union_node *un = VTOUNION(ap->a_vp);
967 
968 	vp = UPPERVP(ap->a_vp);
969 	if (vp == NULLVP) {
970 		vp = LOWERVP(ap->a_vp);
971 		if (NODE_IS_SPECIAL(vp)) {
972 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
973 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
974 			    ap->a_cred);
975 			VOP_UNLOCK(vp);
976 			return error;
977 		}
978 		panic("union: missing upper layer in write");
979 	}
980 
981 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
982 
983 	/*
984 	 * the size of the underlying object may be changed by the
985 	 * write.
986 	 */
987 	if (error == 0) {
988 		off_t cur = ap->a_uio->uio_offset;
989 
990 		mutex_enter(&un->un_lock);
991 		if (cur > un->un_uppersz)
992 			union_newsize(ap->a_vp, cur, VNOVAL);
993 		else
994 			mutex_exit(&un->un_lock);
995 	}
996 
997 	return (error);
998 }
999 
1000 int
1001 union_ioctl(void *v)
1002 {
1003 	struct vop_ioctl_args /* {
1004 		struct vnode *a_vp;
1005 		int  a_command;
1006 		void *a_data;
1007 		int  a_fflag;
1008 		kauth_cred_t a_cred;
1009 	} */ *ap = v;
1010 	struct vnode *ovp = OTHERVP(ap->a_vp);
1011 
1012 	ap->a_vp = ovp;
1013 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1014 }
1015 
1016 int
1017 union_poll(void *v)
1018 {
1019 	struct vop_poll_args /* {
1020 		struct vnode *a_vp;
1021 		int a_events;
1022 	} */ *ap = v;
1023 	struct vnode *ovp = OTHERVP(ap->a_vp);
1024 
1025 	ap->a_vp = ovp;
1026 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1027 }
1028 
1029 int
1030 union_revoke(void *v)
1031 {
1032 	struct vop_revoke_args /* {
1033 		struct vnode *a_vp;
1034 		int a_flags;
1035 		struct proc *a_p;
1036 	} */ *ap = v;
1037 	struct vnode *vp = ap->a_vp;
1038 
1039 	if (UPPERVP(vp))
1040 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1041 	if (LOWERVP(vp))
1042 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1043 	vgone(vp);	/* XXXAD?? */
1044 	return (0);
1045 }
1046 
1047 int
1048 union_mmap(void *v)
1049 {
1050 	struct vop_mmap_args /* {
1051 		struct vnode *a_vp;
1052 		vm_prot_t a_prot;
1053 		kauth_cred_t a_cred;
1054 	} */ *ap = v;
1055 	struct vnode *ovp = OTHERVP(ap->a_vp);
1056 
1057 	ap->a_vp = ovp;
1058 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1059 }
1060 
/*
 * union_fsync: sync the backing vnode.  Devices go through the spec
 * layer first; reclaim-time ("shallow") fsyncs skip the underlying
 * vnodes entirely (see comment below).
 */
1061 int
1062 union_fsync(void *v)
1063 {
1064 	struct vop_fsync_args /* {
1065 		struct vnode *a_vp;
1066 		kauth_cred_t a_cred;
1067 		int  a_flags;
1068 		off_t offhi;
1069 		off_t offlo;
1070 	} */ *ap = v;
1071 	int error = 0;
1072 	struct vnode *targetvp;
1073 
1074 	/*
1075 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1076 	 * bother syncing the underlying vnodes, since (a) they'll be
1077 	 * fsync'ed when reclaimed and (b) we could deadlock if
1078 	 * they're locked; otherwise, pass it through to the
1079 	 * underlying layer.
1080 	 */
1081 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1082 		error = spec_fsync(v);
1083 		if (error)
1084 			return error;
1085 	}
1086 
1087 	if (ap->a_flags & FSYNC_RECLAIM)
1088 		return 0;
1089 
1090 	targetvp = OTHERVP(ap->a_vp);
1091 	if (targetvp != NULLVP) {
		/* Only the lower vnode needs explicit locking here. */
1092 		int dolock = (targetvp == LOWERVP(ap->a_vp));
1093 
1094 		if (dolock)
1095 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1096 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1097 			    ap->a_offlo, ap->a_offhi);
1098 		if (dolock)
1099 			VOP_UNLOCK(targetvp);
1100 	}
1101 
1102 	return (error);
1103 }
1104 
1105 int
1106 union_seek(void *v)
1107 {
1108 	struct vop_seek_args /* {
1109 		struct vnode *a_vp;
1110 		off_t  a_oldoff;
1111 		off_t  a_newoff;
1112 		kauth_cred_t a_cred;
1113 	} */ *ap = v;
1114 	struct vnode *ovp = OTHERVP(ap->a_vp);
1115 
1116 	ap->a_vp = ovp;
1117 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1118 }
1119 
int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* Removal requires a writable upper directory. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Transfer references and locks from the union vnodes
		 * to the upper-layer vnodes: setting UN_KLOCK makes
		 * the vput below release only the union node while the
		 * upper vnode stays locked (see union_unlock).
		 */
		vref(dvp);
		dun->un_flags |= UN_KLOCK;
		vput(ap->a_dvp);
		vref(vp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_vp);

		/*
		 * Ask whether a whiteout should be left behind
		 * (presumably when a lower-layer object would
		 * otherwise reappear under this name).
		 */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
	} else {
		/*
		 * Only a lower-layer object exists.  It cannot be
		 * removed, so hide it with a whiteout entry in the
		 * upper directory instead.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1162 
int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Source is not a union vnode; link it directly. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * If the target directory's upper vnode is the
			 * same as the source's parent directory, it is
			 * locked by us and must be dropped across the
			 * copyup to avoid self-deadlock.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* Link against the (possibly just copied-up) upper vnode. */
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Hand our reference/lock over to the upper directory;
	 * UN_KLOCK keeps it locked across the vput of the union
	 * vnode (see union_unlock).
	 */
	vref(dvp);
	dun->un_flags |= UN_KLOCK;
	vput(ap->a_dvp);

	return (VOP_LINK(dvp, vp, cnp));
}
1248 
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Substitute the upper-layer vnode for each union vnode
	 * involved, then pass the whole rename down to the upper
	 * filesystem.  Source-side vnodes are referenced (vrele'd in
	 * "out"); target-side vnodes also carry locks, transferred
	 * via UN_KLOCK (see union_unlock).
	 */
	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* Leave a whiteout so the lower object stays hidden. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
			un->un_flags |= UN_KLOCK;
		}
		vput(ap->a_tvp);
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Error before VOP_RENAME: release target side ourselves. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the union-layer source references we replaced above. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	return (error);
}
1348 
int
union_mkdir(void *v)
{
	struct vop_mkdir_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		/*
		 * Create in the upper layer.  UN_KLOCK makes the
		 * VOP_UNLOCK below release only the union node's own
		 * lock while the upper directory stays locked for
		 * VOP_MKDIR (see union_unlock).
		 */
		vref(dvp);
		un->un_flags |= UN_KLOCK;
		VOP_UNLOCK(ap->a_dvp);
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper-layer directory in a union node. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vput(vp);
		vrele(ap->a_dvp);
		return (error);
	}

	/* No upper layer: directories cannot be created. */
	vput(ap->a_dvp);
	return (EROFS);
}
1386 
int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* Removal requires a writable upper directory. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Refuse if the union-visible directory is not removable. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Transfer references and locks from the union vnodes
		 * to the upper-layer vnodes: UN_KLOCK makes the vput
		 * below release only the union node while the upper
		 * vnode stays locked (see union_unlock).
		 */
		vref(dvp);
		dun->un_flags |= UN_KLOCK;
		vput(ap->a_dvp);
		vref(vp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_vp);

		/*
		 * Ask whether a whiteout should be left behind
		 * (presumably when a lower-layer directory would
		 * otherwise reappear under this name).
		 */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
	} else {
		/*
		 * Only a lower-layer directory exists.  It cannot be
		 * removed, so hide it with a whiteout entry in the
		 * upper directory instead.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1436 
1437 int
1438 union_symlink(void *v)
1439 {
1440 	struct vop_symlink_args /* {
1441 		struct vnode *a_dvp;
1442 		struct vnode **a_vpp;
1443 		struct componentname *a_cnp;
1444 		struct vattr *a_vap;
1445 		char *a_target;
1446 	} */ *ap = v;
1447 	struct union_node *un = VTOUNION(ap->a_dvp);
1448 	struct vnode *dvp = un->un_uppervp;
1449 	struct componentname *cnp = ap->a_cnp;
1450 
1451 	if (dvp != NULLVP) {
1452 		int error;
1453 
1454 		vref(dvp);
1455 		un->un_flags |= UN_KLOCK;
1456 		vput(ap->a_dvp);
1457 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1458 				    ap->a_target);
1459 		return (error);
1460 	}
1461 
1462 	vput(ap->a_dvp);
1463 	return (EROFS);
1464 }
1465 
1466 /*
1467  * union_readdir works in concert with getdirentries and
1468  * readdir(3) to provide a list of entries in the unioned
1469  * directories.  getdirentries is responsible for walking
1470  * down the union stack.  readdir(3) is responsible for
1471  * eliminating duplicate names from the returned data stream.
1472  */
1473 int
1474 union_readdir(void *v)
1475 {
1476 	struct vop_readdir_args /* {
1477 		struct vnodeop_desc *a_desc;
1478 		struct vnode *a_vp;
1479 		struct uio *a_uio;
1480 		kauth_cred_t a_cred;
1481 		int *a_eofflag;
1482 		u_long *a_cookies;
1483 		int a_ncookies;
1484 	} */ *ap = v;
1485 	struct union_node *un = VTOUNION(ap->a_vp);
1486 	struct vnode *uvp = un->un_uppervp;
1487 
1488 	if (uvp == NULLVP)
1489 		return (0);
1490 
1491 	ap->a_vp = uvp;
1492 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1493 }
1494 
1495 int
1496 union_readlink(void *v)
1497 {
1498 	struct vop_readlink_args /* {
1499 		struct vnode *a_vp;
1500 		struct uio *a_uio;
1501 		kauth_cred_t a_cred;
1502 	} */ *ap = v;
1503 	int error;
1504 	struct vnode *vp = OTHERVP(ap->a_vp);
1505 	int dolock = (vp == LOWERVP(ap->a_vp));
1506 
1507 	if (dolock)
1508 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1509 	ap->a_vp = vp;
1510 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1511 	if (dolock)
1512 		VOP_UNLOCK(vp);
1513 
1514 	return (error);
1515 }
1516 
1517 int
1518 union_abortop(void *v)
1519 {
1520 	struct vop_abortop_args /* {
1521 		struct vnode *a_dvp;
1522 		struct componentname *a_cnp;
1523 	} */ *ap = v;
1524 
1525 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1526 
1527 	ap->a_dvp = UPPERVP(ap->a_dvp);
1528 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1529 }
1530 
int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release the cached directory-stack vnodes, if any. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Recycle the vnode only if it is no longer in the node cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}
1568 
1569 int
1570 union_reclaim(void *v)
1571 {
1572 	struct vop_reclaim_args /* {
1573 		struct vnode *a_vp;
1574 	} */ *ap = v;
1575 
1576 	union_freevp(ap->a_vp);
1577 
1578 	return (0);
1579 }
1580 
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct union_node *un;
	int error;

	un = VTOUNION(ap->a_vp);
	mutex_enter(&un->un_lock);
	for (;;) {
		/*
		 * Lock whichever vnode LOCKVP() currently designates
		 * as providing this union node's lock.  un_lock must
		 * be dropped across the (possibly sleeping) lock
		 * acquisition.
		 */
		vp = LOCKVP(ap->a_vp);
		mutex_exit(&un->un_lock);
		if (vp == ap->a_vp)
			error = genfs_lock(ap);
		else
			error = VOP_LOCK(vp, ap->a_flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		/*
		 * If the lock vnode changed while we slept, undo the
		 * lock we took and retry with the new one.
		 */
		if (vp == LOCKVP(ap->a_vp))
			break;
		if (vp == ap->a_vp)
			genfs_unlock(ap);
		else
			VOP_UNLOCK(vp);
	}
	KASSERT((un->un_flags & UN_KLOCK) == 0);
	mutex_exit(&un->un_lock);

	return error;
}
1616 
1617 /*
1618  * When operations want to vput() a union node yet retain a lock on
1619  * the upper vnode (say, to do some further operations like link(),
1620  * mkdir(), ...), they set UN_KLOCK on the union node, then call
1621  * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1622  * unlocks the union node (leaving the upper vnode alone), clears the
1623  * KLOCK flag, and then returns to vput().  The caller then does whatever
1624  * is left to do with the upper vnode, and ensures that it gets unlocked.
1625  *
1626  * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1627  */
int
union_unlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct union_node *un;

	un = VTOUNION(ap->a_vp);
	vp = LOCKVP(ap->a_vp);
	if ((un->un_flags & UN_KLOCK) == UN_KLOCK) {
		/*
		 * Caller asked to keep the underlying vnode locked
		 * (see the block comment above); just consume the
		 * flag.
		 */
		KASSERT(vp != ap->a_vp);
		un->un_flags &= ~UN_KLOCK;
		return 0;
	}
	/* Unlock whichever vnode currently carries the lock. */
	if (vp == ap->a_vp)
		genfs_unlock(ap);
	else
		VOP_UNLOCK(vp);

	return 0;
}
1652 
1653 int
1654 union_bmap(void *v)
1655 {
1656 	struct vop_bmap_args /* {
1657 		struct vnode *a_vp;
1658 		daddr_t  a_bn;
1659 		struct vnode **a_vpp;
1660 		daddr_t *a_bnp;
1661 		int *a_runp;
1662 	} */ *ap = v;
1663 	int error;
1664 	struct vnode *vp = OTHERVP(ap->a_vp);
1665 	int dolock = (vp == LOWERVP(ap->a_vp));
1666 
1667 	if (dolock)
1668 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1669 	ap->a_vp = vp;
1670 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1671 	if (dolock)
1672 		VOP_UNLOCK(vp);
1673 
1674 	return (error);
1675 }
1676 
1677 int
1678 union_print(void *v)
1679 {
1680 	struct vop_print_args /* {
1681 		struct vnode *a_vp;
1682 	} */ *ap = v;
1683 	struct vnode *vp = ap->a_vp;
1684 
1685 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1686 			vp, UPPERVP(vp), LOWERVP(vp));
1687 	if (UPPERVP(vp) != NULLVP)
1688 		vprint("union: upper", UPPERVP(vp));
1689 	if (LOWERVP(vp) != NULLVP)
1690 		vprint("union: lower", LOWERVP(vp));
1691 	if (VTOUNION(vp)->un_dircache) {
1692 		struct vnode **vpp;
1693 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1694 			vprint("dircache:", *vpp);
1695 	}
1696 
1697 	return (0);
1698 }
1699 
1700 int
1701 union_islocked(void *v)
1702 {
1703 	struct vop_islocked_args /* {
1704 		struct vnode *a_vp;
1705 	} */ *ap = v;
1706 	struct vnode *vp;
1707 	struct union_node *un;
1708 
1709 	un = VTOUNION(ap->a_vp);
1710 	mutex_enter(&un->un_lock);
1711 	vp = LOCKVP(ap->a_vp);
1712 	mutex_exit(&un->un_lock);
1713 
1714 	if (vp == ap->a_vp)
1715 		return genfs_islocked(ap);
1716 	else
1717 		return VOP_ISLOCKED(vp);
1718 }
1719 
1720 int
1721 union_pathconf(void *v)
1722 {
1723 	struct vop_pathconf_args /* {
1724 		struct vnode *a_vp;
1725 		int a_name;
1726 		int *a_retval;
1727 	} */ *ap = v;
1728 	int error;
1729 	struct vnode *vp = OTHERVP(ap->a_vp);
1730 	int dolock = (vp == LOWERVP(ap->a_vp));
1731 
1732 	if (dolock)
1733 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1734 	ap->a_vp = vp;
1735 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1736 	if (dolock)
1737 		VOP_UNLOCK(vp);
1738 
1739 	return (error);
1740 }
1741 
1742 int
1743 union_advlock(void *v)
1744 {
1745 	struct vop_advlock_args /* {
1746 		struct vnode *a_vp;
1747 		void *a_id;
1748 		int  a_op;
1749 		struct flock *a_fl;
1750 		int  a_flags;
1751 	} */ *ap = v;
1752 	struct vnode *ovp = OTHERVP(ap->a_vp);
1753 
1754 	ap->a_vp = ovp;
1755 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1756 }
1757 
1758 int
1759 union_strategy(void *v)
1760 {
1761 	struct vop_strategy_args /* {
1762 		struct vnode *a_vp;
1763 		struct buf *a_bp;
1764 	} */ *ap = v;
1765 	struct vnode *ovp = OTHERVP(ap->a_vp);
1766 	struct buf *bp = ap->a_bp;
1767 
1768 	KASSERT(ovp != NULLVP);
1769 	if (!NODE_IS_SPECIAL(ovp))
1770 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1771 
1772 	return (VOP_STRATEGY(ovp, bp));
1773 }
1774 
1775 int
1776 union_bwrite(void *v)
1777 {
1778 	struct vop_bwrite_args /* {
1779 		struct vnode *a_vp;
1780 		struct buf *a_bp;
1781 	} */ *ap = v;
1782 	struct vnode *ovp = OTHERVP(ap->a_vp);
1783 	struct buf *bp = ap->a_bp;
1784 
1785 	KASSERT(ovp != NULLVP);
1786 	if (!NODE_IS_SPECIAL(ovp))
1787 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1788 
1789 	return (VOP_BWRITE(ovp, bp));
1790 }
1791 
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	/* Locked requests are not bypassed; make the caller retry. */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the backing vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1818 
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the backing vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/*
	 * Nothing to reclaim at this layer; the interlock must still
	 * be released before returning.
	 */
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1843 
1844 int
1845 union_kqfilter(void *v)
1846 {
1847 	struct vop_kqfilter_args /* {
1848 		struct vnode	*a_vp;
1849 		struct knote	*a_kn;
1850 	} */ *ap = v;
1851 	int error;
1852 
1853 	/*
1854 	 * We watch either the upper layer file (if it already exists),
1855 	 * or the lower layer one. If there is lower layer file only
1856 	 * at this moment, we will keep watching that lower layer file
1857 	 * even if upper layer file would be created later on.
1858 	 */
1859 	if (UPPERVP(ap->a_vp))
1860 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1861 	else if (LOWERVP(ap->a_vp))
1862 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1863 	else {
1864 		/* panic? */
1865 		error = EOPNOTSUPP;
1866 	}
1867 
1868 	return (error);
1869 }
1870