1 /*	$NetBSD: union_vnops.c,v 1.70 2017/05/26 14:21:01 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.70 2017/05/26 14:21:01 riastradh Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95 
96 int union_lookup(void *);
97 int union_create(void *);
98 int union_whiteout(void *);
99 int union_mknod(void *);
100 int union_open(void *);
101 int union_close(void *);
102 int union_access(void *);
103 int union_getattr(void *);
104 int union_setattr(void *);
105 int union_read(void *);
106 int union_write(void *);
107 int union_ioctl(void *);
108 int union_poll(void *);
109 int union_revoke(void *);
110 int union_mmap(void *);
111 int union_fsync(void *);
112 int union_seek(void *);
113 int union_remove(void *);
114 int union_link(void *);
115 int union_rename(void *);
116 int union_mkdir(void *);
117 int union_rmdir(void *);
118 int union_symlink(void *);
119 int union_readdir(void *);
120 int union_readlink(void *);
121 int union_abortop(void *);
122 int union_inactive(void *);
123 int union_reclaim(void *);
124 int union_lock(void *);
125 int union_unlock(void *);
126 int union_bmap(void *);
127 int union_print(void *);
128 int union_islocked(void *);
129 int union_pathconf(void *);
130 int union_advlock(void *);
131 int union_strategy(void *);
132 int union_bwrite(void *);
133 int union_getpages(void *);
134 int union_putpages(void *);
135 int union_kqfilter(void *);
136 
137 static int union_lookup1(struct vnode *, struct vnode **,
138 			      struct vnode **, struct componentname *);
139 
140 
141 /*
142  * Global vfs data structures
143  */
144 int (**union_vnodeop_p)(void *);
145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146 	{ &vop_default_desc, vn_default_error },
147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
148 	{ &vop_create_desc, union_create },		/* create */
149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
151 	{ &vop_open_desc, union_open },			/* open */
152 	{ &vop_close_desc, union_close },		/* close */
153 	{ &vop_access_desc, union_access },		/* access */
154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
156 	{ &vop_read_desc, union_read },			/* read */
157 	{ &vop_write_desc, union_write },		/* write */
158 	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
159 	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
160 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
161 	{ &vop_poll_desc, union_poll },			/* select */
162 	{ &vop_revoke_desc, union_revoke },		/* revoke */
163 	{ &vop_mmap_desc, union_mmap },			/* mmap */
164 	{ &vop_fsync_desc, union_fsync },		/* fsync */
165 	{ &vop_seek_desc, union_seek },			/* seek */
166 	{ &vop_remove_desc, union_remove },		/* remove */
167 	{ &vop_link_desc, union_link },			/* link */
168 	{ &vop_rename_desc, union_rename },		/* rename */
169 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
170 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
171 	{ &vop_symlink_desc, union_symlink },		/* symlink */
172 	{ &vop_readdir_desc, union_readdir },		/* readdir */
173 	{ &vop_readlink_desc, union_readlink },		/* readlink */
174 	{ &vop_abortop_desc, union_abortop },		/* abortop */
175 	{ &vop_inactive_desc, union_inactive },		/* inactive */
176 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
177 	{ &vop_lock_desc, union_lock },			/* lock */
178 	{ &vop_unlock_desc, union_unlock },		/* unlock */
179 	{ &vop_bmap_desc, union_bmap },			/* bmap */
180 	{ &vop_strategy_desc, union_strategy },		/* strategy */
181 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
182 	{ &vop_print_desc, union_print },		/* print */
183 	{ &vop_islocked_desc, union_islocked },		/* islocked */
184 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
185 	{ &vop_advlock_desc, union_advlock },		/* advlock */
186 	{ &vop_getpages_desc, union_getpages },		/* getpages */
187 	{ &vop_putpages_desc, union_putpages },		/* putpages */
188 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
189 	{ NULL, NULL }
190 };
191 const struct vnodeopv_desc union_vnodeop_opv_desc =
192 	{ &union_vnodeop_p, union_vnodeop_entries };
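/*
 * The table above is normally hooked into the kernel from union_vfsops.c,
 * whose vfs_opv_descs list includes union_vnodeop_opv_desc; vfs_attach()
 * then allocates and fills union_vnodeop_p.  Every union vnode created by
 * union_allocvp() dispatches its VOP_*() calls through this vector.
 */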
193 
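/*
 * NODE_IS_SPECIAL() is true for vnodes with no file data of their own
 * (devices, sockets, fifos).  Such nodes are never copied up and several
 * operations below treat them specially (e.g. union_write() and
 * union_setattr()).
 */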
194 #define NODE_IS_SPECIAL(vp) \
195 	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
196 	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
197 
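/*
 * union_lookup1:
 *
 *	Look up one component within a single (upper or lower) layer.
 *	For ".." this steps back across mount points covered by the
 *	layer, and on the way out it descends into any file system
 *	mounted on the result, so the union always sees the topmost
 *	namespace.  On success the resulting vnode is returned locked
 *	in *vpp.
 */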
198 static int
199 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
200 	struct componentname *cnp)
201 {
202 	int error;
203 	struct vnode *tdvp;
204 	struct vnode *dvp;
205 	struct mount *mp;
206 
207 	dvp = *dvpp;
208 
209 	/*
210 	 * If stepping up the directory tree, check for going
211 	 * back across the mount point, in which case do what
212 	 * lookup would do by stepping back down the mount
213 	 * hierarchy.
214 	 */
215 	if (cnp->cn_flags & ISDOTDOT) {
216 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
217 			/*
218 			 * Don't do the NOCROSSMOUNT check
219 			 * at this level.  By definition,
220 			 * union fs deals with namespaces, not
221 			 * filesystems.
222 			 */
223 			tdvp = dvp;
224 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
225 			VOP_UNLOCK(tdvp);
226 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
227 		}
228 	}
229 
230 	error = VOP_LOOKUP(dvp, &tdvp, cnp);
231 	if (error)
232 		return (error);
233 	if (dvp != tdvp) {
234 		if (cnp->cn_flags & ISDOTDOT)
235 			VOP_UNLOCK(dvp);
236 		error = vn_lock(tdvp, LK_EXCLUSIVE);
237 		if (cnp->cn_flags & ISDOTDOT)
238 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
239 		if (error) {
240 			vrele(tdvp);
241 			return error;
242 		}
243 		dvp = tdvp;
244 	}
245 
246 	/*
247 	 * Lastly check if the current node is a mount point in
248 	 * which case walk up the mount hierarchy making sure not to
249 	 * bump into the root of the mount tree (ie. dvp != udvp).
250 	 */
251 	while (dvp != udvp && (dvp->v_type == VDIR) &&
252 	       (mp = dvp->v_mountedhere)) {
253 		if (vfs_busy(mp))
254 			continue;
255 		vput(dvp);
256 		error = VFS_ROOT(mp, &tdvp);
257 		vfs_unbusy(mp);
258 		if (error) {
259 			return (error);
260 		}
261 		dvp = tdvp;
262 	}
263 
264 	*vpp = dvp;
265 	return (0);
266 }
267 
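/*
 * union_lookup:
 *
 *	Look the name up in the upper layer and, unless a whiteout or an
 *	opaque upper directory hides it, in the lower layer as well.  The
 *	two results are then combined into a single union node with
 *	union_allocvp(); the case analysis is spelled out in the large
 *	comment in the middle of this function.
 */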
268 int
269 union_lookup(void *v)
270 {
271 	struct vop_lookup_v2_args /* {
272 		struct vnodeop_desc *a_desc;
273 		struct vnode *a_dvp;
274 		struct vnode **a_vpp;
275 		struct componentname *a_cnp;
276 	} */ *ap = v;
277 	int error;
278 	int uerror, lerror;
279 	struct vnode *uppervp, *lowervp;
280 	struct vnode *upperdvp, *lowerdvp;
281 	struct vnode *dvp = ap->a_dvp;
282 	struct union_node *dun = VTOUNION(dvp);
283 	struct componentname *cnp = ap->a_cnp;
284 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
285 	kauth_cred_t saved_cred = NULL;
286 	int iswhiteout;
287 	struct vattr va;
288 
289 #ifdef notyet
290 	if (cnp->cn_namelen == 3 &&
291 			cnp->cn_nameptr[2] == '.' &&
292 			cnp->cn_nameptr[1] == '.' &&
293 			cnp->cn_nameptr[0] == '.') {
294 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
295 		if (dvp == NULLVP)
296 			return (ENOENT);
297 		vref(dvp);
298 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
299 		return (0);
300 	}
301 #endif
302 
303 	if ((cnp->cn_flags & ISLASTCN) &&
304 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
305 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
306 		return (EROFS);
307 
308 start:
309 	upperdvp = dun->un_uppervp;
310 	lowerdvp = dun->un_lowervp;
311 	uppervp = NULLVP;
312 	lowervp = NULLVP;
313 	iswhiteout = 0;
314 
315 	/*
316 	 * do the lookup in the upper level.
317  * if that level consumes additional pathnames,
318 	 * then assume that something special is going
319 	 * on and just return that vnode.
320 	 */
321 	if (upperdvp != NULLVP) {
322 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
323 					&uppervp, cnp);
324 		if (cnp->cn_consume != 0) {
325 			if (uppervp != upperdvp)
326 				VOP_UNLOCK(uppervp);
327 			*ap->a_vpp = uppervp;
328 			return (uerror);
329 		}
330 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
331 			if (cnp->cn_flags & ISWHITEOUT) {
332 				iswhiteout = 1;
333 			} else if (lowerdvp != NULLVP) {
334 				lerror = VOP_GETATTR(upperdvp, &va,
335 					cnp->cn_cred);
336 				if (lerror == 0 && (va.va_flags & OPAQUE))
337 					iswhiteout = 1;
338 			}
339 		}
340 	} else {
341 		uerror = ENOENT;
342 	}
343 
344 	/*
345 	 * in a similar way to the upper layer, do the lookup
346 	 * in the lower layer.   this time, if there is some
347 	 * component magic going on, then vput whatever we got
348 	 * back from the upper layer and return the lower vnode
349 	 * instead.
350 	 */
351 	if (lowerdvp != NULLVP && !iswhiteout) {
352 		int nameiop;
353 
354 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
355 
356 		/*
357 		 * Only do a LOOKUP on the bottom node, since
358 		 * we won't be making changes to it anyway.
359 		 */
360 		nameiop = cnp->cn_nameiop;
361 		cnp->cn_nameiop = LOOKUP;
362 		if (um->um_op == UNMNT_BELOW) {
363 			saved_cred = cnp->cn_cred;
364 			cnp->cn_cred = um->um_cred;
365 		}
366 
367 		/*
368 		 * we shouldn't have to worry about locking interactions
369 		 * between the lower layer and our union layer (w.r.t.
370 		 * `..' processing) because we don't futz with lowervp
371 		 * locks in the union-node instantiation code path.
372 		 */
373 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
374 				&lowervp, cnp);
375 		if (um->um_op == UNMNT_BELOW)
376 			cnp->cn_cred = saved_cred;
377 		cnp->cn_nameiop = nameiop;
378 
379 		if (lowervp != lowerdvp)
380 			VOP_UNLOCK(lowerdvp);
381 
382 		if (cnp->cn_consume != 0) {
383 			if (uppervp != NULLVP) {
384 				if (uppervp == upperdvp)
385 					vrele(uppervp);
386 				else
387 					vput(uppervp);
388 				uppervp = NULLVP;
389 			}
390 			*ap->a_vpp = lowervp;
391 			return (lerror);
392 		}
393 	} else {
394 		lerror = ENOENT;
395 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
396 			lowervp = LOWERVP(dun->un_pvp);
397 			if (lowervp != NULLVP) {
398 				vref(lowervp);
399 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
400 				lerror = 0;
401 			}
402 		}
403 	}
404 
405 	/*
406 	 * EJUSTRETURN is used by underlying filesystems to indicate that
407 	 * a directory modification op was started successfully.
408 	 * This will only happen in the upper layer, since
409 	 * the lower layer only does LOOKUPs.
410 	 * If this union is mounted read-only, bounce it now.
411 	 */
412 
413 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
414 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
415 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
416 		uerror = EROFS;
417 
418 	/*
419 	 * at this point, we have uerror and lerror indicating
420 	 * possible errors with the lookups in the upper and lower
421 	 * layers.  additionally, uppervp and lowervp are (locked)
422 	 * references to existing vnodes in the upper and lower layers.
423 	 *
424 	 * there are now three cases to consider.
425 	 * 1. if both layers returned an error, then return whatever
426 	 *    error the upper layer generated.
427 	 *
428 	 * 2. if the top layer failed and the bottom layer succeeded
429 	 *    then two subcases occur.
430 	 *    a.  the bottom vnode is not a directory, in which
431 	 *	  case just return a new union vnode referencing
432 	 *	  an empty top layer and the existing bottom layer.
433 	 *    b.  the bottom vnode is a directory, in which case
434 	 *	  create a new directory in the top-level and
435 	 *	  continue as in case 3.
436 	 *
437 	 * 3. if the top layer succeeded then return a new union
438 	 *    vnode referencing whatever the (possibly new) top layer
439 	 *    and whatever the bottom layer returned.
440 	 */
441 
442 	*ap->a_vpp = NULLVP;
443 
444 
445 	/* case 1. */
446 	if ((uerror != 0) && (lerror != 0)) {
447 		return (uerror);
448 	}
449 
450 	/* case 2. */
451 	if (uerror != 0 /* && (lerror == 0) */ ) {
452 		if (lowervp->v_type == VDIR) { /* case 2b. */
453 			/*
454 			 * We may be racing another process to make the
455 			 * upper-level shadow directory.  Be careful with
456 			 * locks/etc!
457 			 * If we have to create a shadow directory and want
458 			 * to commit the node we have to restart the lookup
459 			 * to get the componentname right.
460 			 */
461 			if (upperdvp) {
462 				VOP_UNLOCK(upperdvp);
463 				uerror = union_mkshadow(um, upperdvp, cnp,
464 				    &uppervp);
465 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
466 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
467 					vrele(uppervp);
468 					if (lowervp != NULLVP)
469 						vput(lowervp);
470 					goto start;
471 				}
472 			}
473 			if (uerror) {
474 				if (lowervp != NULLVP) {
475 					vput(lowervp);
476 					lowervp = NULLVP;
477 				}
478 				return (uerror);
479 			}
480 		}
481 	} else { /* uerror == 0 */
482 		if (uppervp != upperdvp)
483 			VOP_UNLOCK(uppervp);
484 	}
485 
486 	if (lowervp != NULLVP)
487 		VOP_UNLOCK(lowervp);
488 
489 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
490 			      uppervp, lowervp, 1);
491 
492 	if (error) {
493 		if (uppervp != NULLVP)
494 			vrele(uppervp);
495 		if (lowervp != NULLVP)
496 			vrele(lowervp);
497 		return error;
498 	}
499 
500 	return 0;
501 }
502 
503 int
504 union_create(void *v)
505 {
506 	struct vop_create_v3_args /* {
507 		struct vnode *a_dvp;
508 		struct vnode **a_vpp;
509 		struct componentname *a_cnp;
510 		struct vattr *a_vap;
511 	} */ *ap = v;
512 	struct union_node *un = VTOUNION(ap->a_dvp);
513 	struct vnode *dvp = un->un_uppervp;
514 	struct componentname *cnp = ap->a_cnp;
515 
516 	if (dvp != NULLVP) {
517 		int error;
518 		struct vnode *vp;
519 		struct mount *mp;
520 
521 		mp = ap->a_dvp->v_mount;
522 
523 		vp = NULL;
524 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
525 		if (error)
526 			return (error);
527 
528 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
529 				NULLVP, 1);
530 		if (error)
531 			vrele(vp);
532 		return (error);
533 	}
534 
535 	return (EROFS);
536 }
537 
538 int
539 union_whiteout(void *v)
540 {
541 	struct vop_whiteout_args /* {
542 		struct vnode *a_dvp;
543 		struct componentname *a_cnp;
544 		int a_flags;
545 	} */ *ap = v;
546 	struct union_node *un = VTOUNION(ap->a_dvp);
547 	struct componentname *cnp = ap->a_cnp;
548 
549 	if (un->un_uppervp == NULLVP)
550 		return (EOPNOTSUPP);
551 
552 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
553 }
554 
555 int
556 union_mknod(void *v)
557 {
558 	struct vop_mknod_v3_args /* {
559 		struct vnode *a_dvp;
560 		struct vnode **a_vpp;
561 		struct componentname *a_cnp;
562 		struct vattr *a_vap;
563 	} */ *ap = v;
564 	struct union_node *un = VTOUNION(ap->a_dvp);
565 	struct vnode *dvp = un->un_uppervp;
566 	struct componentname *cnp = ap->a_cnp;
567 
568 	if (dvp != NULLVP) {
569 		int error;
570 		struct vnode *vp;
571 		struct mount *mp;
572 
573 		mp = ap->a_dvp->v_mount;
574 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
575 		if (error)
576 			return (error);
577 
578 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
579 				      cnp, vp, NULLVP, 1);
580 		if (error)
581 			vrele(vp);
582 		return (error);
583 	}
584 
585 	return (EROFS);
586 }
587 
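/*
 * union_open:
 *
 *	Open the upper vnode if the node has one.  Otherwise a regular
 *	file opened for writing is copied up first (union_copyup()) so
 *	all modifications land in the upper layer; any other lower vnode
 *	is opened directly, subject to the MNT_NODEV check for block and
 *	character devices.
 */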
588 int
589 union_open(void *v)
590 {
591 	struct vop_open_args /* {
592 		struct vnodeop_desc *a_desc;
593 		struct vnode *a_vp;
594 		int a_mode;
595 		kauth_cred_t a_cred;
596 	} */ *ap = v;
597 	struct union_node *un = VTOUNION(ap->a_vp);
598 	struct vnode *tvp;
599 	int mode = ap->a_mode;
600 	kauth_cred_t cred = ap->a_cred;
601 	struct lwp *l = curlwp;
602 	int error;
603 
604 	/*
605 	 * If there is an existing upper vp then simply open that.
606 	 */
607 	tvp = un->un_uppervp;
608 	if (tvp == NULLVP) {
609 		/*
610 		 * If the lower vnode is being opened for writing, then
611 		 * copy the file contents to the upper vnode and open that,
612 		 * otherwise we can simply open the lower vnode.
613 		 */
614 		tvp = un->un_lowervp;
615 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
616 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
617 			if (error == 0)
618 				error = VOP_OPEN(un->un_uppervp, mode, cred);
619 			if (error == 0) {
620 				mutex_enter(un->un_uppervp->v_interlock);
621 				un->un_uppervp->v_writecount++;
622 				mutex_exit(un->un_uppervp->v_interlock);
623 			}
624 			return (error);
625 		}
626 
627 		/*
628 		 * Just open the lower vnode, but check for nodev mount flag
629 		 */
630 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
631 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
632 			return ENXIO;
633 		un->un_openl++;
634 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
635 		error = VOP_OPEN(tvp, mode, cred);
636 		VOP_UNLOCK(tvp);
637 
638 		return (error);
639 	}
640 	/*
641 	 * Just open the upper vnode, checking for nodev mount flag first
642 	 */
643 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
644 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
645 		return ENXIO;
646 
647 	error = VOP_OPEN(tvp, mode, cred);
648 	if (error == 0 && (ap->a_mode & FWRITE)) {
649 		mutex_enter(tvp->v_interlock);
650 		tvp->v_writecount++;
651 		mutex_exit(tvp->v_interlock);
652 	}
653 
654 	return (error);
655 }
656 
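/*
 * union_close:
 *
 *	Pass the close on to the layer the open used.  Opens of the
 *	lower vnode are counted in un_openl so that the matching close
 *	can be directed (and locked) correctly here.
 */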
657 int
658 union_close(void *v)
659 {
660 	struct vop_close_args /* {
661 		struct vnode *a_vp;
662 		int  a_fflag;
663 		kauth_cred_t a_cred;
664 	} */ *ap = v;
665 	struct union_node *un = VTOUNION(ap->a_vp);
666 	struct vnode *vp;
667 	int error;
668 	bool do_lock;
669 
670 	vp = un->un_uppervp;
671 	if (vp != NULLVP) {
672 		do_lock = false;
673 	} else {
674 		KASSERT(un->un_openl > 0);
675 		--un->un_openl;
676 		vp = un->un_lowervp;
677 		do_lock = true;
678 	}
679 
680 	KASSERT(vp != NULLVP);
681 	ap->a_vp = vp;
682 	if ((ap->a_fflag & FWRITE)) {
683 		KASSERT(vp == un->un_uppervp);
684 		mutex_enter(vp->v_interlock);
685 		vp->v_writecount--;
686 		mutex_exit(vp->v_interlock);
687 	}
688 	if (do_lock)
689 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
690 	error = VCALL(vp, VOFFSET(vop_close), ap);
691 	if (do_lock)
692 		VOP_UNLOCK(vp);
693 
694 	return error;
695 }
696 
697 /*
698  * Check access permission on the union vnode.
699  * The access check being enforced is to check
700  * against both the underlying vnode, and any
701  * copied vnode.  This ensures that no additional
702  * file permissions are given away simply because
703  * the user caused an implicit file copy.
704  */
705 int
706 union_access(void *v)
707 {
708 	struct vop_access_args /* {
709 		struct vnodeop_desc *a_desc;
710 		struct vnode *a_vp;
711 		int a_mode;
712 		kauth_cred_t a_cred;
713 	} */ *ap = v;
714 	struct vnode *vp = ap->a_vp;
715 	struct union_node *un = VTOUNION(vp);
716 	int error = EACCES;
717 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
718 
719 	/*
720 	 * Disallow write attempts on read-only file systems;
721 	 * unless the file is a socket, fifo, or a block or
722 	 * character device resident on the file system.
723 	 */
724 	if (ap->a_mode & VWRITE) {
725 		switch (vp->v_type) {
726 		case VDIR:
727 		case VLNK:
728 		case VREG:
729 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
730 				return (EROFS);
731 			break;
732 		case VBAD:
733 		case VBLK:
734 		case VCHR:
735 		case VSOCK:
736 		case VFIFO:
737 		case VNON:
738 		default:
739 			break;
740 		}
741 	}
742 
743 
744 	if ((vp = un->un_uppervp) != NULLVP) {
745 		ap->a_vp = vp;
746 		return (VCALL(vp, VOFFSET(vop_access), ap));
747 	}
748 
749 	if ((vp = un->un_lowervp) != NULLVP) {
750 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
751 		ap->a_vp = vp;
752 		error = VCALL(vp, VOFFSET(vop_access), ap);
753 		if (error == 0) {
754 			if (um->um_op == UNMNT_BELOW) {
755 				ap->a_cred = um->um_cred;
756 				error = VCALL(vp, VOFFSET(vop_access), ap);
757 			}
758 		}
759 		VOP_UNLOCK(vp);
760 		if (error)
761 			return (error);
762 	}
763 
764 	return (error);
765 }
766 
767 /*
768  * We handle getattr only to change the fsid and
769  * track object sizes
770  */
771 int
772 union_getattr(void *v)
773 {
774 	struct vop_getattr_args /* {
775 		struct vnode *a_vp;
776 		struct vattr *a_vap;
777 		kauth_cred_t a_cred;
778 	} */ *ap = v;
779 	int error;
780 	struct union_node *un = VTOUNION(ap->a_vp);
781 	struct vnode *vp = un->un_uppervp;
782 	struct vattr *vap;
783 	struct vattr va;
784 
785 
786 	/*
787 	 * Some programs walk the filesystem hierarchy by counting
788 	 * links to directories to avoid stat'ing all the time.
789 	 * This means the link count on directories needs to be "correct".
790 	 * The only way to do that is to call getattr on both layers
791 	 * and fix up the link count.  The link count will not necessarily
792 	 * be accurate but will be large enough to defeat the tree walkers.
793 	 *
794 	 * To make life more interesting, some filesystems don't keep
795 	 * track of link counts in the expected way, and return a
796 	 * link count of `1' for those directories; if either of the
797 	 * component directories returns a link count of `1', we return a 1.
798 	 */
799 
800 	vap = ap->a_vap;
801 
802 	vp = un->un_uppervp;
803 	if (vp != NULLVP) {
804 		error = VOP_GETATTR(vp, vap, ap->a_cred);
805 		if (error)
806 			return (error);
807 		mutex_enter(&un->un_lock);
808 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
809 	}
810 
811 	if (vp == NULLVP) {
812 		vp = un->un_lowervp;
813 	} else if (vp->v_type == VDIR) {
814 		vp = un->un_lowervp;
815 		if (vp != NULLVP)
816 			vap = &va;
817 	} else {
818 		vp = NULLVP;
819 	}
820 
821 	if (vp != NULLVP) {
822 		if (vp == un->un_lowervp)
823 			vn_lock(vp, LK_SHARED | LK_RETRY);
824 		error = VOP_GETATTR(vp, vap, ap->a_cred);
825 		if (vp == un->un_lowervp)
826 			VOP_UNLOCK(vp);
827 		if (error)
828 			return (error);
829 		mutex_enter(&un->un_lock);
830 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
831 	}
832 
833 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
834 		/*
835 		 * Link count manipulation:
836 		 *	- If both return "2", return 2 (no subdirs)
837 		 *	- If one or the other returns "1", return "1" (ENOCLUE)
838 		 */
839 		if ((ap->a_vap->va_nlink == 2) &&
840 		    (vap->va_nlink == 2))
841 			;
842 		else if (ap->a_vap->va_nlink != 1) {
843 			if (vap->va_nlink == 1)
844 				ap->a_vap->va_nlink = 1;
845 			else
846 				ap->a_vap->va_nlink += vap->va_nlink;
847 		}
848 	}
849 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
850 	return (0);
851 }
852 
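/*
 * union_setattr:
 *
 *	Attribute changes only ever go to the upper layer.  A lower-only
 *	regular file is copied up first when its size is being set (the
 *	O_TRUNC/O_CREAT case); for lower-only special files a truncation
 *	to zero is quietly accepted, anything else fails with EROFS.
 */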
853 int
854 union_setattr(void *v)
855 {
856 	struct vop_setattr_args /* {
857 		struct vnode *a_vp;
858 		struct vattr *a_vap;
859 		kauth_cred_t a_cred;
860 	} */ *ap = v;
861 	struct vattr *vap = ap->a_vap;
862 	struct vnode *vp = ap->a_vp;
863 	struct union_node *un = VTOUNION(vp);
864 	bool size_only;		/* All but va_size are VNOVAL. */
865 	int error;
866 
867 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
868 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
869 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
870 
871 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
872 		return (EROFS);
873 	if (vap->va_size != VNOVAL) {
874 		switch (vp->v_type) {
875 		case VDIR:
876 			return (EISDIR);
877 		case VCHR:
878 		case VBLK:
879 		case VSOCK:
880 		case VFIFO:
881 			break;
882 		case VREG:
883 		case VLNK:
884 		default:
885 			/*
886 			 * Disallow write attempts if the filesystem is
887 			 * mounted read-only.
888 			 */
889 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
890 				return (EROFS);
891 		}
892 	}
893 
894 	/*
895 	 * Handle case of truncating lower object to zero size,
896 	 * by creating a zero length upper object.  This is to
897 	 * handle the case of open with O_TRUNC and O_CREAT.
898 	 */
899 	if ((un->un_uppervp == NULLVP) &&
900 	    /* assert(un->un_lowervp != NULLVP) */
901 	    (un->un_lowervp->v_type == VREG)) {
902 		error = union_copyup(un, (vap->va_size != 0),
903 						ap->a_cred, curlwp);
904 		if (error)
905 			return (error);
906 	}
907 
908 	/*
909 	 * Try to set the attributes in the upper layer.  If there is no
910 	 * upper layer, ignore a size change to zero on special files (to
911 	 * handle O_TRUNC) and fail with EROFS otherwise.
912 	 */
913 	if (un->un_uppervp != NULLVP) {
914 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
915 		if ((error == 0) && (vap->va_size != VNOVAL)) {
916 			mutex_enter(&un->un_lock);
917 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
918 		}
919 	} else {
920 		KASSERT(un->un_lowervp != NULLVP);
921 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
922 			if (size_only &&
923 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
924 				error = 0;
925 			else
926 				error = EROFS;
927 		} else {
928 			error = EROFS;
929 		}
930 	}
931 
932 	return (error);
933 }
934 
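/*
 * union_read:
 *
 *	Read from whichever layer backs the node (upper if present,
 *	otherwise lower) and update the cached layer size if the uio
 *	offset shows the underlying object has grown.
 */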
935 int
936 union_read(void *v)
937 {
938 	struct vop_read_args /* {
939 		struct vnode *a_vp;
940 		struct uio *a_uio;
941 		int  a_ioflag;
942 		kauth_cred_t a_cred;
943 	} */ *ap = v;
944 	int error;
945 	struct vnode *vp = OTHERVP(ap->a_vp);
946 	int dolock = (vp == LOWERVP(ap->a_vp));
947 
948 	if (dolock)
949 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
950 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
951 	if (dolock)
952 		VOP_UNLOCK(vp);
953 
954 	/*
955 	 * XXX
956 	 * perhaps the size of the underlying object has changed under
957 	 * our feet.  take advantage of the offset information present
958 	 * in the uio structure.
959 	 */
960 	if (error == 0) {
961 		struct union_node *un = VTOUNION(ap->a_vp);
962 		off_t cur = ap->a_uio->uio_offset;
963 		off_t usz = VNOVAL, lsz = VNOVAL;
964 
965 		mutex_enter(&un->un_lock);
966 		if (vp == un->un_uppervp) {
967 			if (cur > un->un_uppersz)
968 				usz = cur;
969 		} else {
970 			if (cur > un->un_lowersz)
971 				lsz = cur;
972 		}
973 
974 		if (usz != VNOVAL || lsz != VNOVAL)
975 			union_newsize(ap->a_vp, usz, lsz);
976 		else
977 			mutex_exit(&un->un_lock);
978 	}
979 
980 	return (error);
981 }
982 
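/*
 * union_write:
 *
 *	Writes must go to the upper vnode; union_open() and
 *	union_setattr() arrange for it to exist by copying the file up
 *	before write access is granted.  The only exception is special
 *	files, which may be written directly in the lower layer.
 */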
983 int
984 union_write(void *v)
985 {
986 	struct vop_write_args /* {
987 		struct vnode *a_vp;
988 		struct uio *a_uio;
989 		int  a_ioflag;
990 		kauth_cred_t a_cred;
991 	} */ *ap = v;
992 	int error;
993 	struct vnode *vp;
994 	struct union_node *un = VTOUNION(ap->a_vp);
995 
996 	vp = UPPERVP(ap->a_vp);
997 	if (vp == NULLVP) {
998 		vp = LOWERVP(ap->a_vp);
999 		if (NODE_IS_SPECIAL(vp)) {
1000 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1001 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1002 			    ap->a_cred);
1003 			VOP_UNLOCK(vp);
1004 			return error;
1005 		}
1006 		panic("union: missing upper layer in write");
1007 	}
1008 
1009 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1010 
1011 	/*
1012 	 * the size of the underlying object may be changed by the
1013 	 * write.
1014 	 */
1015 	if (error == 0) {
1016 		off_t cur = ap->a_uio->uio_offset;
1017 
1018 		mutex_enter(&un->un_lock);
1019 		if (cur > un->un_uppersz)
1020 			union_newsize(ap->a_vp, cur, VNOVAL);
1021 		else
1022 			mutex_exit(&un->un_lock);
1023 	}
1024 
1025 	return (error);
1026 }
1027 
1028 int
1029 union_ioctl(void *v)
1030 {
1031 	struct vop_ioctl_args /* {
1032 		struct vnode *a_vp;
1033 		int  a_command;
1034 		void *a_data;
1035 		int  a_fflag;
1036 		kauth_cred_t a_cred;
1037 	} */ *ap = v;
1038 	struct vnode *ovp = OTHERVP(ap->a_vp);
1039 
1040 	ap->a_vp = ovp;
1041 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1042 }
1043 
1044 int
1045 union_poll(void *v)
1046 {
1047 	struct vop_poll_args /* {
1048 		struct vnode *a_vp;
1049 		int a_events;
1050 	} */ *ap = v;
1051 	struct vnode *ovp = OTHERVP(ap->a_vp);
1052 
1053 	ap->a_vp = ovp;
1054 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1055 }
1056 
1057 int
1058 union_revoke(void *v)
1059 {
1060 	struct vop_revoke_args /* {
1061 		struct vnode *a_vp;
1062 		int a_flags;
1063 		struct proc *a_p;
1064 	} */ *ap = v;
1065 	struct vnode *vp = ap->a_vp;
1066 
1067 	if (UPPERVP(vp))
1068 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1069 	if (LOWERVP(vp))
1070 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1071 	vgone(vp);	/* XXXAD?? */
1072 	return (0);
1073 }
1074 
1075 int
1076 union_mmap(void *v)
1077 {
1078 	struct vop_mmap_args /* {
1079 		struct vnode *a_vp;
1080 		vm_prot_t a_prot;
1081 		kauth_cred_t a_cred;
1082 	} */ *ap = v;
1083 	struct vnode *ovp = OTHERVP(ap->a_vp);
1084 
1085 	ap->a_vp = ovp;
1086 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1087 }
1088 
1089 int
1090 union_fsync(void *v)
1091 {
1092 	struct vop_fsync_args /* {
1093 		struct vnode *a_vp;
1094 		kauth_cred_t a_cred;
1095 		int  a_flags;
1096 		off_t offhi;
1097 		off_t offlo;
1098 	} */ *ap = v;
1099 	int error = 0;
1100 	struct vnode *targetvp;
1101 
1102 	/*
1103 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1104 	 * bother syncing the underlying vnodes, since (a) they'll be
1105 	 * fsync'ed when reclaimed and (b) we could deadlock if
1106 	 * they're locked; otherwise, pass it through to the
1107 	 * underlying layer.
1108 	 */
1109 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1110 		error = spec_fsync(v);
1111 		if (error)
1112 			return error;
1113 	}
1114 
1115 	if (ap->a_flags & FSYNC_RECLAIM)
1116 		return 0;
1117 
1118 	targetvp = OTHERVP(ap->a_vp);
1119 	if (targetvp != NULLVP) {
1120 		int dolock = (targetvp == LOWERVP(ap->a_vp));
1121 
1122 		if (dolock)
1123 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1124 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1125 			    ap->a_offlo, ap->a_offhi);
1126 		if (dolock)
1127 			VOP_UNLOCK(targetvp);
1128 	}
1129 
1130 	return (error);
1131 }
1132 
1133 int
1134 union_seek(void *v)
1135 {
1136 	struct vop_seek_args /* {
1137 		struct vnode *a_vp;
1138 		off_t  a_oldoff;
1139 		off_t  a_newoff;
1140 		kauth_cred_t a_cred;
1141 	} */ *ap = v;
1142 	struct vnode *ovp = OTHERVP(ap->a_vp);
1143 
1144 	ap->a_vp = ovp;
1145 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1146 }
1147 
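/*
 * union_remove:
 *
 *	Remove the name from the upper layer if the object exists there,
 *	asking the upper file system to leave a whiteout behind when
 *	union_dowhiteout() says one is needed (typically because a lower
 *	object of the same name would become visible again).  A
 *	lower-only object is "removed" by just creating the whiteout.
 */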
1148 int
1149 union_remove(void *v)
1150 {
1151 	struct vop_remove_v2_args /* {
1152 		struct vnode *a_dvp;
1153 		struct vnode *a_vp;
1154 		struct componentname *a_cnp;
1155 	} */ *ap = v;
1156 	int error;
1157 	struct union_node *dun = VTOUNION(ap->a_dvp);
1158 	struct union_node *un = VTOUNION(ap->a_vp);
1159 	struct componentname *cnp = ap->a_cnp;
1160 
1161 	if (dun->un_uppervp == NULLVP)
1162 		panic("union remove: null upper vnode");
1163 
1164 	if (un->un_uppervp != NULLVP) {
1165 		struct vnode *dvp = dun->un_uppervp;
1166 		struct vnode *vp = un->un_uppervp;
1167 
1168 		/* Account for VOP_REMOVE to vrele vp.  */
1169 		vref(vp);
1170 		if (union_dowhiteout(un, cnp->cn_cred))
1171 			cnp->cn_flags |= DOWHITEOUT;
1172 		error = VOP_REMOVE(dvp, vp, cnp);
1173 		if (!error)
1174 			union_removed_upper(un);
1175 		vrele(ap->a_vp);
1176 	} else {
1177 		error = union_mkwhiteout(
1178 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1179 			dun->un_uppervp, ap->a_cnp, un);
1180 		vput(ap->a_vp);
1181 	}
1182 
1183 	return (error);
1184 }
1185 
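/*
 * union_link:
 *
 *	Hard links are created in the upper layer.  If the link target
 *	still lives only in the lower layer it is copied up first; since
 *	the copyup drops the directory lock, the directory entry is
 *	re-looked-up with relookup() before VOP_LINK() is finally called.
 */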
1186 int
1187 union_link(void *v)
1188 {
1189 	struct vop_link_v2_args /* {
1190 		struct vnode *a_dvp;
1191 		struct vnode *a_vp;
1192 		struct componentname *a_cnp;
1193 	} */ *ap = v;
1194 	int error = 0;
1195 	struct componentname *cnp = ap->a_cnp;
1196 	struct union_node *dun;
1197 	struct vnode *vp;
1198 	struct vnode *dvp;
1199 
1200 	dun = VTOUNION(ap->a_dvp);
1201 
1202 	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1203 
1204 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1205 		vp = ap->a_vp;
1206 	} else {
1207 		struct union_node *un = VTOUNION(ap->a_vp);
1208 		if (un->un_uppervp == NULLVP) {
1209 			const bool droplock = (dun->un_uppervp == un->un_dirvp);
1210 
1211 			/*
1212 			 * Needs to be copied before we can link it.
1213 			 */
1214 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1215 			if (droplock)
1216 				VOP_UNLOCK(dun->un_uppervp);
1217 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1218 			if (droplock) {
1219 				vn_lock(dun->un_uppervp,
1220 				    LK_EXCLUSIVE | LK_RETRY);
1221 				/*
1222 				 * During copyup, we dropped the lock on the
1223 				 * dir and invalidated any saved namei lookup
1224 				 * state for the directory we'll be entering
1225 				 * the link in.  We need to re-run the lookup
1226 				 * in that directory to reset any state needed
1227 				 * for VOP_LINK.
1228 				 * Call relookup on the union-layer to reset
1229 				 * the state.
1230 				 */
1231 				vp  = NULLVP;
1232 				if (dun->un_uppervp == NULLVP)
1233 					 panic("union: null upperdvp?");
1234 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1235 				if (error) {
1236 					VOP_UNLOCK(ap->a_vp);
1237 					return EROFS;	/* ? */
1238 				}
1239 				if (vp != NULLVP) {
1240 					/*
1241 					 * The name we want to create has
1242 					 * mysteriously appeared (a race?)
1243 					 */
1244 					error = EEXIST;
1245 					VOP_UNLOCK(ap->a_vp);
1246 					vput(vp);
1247 					return (error);
1248 				}
1249 			}
1250 			VOP_UNLOCK(ap->a_vp);
1251 		}
1252 		vp = un->un_uppervp;
1253 	}
1254 
1255 	dvp = dun->un_uppervp;
1256 	if (dvp == NULLVP)
1257 		error = EROFS;
1258 
1259 	if (error)
1260 		return (error);
1261 
1262 	return VOP_LINK(dvp, vp, cnp);
1263 }
1264 
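/*
 * union_rename:
 *
 *	Map every vnode involved to its upper-layer counterpart and hand
 *	the whole operation to the upper file system.  Objects that exist
 *	only in the lower layer cannot be renamed in place and fail with
 *	EXDEV (so userland can fall back to a copy); a whiteout is
 *	requested for the source when a lower copy exists.
 */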
1265 int
1266 union_rename(void *v)
1267 {
1268 	struct vop_rename_args  /* {
1269 		struct vnode *a_fdvp;
1270 		struct vnode *a_fvp;
1271 		struct componentname *a_fcnp;
1272 		struct vnode *a_tdvp;
1273 		struct vnode *a_tvp;
1274 		struct componentname *a_tcnp;
1275 	} */ *ap = v;
1276 	int error;
1277 
1278 	struct vnode *fdvp = ap->a_fdvp;
1279 	struct vnode *fvp = ap->a_fvp;
1280 	struct vnode *tdvp = ap->a_tdvp;
1281 	struct vnode *tvp = ap->a_tvp;
1282 
1283 	/*
1284 	 * Account for VOP_RENAME to vrele all nodes.
1285 	 * Note: VOP_RENAME will unlock tdvp.
1286 	 */
1287 
1288 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1289 		struct union_node *un = VTOUNION(fdvp);
1290 		if (un->un_uppervp == NULLVP) {
1291 			/*
1292 			 * this should never happen in normal
1293 			 * operation but might if there was
1294 			 * a problem creating the top-level shadow
1295 			 * directory.
1296 			 */
1297 			error = EXDEV;
1298 			goto bad;
1299 		}
1300 
1301 		fdvp = un->un_uppervp;
1302 		vref(fdvp);
1303 	}
1304 
1305 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1306 		struct union_node *un = VTOUNION(fvp);
1307 		if (un->un_uppervp == NULLVP) {
1308 			/* XXX: should do a copyup */
1309 			error = EXDEV;
1310 			goto bad;
1311 		}
1312 
1313 		if (un->un_lowervp != NULLVP)
1314 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1315 
1316 		fvp = un->un_uppervp;
1317 		vref(fvp);
1318 	}
1319 
1320 	if (tdvp->v_op == union_vnodeop_p) {
1321 		struct union_node *un = VTOUNION(tdvp);
1322 		if (un->un_uppervp == NULLVP) {
1323 			/*
1324 			 * this should never happen in normal
1325 			 * operation but might if there was
1326 			 * a problem creating the top-level shadow
1327 			 * directory.
1328 			 */
1329 			error = EXDEV;
1330 			goto bad;
1331 		}
1332 
1333 		tdvp = un->un_uppervp;
1334 		vref(tdvp);
1335 	}
1336 
1337 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1338 		struct union_node *un = VTOUNION(tvp);
1339 
1340 		tvp = un->un_uppervp;
1341 		if (tvp != NULLVP) {
1342 			vref(tvp);
1343 		}
1344 	}
1345 
1346 	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1347 	goto out;
1348 
1349 bad:
1350 	vput(tdvp);
1351 	if (tvp != NULLVP)
1352 		vput(tvp);
1353 	vrele(fdvp);
1354 	vrele(fvp);
1355 
1356 out:
1357 	if (fdvp != ap->a_fdvp) {
1358 		vrele(ap->a_fdvp);
1359 	}
1360 	if (fvp != ap->a_fvp) {
1361 		vrele(ap->a_fvp);
1362 	}
1363 	if (tdvp != ap->a_tdvp) {
1364 		vrele(ap->a_tdvp);
1365 	}
1366 	if (tvp != ap->a_tvp) {
1367 		vrele(ap->a_tvp);
1368 	}
1369 	return (error);
1370 }
1371 
1372 int
1373 union_mkdir(void *v)
1374 {
1375 	struct vop_mkdir_v3_args /* {
1376 		struct vnode *a_dvp;
1377 		struct vnode **a_vpp;
1378 		struct componentname *a_cnp;
1379 		struct vattr *a_vap;
1380 	} */ *ap = v;
1381 	struct union_node *un = VTOUNION(ap->a_dvp);
1382 	struct vnode *dvp = un->un_uppervp;
1383 	struct componentname *cnp = ap->a_cnp;
1384 
1385 	if (dvp != NULLVP) {
1386 		int error;
1387 		struct vnode *vp;
1388 
1389 		vp = NULL;
1390 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1391 		if (error) {
1392 			vrele(ap->a_dvp);
1393 			return (error);
1394 		}
1395 
1396 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1397 				NULLVP, cnp, vp, NULLVP, 1);
1398 		if (error)
1399 			vrele(vp);
1400 		return (error);
1401 	}
1402 
1403 	return (EROFS);
1404 }
1405 
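/*
 * union_rmdir:
 *
 *	Like union_remove(), but the directory must first pass
 *	union_check_rmdir(), which refuses the removal if the lower
 *	layer still contributes visible entries to the directory.
 */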
1406 int
1407 union_rmdir(void *v)
1408 {
1409 	struct vop_rmdir_v2_args /* {
1410 		struct vnode *a_dvp;
1411 		struct vnode *a_vp;
1412 		struct componentname *a_cnp;
1413 	} */ *ap = v;
1414 	int error;
1415 	struct union_node *dun = VTOUNION(ap->a_dvp);
1416 	struct union_node *un = VTOUNION(ap->a_vp);
1417 	struct componentname *cnp = ap->a_cnp;
1418 
1419 	if (dun->un_uppervp == NULLVP)
1420 		panic("union rmdir: null upper vnode");
1421 
1422 	error = union_check_rmdir(un, cnp->cn_cred);
1423 	if (error) {
1424 		vput(ap->a_vp);
1425 		return error;
1426 	}
1427 
1428 	if (un->un_uppervp != NULLVP) {
1429 		struct vnode *dvp = dun->un_uppervp;
1430 		struct vnode *vp = un->un_uppervp;
1431 
1432 		/* Account for VOP_RMDIR to vrele vp.  */
1433 		vref(vp);
1434 		if (union_dowhiteout(un, cnp->cn_cred))
1435 			cnp->cn_flags |= DOWHITEOUT;
1436 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1437 		if (!error)
1438 			union_removed_upper(un);
1439 		vrele(ap->a_vp);
1440 	} else {
1441 		error = union_mkwhiteout(
1442 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1443 			dun->un_uppervp, ap->a_cnp, un);
1444 		vput(ap->a_vp);
1445 	}
1446 
1447 	return (error);
1448 }
1449 
1450 int
1451 union_symlink(void *v)
1452 {
1453 	struct vop_symlink_v3_args /* {
1454 		struct vnode *a_dvp;
1455 		struct vnode **a_vpp;
1456 		struct componentname *a_cnp;
1457 		struct vattr *a_vap;
1458 		char *a_target;
1459 	} */ *ap = v;
1460 	struct union_node *un = VTOUNION(ap->a_dvp);
1461 	struct vnode *dvp = un->un_uppervp;
1462 	struct componentname *cnp = ap->a_cnp;
1463 
1464 	if (dvp != NULLVP) {
1465 		int error;
1466 
1467 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1468 				    ap->a_target);
1469 		return (error);
1470 	}
1471 
1472 	return (EROFS);
1473 }
1474 
1475 /*
1476  * union_readdir works in concert with getdirentries and
1477  * readdir(3) to provide a list of entries in the unioned
1478  * directories.  getdirentries is responsible for walking
1479  * down the union stack.  readdir(3) is responsible for
1480  * eliminating duplicate names from the returned data stream.
1481  */
1482 int
1483 union_readdir(void *v)
1484 {
1485 	struct vop_readdir_args /* {
1486 		struct vnodeop_desc *a_desc;
1487 		struct vnode *a_vp;
1488 		struct uio *a_uio;
1489 		kauth_cred_t a_cred;
1490 		int *a_eofflag;
1491 		u_long *a_cookies;
1492 		int a_ncookies;
1493 	} */ *ap = v;
1494 	struct union_node *un = VTOUNION(ap->a_vp);
1495 	struct vnode *uvp = un->un_uppervp;
1496 
1497 	if (uvp == NULLVP)
1498 		return (0);
1499 
1500 	ap->a_vp = uvp;
1501 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1502 }
1503 
1504 int
1505 union_readlink(void *v)
1506 {
1507 	struct vop_readlink_args /* {
1508 		struct vnode *a_vp;
1509 		struct uio *a_uio;
1510 		kauth_cred_t a_cred;
1511 	} */ *ap = v;
1512 	int error;
1513 	struct vnode *vp = OTHERVP(ap->a_vp);
1514 	int dolock = (vp == LOWERVP(ap->a_vp));
1515 
1516 	if (dolock)
1517 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1518 	ap->a_vp = vp;
1519 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1520 	if (dolock)
1521 		VOP_UNLOCK(vp);
1522 
1523 	return (error);
1524 }
1525 
1526 int
1527 union_abortop(void *v)
1528 {
1529 	struct vop_abortop_args /* {
1530 		struct vnode *a_dvp;
1531 		struct componentname *a_cnp;
1532 	} */ *ap = v;
1533 
1534 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1535 
1536 	ap->a_dvp = UPPERVP(ap->a_dvp);
1537 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1538 }
1539 
1540 int
1541 union_inactive(void *v)
1542 {
1543 	struct vop_inactive_v2_args /* {
1544 		const struct vnodeop_desc *a_desc;
1545 		struct vnode *a_vp;
1546 		bool *a_recycle;
1547 	} */ *ap = v;
1548 	struct vnode *vp = ap->a_vp;
1549 	struct union_node *un = VTOUNION(vp);
1550 	struct vnode **vpp;
1551 
1552 	/*
1553 	 * Do nothing (and _don't_ bypass).
1554 	 * Wait to vrele lowervp until reclaim,
1555 	 * so that until then our union_node is in the
1556 	 * cache and reusable.
1557 	 *
1558 	 * NEEDSWORK: Someday, consider inactive'ing
1559 	 * the lowervp and then trying to reactivate it
1560 	 * with capabilities (v_id)
1561 	 * like they do in the name lookup cache code.
1562 	 * That's too much work for now.
1563 	 */
1564 
1565 	if (un->un_dircache != 0) {
1566 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1567 			vrele(*vpp);
1568 		free(un->un_dircache, M_TEMP);
1569 		un->un_dircache = 0;
1570 	}
1571 
1572 	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
1573 
1574 	return (0);
1575 }
1576 
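/*
 * union_reclaim:
 *
 *	The union vnode shares its interlock with the upper vnode, so
 *	any write count accumulated on the union vnode is handed back
 *	to the upper vnode before the union node is freed.
 */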
1577 int
1578 union_reclaim(void *v)
1579 {
1580 	struct vop_reclaim_v2_args /* {
1581 		struct vnode *a_vp;
1582 	} */ *ap = v;
1583 	struct vnode *vp = ap->a_vp;
1584 	struct vnode *uvp = UPPERVP(vp);
1585 
1586 	VOP_UNLOCK(vp);
1587 
1588 	if (uvp != NULL) {
1589 		mutex_enter(uvp->v_interlock);
1590 		KASSERT(vp->v_interlock == uvp->v_interlock);
1591 		uvp->v_writecount -= vp->v_writecount;
1592 		mutex_exit(uvp->v_interlock);
1593 	}
1594 
1595 	union_freevp(vp);
1596 
1597 	return (0);
1598 }
1599 
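/*
 * Locking.  A union node forwards vnode locks to LOCKVP(vp): the upper
 * vnode when one exists, otherwise the union vnode itself (in which
 * case genfs_lock()/genfs_unlock() are used directly).  union_lock()
 * below must cope with the lock vnode changing underneath it -- a
 * copyup can give the node an upper vnode -- so it retries until the
 * vnode it locked is still LOCKVP(vp), and it also checks for the
 * vnode having been revoked while it slept.
 */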
1600 static int
1601 union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1602 {
1603 	struct vop_lock_args ap;
1604 
1605 	ap.a_desc = VDESC(vop_lock);
1606 	ap.a_vp = lockvp;
1607 	ap.a_flags = flags;
1608 
1609 	if (lockvp == vp)
1610 		return genfs_lock(&ap);
1611 	else
1612 		return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1613 }
1614 
1615 static int
1616 union_unlock1(struct vnode *vp, struct vnode *lockvp)
1617 {
1618 	struct vop_unlock_args ap;
1619 
1620 	ap.a_desc = VDESC(vop_unlock);
1621 	ap.a_vp = lockvp;
1622 
1623 	if (lockvp == vp)
1624 		return genfs_unlock(&ap);
1625 	else
1626 		return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1627 }
1628 
1629 int
1630 union_lock(void *v)
1631 {
1632 	struct vop_lock_args /* {
1633 		struct vnode *a_vp;
1634 		int a_flags;
1635 	} */ *ap = v;
1636 	struct vnode *vp = ap->a_vp, *lockvp;
1637 	struct union_node *un = VTOUNION(vp);
1638 	int flags = ap->a_flags;
1639 	int error;
1640 
1641 	if ((flags & LK_NOWAIT) != 0) {
1642 		if (!mutex_tryenter(&un->un_lock))
1643 			return EBUSY;
1644 		lockvp = LOCKVP(vp);
1645 		error = union_lock1(vp, lockvp, flags);
1646 		mutex_exit(&un->un_lock);
1647 		if (error)
1648 			return error;
1649 		if (mutex_tryenter(vp->v_interlock)) {
1650 			error = vdead_check(vp, VDEAD_NOWAIT);
1651 			mutex_exit(vp->v_interlock);
1652 		} else
1653 			error = EBUSY;
1654 		if (error)
1655 			union_unlock1(vp, lockvp);
1656 		return error;
1657 	}
1658 
1659 	mutex_enter(&un->un_lock);
1660 	for (;;) {
1661 		lockvp = LOCKVP(vp);
1662 		mutex_exit(&un->un_lock);
1663 		error = union_lock1(vp, lockvp, flags);
1664 		if (error != 0)
1665 			return error;
1666 		mutex_enter(&un->un_lock);
1667 		if (lockvp == LOCKVP(vp))
1668 			break;
1669 		union_unlock1(vp, lockvp);
1670 	}
1671 	mutex_exit(&un->un_lock);
1672 
1673 	mutex_enter(vp->v_interlock);
1674 	error = vdead_check(vp, VDEAD_NOWAIT);
1675 	if (error) {
1676 		union_unlock1(vp, lockvp);
1677 		error = vdead_check(vp, 0);
1678 		KASSERT(error == ENOENT);
1679 	}
1680 	mutex_exit(vp->v_interlock);
1681 	return error;
1682 }
1683 
1684 int
1685 union_unlock(void *v)
1686 {
1687 	struct vop_unlock_args /* {
1688 		struct vnode *a_vp;
1689 		int a_flags;
1690 	} */ *ap = v;
1691 	struct vnode *vp = ap->a_vp, *lockvp;
1692 
1693 	lockvp = LOCKVP(vp);
1694 	union_unlock1(vp, lockvp);
1695 
1696 	return 0;
1697 }
1698 
1699 int
1700 union_bmap(void *v)
1701 {
1702 	struct vop_bmap_args /* {
1703 		struct vnode *a_vp;
1704 		daddr_t  a_bn;
1705 		struct vnode **a_vpp;
1706 		daddr_t *a_bnp;
1707 		int *a_runp;
1708 	} */ *ap = v;
1709 	int error;
1710 	struct vnode *vp = OTHERVP(ap->a_vp);
1711 	int dolock = (vp == LOWERVP(ap->a_vp));
1712 
1713 	if (dolock)
1714 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1715 	ap->a_vp = vp;
1716 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1717 	if (dolock)
1718 		VOP_UNLOCK(vp);
1719 
1720 	return (error);
1721 }
1722 
1723 int
1724 union_print(void *v)
1725 {
1726 	struct vop_print_args /* {
1727 		struct vnode *a_vp;
1728 	} */ *ap = v;
1729 	struct vnode *vp = ap->a_vp;
1730 
1731 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1732 			vp, UPPERVP(vp), LOWERVP(vp));
1733 	if (UPPERVP(vp) != NULLVP)
1734 		vprint("union: upper", UPPERVP(vp));
1735 	if (LOWERVP(vp) != NULLVP)
1736 		vprint("union: lower", LOWERVP(vp));
1737 	if (VTOUNION(vp)->un_dircache) {
1738 		struct vnode **vpp;
1739 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1740 			vprint("dircache:", *vpp);
1741 	}
1742 
1743 	return (0);
1744 }
1745 
1746 int
1747 union_islocked(void *v)
1748 {
1749 	struct vop_islocked_args /* {
1750 		struct vnode *a_vp;
1751 	} */ *ap = v;
1752 	struct vnode *vp;
1753 	struct union_node *un;
1754 
1755 	un = VTOUNION(ap->a_vp);
1756 	mutex_enter(&un->un_lock);
1757 	vp = LOCKVP(ap->a_vp);
1758 	mutex_exit(&un->un_lock);
1759 
1760 	if (vp == ap->a_vp)
1761 		return genfs_islocked(ap);
1762 	else
1763 		return VOP_ISLOCKED(vp);
1764 }
1765 
1766 int
1767 union_pathconf(void *v)
1768 {
1769 	struct vop_pathconf_args /* {
1770 		struct vnode *a_vp;
1771 		int a_name;
1772 		int *a_retval;
1773 	} */ *ap = v;
1774 	int error;
1775 	struct vnode *vp = OTHERVP(ap->a_vp);
1776 	int dolock = (vp == LOWERVP(ap->a_vp));
1777 
1778 	if (dolock)
1779 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1780 	ap->a_vp = vp;
1781 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1782 	if (dolock)
1783 		VOP_UNLOCK(vp);
1784 
1785 	return (error);
1786 }
1787 
1788 int
1789 union_advlock(void *v)
1790 {
1791 	struct vop_advlock_args /* {
1792 		struct vnode *a_vp;
1793 		void *a_id;
1794 		int  a_op;
1795 		struct flock *a_fl;
1796 		int  a_flags;
1797 	} */ *ap = v;
1798 	struct vnode *ovp = OTHERVP(ap->a_vp);
1799 
1800 	ap->a_vp = ovp;
1801 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1802 }
1803 
1804 int
1805 union_strategy(void *v)
1806 {
1807 	struct vop_strategy_args /* {
1808 		struct vnode *a_vp;
1809 		struct buf *a_bp;
1810 	} */ *ap = v;
1811 	struct vnode *ovp = OTHERVP(ap->a_vp);
1812 	struct buf *bp = ap->a_bp;
1813 
1814 	KASSERT(ovp != NULLVP);
1815 	if (!NODE_IS_SPECIAL(ovp))
1816 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1817 
1818 	return (VOP_STRATEGY(ovp, bp));
1819 }
1820 
1821 int
1822 union_bwrite(void *v)
1823 {
1824 	struct vop_bwrite_args /* {
1825 		struct vnode *a_vp;
1826 		struct buf *a_bp;
1827 	} */ *ap = v;
1828 	struct vnode *ovp = OTHERVP(ap->a_vp);
1829 	struct buf *bp = ap->a_bp;
1830 
1831 	KASSERT(ovp != NULLVP);
1832 	if (!NODE_IS_SPECIAL(ovp))
1833 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1834 
1835 	return (VOP_BWRITE(ovp, bp));
1836 }
1837 
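/*
 * Paging requests are passed straight through to the layer that backs
 * the node.  This relies on the union vnode having been set up to
 * share the interlock of that underlying vnode, which the KASSERTs in
 * union_getpages() and union_putpages() verify.
 */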
1838 int
1839 union_getpages(void *v)
1840 {
1841 	struct vop_getpages_args /* {
1842 		struct vnode *a_vp;
1843 		voff_t a_offset;
1844 		struct vm_page **a_m;
1845 		int *a_count;
1846 		int a_centeridx;
1847 		vm_prot_t a_access_type;
1848 		int a_advice;
1849 		int a_flags;
1850 	} */ *ap = v;
1851 	struct vnode *vp = ap->a_vp;
1852 
1853 	KASSERT(mutex_owned(vp->v_interlock));
1854 
1855 	if (ap->a_flags & PGO_LOCKED) {
1856 		return EBUSY;
1857 	}
1858 	ap->a_vp = OTHERVP(vp);
1859 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1860 
1861 	/* Just pass the request on to the underlying layer. */
1862 	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1863 }
1864 
1865 int
1866 union_putpages(void *v)
1867 {
1868 	struct vop_putpages_args /* {
1869 		struct vnode *a_vp;
1870 		voff_t a_offlo;
1871 		voff_t a_offhi;
1872 		int a_flags;
1873 	} */ *ap = v;
1874 	struct vnode *vp = ap->a_vp;
1875 
1876 	KASSERT(mutex_owned(vp->v_interlock));
1877 
1878 	ap->a_vp = OTHERVP(vp);
1879 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1880 
1881 	if (ap->a_flags & PGO_RECLAIM) {
1882 		mutex_exit(vp->v_interlock);
1883 		return 0;
1884 	}
1885 
1886 	/* Just pass the request on to the underlying layer. */
1887 	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1888 }
1889 
1890 int
1891 union_kqfilter(void *v)
1892 {
1893 	struct vop_kqfilter_args /* {
1894 		struct vnode	*a_vp;
1895 		struct knote	*a_kn;
1896 	} */ *ap = v;
1897 	int error;
1898 
1899 	/*
1900 	 * We watch either the upper layer file (if it already exists),
1901 	 * or the lower layer one.  If only the lower layer file exists
1902 	 * at this moment, we keep watching that lower layer file even
1903 	 * if an upper layer file is created later on.
1904 	 */
1905 	if (UPPERVP(ap->a_vp))
1906 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1907 	else if (LOWERVP(ap->a_vp))
1908 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1909 	else {
1910 		/* panic? */
1911 		error = EOPNOTSUPP;
1912 	}
1913 
1914 	return (error);
1915 }
1916