xref: /netbsd-src/sys/fs/union/union_vnops.c (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1 /*	$NetBSD: union_vnops.c,v 1.59 2014/03/24 13:42:40 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.59 2014/03/24 13:42:40 hannken Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95 
/*
 * Forward declarations of the union fs vnode operations.  Each takes
 * the generic argument pointer used by the vnode op dispatch table
 * below; the real argument structure is named in each implementation.
 */
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/* Single-layer lookup helper used for both the upper and lower layer. */
static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);
139 
140 
141 /*
142  * Global vfs data structures
143  */
144 int (**union_vnodeop_p)(void *);
145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146 	{ &vop_default_desc, vn_default_error },
147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
148 	{ &vop_create_desc, union_create },		/* create */
149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
151 	{ &vop_open_desc, union_open },			/* open */
152 	{ &vop_close_desc, union_close },		/* close */
153 	{ &vop_access_desc, union_access },		/* access */
154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
156 	{ &vop_read_desc, union_read },			/* read */
157 	{ &vop_write_desc, union_write },		/* write */
158 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
159 	{ &vop_poll_desc, union_poll },			/* select */
160 	{ &vop_revoke_desc, union_revoke },		/* revoke */
161 	{ &vop_mmap_desc, union_mmap },			/* mmap */
162 	{ &vop_fsync_desc, union_fsync },		/* fsync */
163 	{ &vop_seek_desc, union_seek },			/* seek */
164 	{ &vop_remove_desc, union_remove },		/* remove */
165 	{ &vop_link_desc, union_link },			/* link */
166 	{ &vop_rename_desc, union_rename },		/* rename */
167 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
168 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
169 	{ &vop_symlink_desc, union_symlink },		/* symlink */
170 	{ &vop_readdir_desc, union_readdir },		/* readdir */
171 	{ &vop_readlink_desc, union_readlink },		/* readlink */
172 	{ &vop_abortop_desc, union_abortop },		/* abortop */
173 	{ &vop_inactive_desc, union_inactive },		/* inactive */
174 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
175 	{ &vop_lock_desc, union_lock },			/* lock */
176 	{ &vop_unlock_desc, union_unlock },		/* unlock */
177 	{ &vop_bmap_desc, union_bmap },			/* bmap */
178 	{ &vop_strategy_desc, union_strategy },		/* strategy */
179 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
180 	{ &vop_print_desc, union_print },		/* print */
181 	{ &vop_islocked_desc, union_islocked },		/* islocked */
182 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
183 	{ &vop_advlock_desc, union_advlock },		/* advlock */
184 	{ &vop_getpages_desc, union_getpages },		/* getpages */
185 	{ &vop_putpages_desc, union_putpages },		/* putpages */
186 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
187 	{ NULL, NULL }
188 };
189 const struct vnodeopv_desc union_vnodeop_opv_desc =
190 	{ &union_vnodeop_p, union_vnodeop_entries };
191 
/*
 * True for device, socket and fifo vnodes: nodes that are never
 * copied up to the upper layer.
 */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
195 
/*
 * Perform a lookup of one component in a single (upper or lower)
 * layer, handling mount point crossings in both directions.
 *
 * udvp is the root of this layer (the lookup never walks above it);
 * *dvpp is the locked directory to search and may be replaced if the
 * lookup steps across a mount boundary; on success *vpp holds the
 * locked result vnode.  Returns 0 or an errno from VOP_LOOKUP /
 * vn_lock / VFS_ROOT.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For `..' the parent is unlocked
		 * first to preserve the child-before-parent lock order,
		 * then re-locked afterwards.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* Retry if the mount is being busied/unmounted. */
		if (vfs_busy(mp, NULL))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, &tdvp);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
265 
/*
 * Lookup a name in the union directory: search the upper layer first,
 * then (unless a whiteout hides it) the lower layer, and combine the
 * results into a union vnode via union_allocvp().  A shadow directory
 * is created in the upper layer when only a lower directory exists.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* Disabled: `...' would name the lower layer directly. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse modifying lookups on a read-only union mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * A whiteout entry, or an opaque upper directory,
			 * hides anything in the lower layer.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* Use the mount owner's credentials below. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			/* `..' of a node with no lower dir: use parent's. */
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	/* union_allocvp() takes unlocked references. */
	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
500 
501 int
502 union_create(void *v)
503 {
504 	struct vop_create_v3_args /* {
505 		struct vnode *a_dvp;
506 		struct vnode **a_vpp;
507 		struct componentname *a_cnp;
508 		struct vattr *a_vap;
509 	} */ *ap = v;
510 	struct union_node *un = VTOUNION(ap->a_dvp);
511 	struct vnode *dvp = un->un_uppervp;
512 	struct componentname *cnp = ap->a_cnp;
513 
514 	if (dvp != NULLVP) {
515 		int error;
516 		struct vnode *vp;
517 		struct mount *mp;
518 
519 		mp = ap->a_dvp->v_mount;
520 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
521 		if (error)
522 			return (error);
523 
524 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
525 				NULLVP, 1);
526 		if (error)
527 			vrele(vp);
528 		return (error);
529 	}
530 
531 	return (EROFS);
532 }
533 
534 int
535 union_whiteout(void *v)
536 {
537 	struct vop_whiteout_args /* {
538 		struct vnode *a_dvp;
539 		struct componentname *a_cnp;
540 		int a_flags;
541 	} */ *ap = v;
542 	struct union_node *un = VTOUNION(ap->a_dvp);
543 	struct componentname *cnp = ap->a_cnp;
544 
545 	if (un->un_uppervp == NULLVP)
546 		return (EOPNOTSUPP);
547 
548 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
549 }
550 
551 int
552 union_mknod(void *v)
553 {
554 	struct vop_mknod_v3_args /* {
555 		struct vnode *a_dvp;
556 		struct vnode **a_vpp;
557 		struct componentname *a_cnp;
558 		struct vattr *a_vap;
559 	} */ *ap = v;
560 	struct union_node *un = VTOUNION(ap->a_dvp);
561 	struct vnode *dvp = un->un_uppervp;
562 	struct componentname *cnp = ap->a_cnp;
563 
564 	if (dvp != NULLVP) {
565 		int error;
566 		struct vnode *vp;
567 		struct mount *mp;
568 
569 		mp = ap->a_dvp->v_mount;
570 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
571 		if (error)
572 			return (error);
573 
574 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
575 				      cnp, vp, NULLVP, 1);
576 		if (error)
577 			vrele(vp);
578 		return (error);
579 	}
580 
581 	return (EROFS);
582 }
583 
/*
 * Open the union node.  Opens the upper vnode when one exists;
 * otherwise opens the lower vnode, first copying it up if the open
 * is for writing on a regular file.  MNT_NODEV is enforced for
 * device nodes in either layer.
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* O_TRUNC makes copying the old contents pointless. */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* Track lower-layer opens so union_close can balance them. */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);

	return (error);
}
642 
643 int
644 union_close(void *v)
645 {
646 	struct vop_close_args /* {
647 		struct vnode *a_vp;
648 		int  a_fflag;
649 		kauth_cred_t a_cred;
650 	} */ *ap = v;
651 	struct union_node *un = VTOUNION(ap->a_vp);
652 	struct vnode *vp;
653 	int error;
654 	bool do_lock;
655 
656 	vp = un->un_uppervp;
657 	if (vp != NULLVP) {
658 		do_lock = false;
659 	} else {
660 		KASSERT(un->un_openl > 0);
661 		--un->un_openl;
662 		vp = un->un_lowervp;
663 		do_lock = true;
664 	}
665 
666 	KASSERT(vp != NULLVP);
667 	ap->a_vp = vp;
668 	if (do_lock)
669 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
670 	error = VCALL(vp, VOFFSET(vop_close), ap);
671 	if (do_lock)
672 		VOP_UNLOCK(vp);
673 
674 	return error;
675 }
676 
677 /*
678  * Check access permission on the union vnode.
679  * The access check being enforced is to check
680  * against both the underlying vnode, and any
681  * copied vnode.  This ensures that no additional
682  * file permissions are given away simply because
683  * the user caused an implicit file copy.
684  */
685 int
686 union_access(void *v)
687 {
688 	struct vop_access_args /* {
689 		struct vnodeop_desc *a_desc;
690 		struct vnode *a_vp;
691 		int a_mode;
692 		kauth_cred_t a_cred;
693 	} */ *ap = v;
694 	struct vnode *vp = ap->a_vp;
695 	struct union_node *un = VTOUNION(vp);
696 	int error = EACCES;
697 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
698 
699 	/*
700 	 * Disallow write attempts on read-only file systems;
701 	 * unless the file is a socket, fifo, or a block or
702 	 * character device resident on the file system.
703 	 */
704 	if (ap->a_mode & VWRITE) {
705 		switch (vp->v_type) {
706 		case VDIR:
707 		case VLNK:
708 		case VREG:
709 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
710 				return (EROFS);
711 			break;
712 		case VBAD:
713 		case VBLK:
714 		case VCHR:
715 		case VSOCK:
716 		case VFIFO:
717 		case VNON:
718 		default:
719 			break;
720 		}
721 	}
722 
723 
724 	if ((vp = un->un_uppervp) != NULLVP) {
725 		ap->a_vp = vp;
726 		return (VCALL(vp, VOFFSET(vop_access), ap));
727 	}
728 
729 	if ((vp = un->un_lowervp) != NULLVP) {
730 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
731 		ap->a_vp = vp;
732 		error = VCALL(vp, VOFFSET(vop_access), ap);
733 		if (error == 0) {
734 			if (um->um_op == UNMNT_BELOW) {
735 				ap->a_cred = um->um_cred;
736 				error = VCALL(vp, VOFFSET(vop_access), ap);
737 			}
738 		}
739 		VOP_UNLOCK(vp);
740 		if (error)
741 			return (error);
742 	}
743 
744 	return (error);
745 }
746 
747 /*
748  * We handle getattr only to change the fsid and
749  * track object sizes
750  */
751 int
752 union_getattr(void *v)
753 {
754 	struct vop_getattr_args /* {
755 		struct vnode *a_vp;
756 		struct vattr *a_vap;
757 		kauth_cred_t a_cred;
758 	} */ *ap = v;
759 	int error;
760 	struct union_node *un = VTOUNION(ap->a_vp);
761 	struct vnode *vp = un->un_uppervp;
762 	struct vattr *vap;
763 	struct vattr va;
764 
765 
766 	/*
767 	 * Some programs walk the filesystem hierarchy by counting
768 	 * links to directories to avoid stat'ing all the time.
769 	 * This means the link count on directories needs to be "correct".
770 	 * The only way to do that is to call getattr on both layers
771 	 * and fix up the link count.  The link count will not necessarily
772 	 * be accurate but will be large enough to defeat the tree walkers.
773 	 *
774 	 * To make life more interesting, some filesystems don't keep
775 	 * track of link counts in the expected way, and return a
776 	 * link count of `1' for those directories; if either of the
777 	 * component directories returns a link count of `1', we return a 1.
778 	 */
779 
780 	vap = ap->a_vap;
781 
782 	vp = un->un_uppervp;
783 	if (vp != NULLVP) {
784 		error = VOP_GETATTR(vp, vap, ap->a_cred);
785 		if (error)
786 			return (error);
787 		mutex_enter(&un->un_lock);
788 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
789 	}
790 
791 	if (vp == NULLVP) {
792 		vp = un->un_lowervp;
793 	} else if (vp->v_type == VDIR) {
794 		vp = un->un_lowervp;
795 		if (vp != NULLVP)
796 			vap = &va;
797 	} else {
798 		vp = NULLVP;
799 	}
800 
801 	if (vp != NULLVP) {
802 		if (vp == un->un_lowervp)
803 			vn_lock(vp, LK_SHARED | LK_RETRY);
804 		error = VOP_GETATTR(vp, vap, ap->a_cred);
805 		if (vp == un->un_lowervp)
806 			VOP_UNLOCK(vp);
807 		if (error)
808 			return (error);
809 		mutex_enter(&un->un_lock);
810 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
811 	}
812 
813 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
814 		/*
815 		 * Link count manipulation:
816 		 *	- If both return "2", return 2 (no subdirs)
817 		 *	- If one or the other return "1", return "1" (ENOCLUE)
818 		 */
819 		if ((ap->a_vap->va_nlink == 2) &&
820 		    (vap->va_nlink == 2))
821 			;
822 		else if (ap->a_vap->va_nlink != 1) {
823 			if (vap->va_nlink == 1)
824 				ap->a_vap->va_nlink = 1;
825 			else
826 				ap->a_vap->va_nlink += vap->va_nlink;
827 		}
828 	}
829 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
830 	return (0);
831 }
832 
/*
 * Set attributes on the union node.  Attributes can only be changed
 * in the upper layer; a pure size change to zero on a lower special
 * node is tolerated to make O_TRUNC opens work.  A lower regular
 * file is copied up first so the truncation applies to the copy.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/*
			 * NOTE(review): un_lock entered here, apparently
			 * released inside union_newsize() -- confirm.
			 */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
914 
/*
 * Read from the union node by delegating to whichever layer backs it
 * (upper preferred).  The lower vnode must be locked for the read.
 * On success the tracked layer size is grown if the read revealed a
 * larger underlying object.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		/*
		 * un_lock is released either by union_newsize() (when a
		 * size changed) or explicitly below (when nothing did).
		 */
		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
962 
963 int
964 union_write(void *v)
965 {
966 	struct vop_read_args /* {
967 		struct vnode *a_vp;
968 		struct uio *a_uio;
969 		int  a_ioflag;
970 		kauth_cred_t a_cred;
971 	} */ *ap = v;
972 	int error;
973 	struct vnode *vp;
974 	struct union_node *un = VTOUNION(ap->a_vp);
975 
976 	vp = UPPERVP(ap->a_vp);
977 	if (vp == NULLVP) {
978 		vp = LOWERVP(ap->a_vp);
979 		if (NODE_IS_SPECIAL(vp)) {
980 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
981 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
982 			    ap->a_cred);
983 			VOP_UNLOCK(vp);
984 			return error;
985 		}
986 		panic("union: missing upper layer in write");
987 	}
988 
989 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
990 
991 	/*
992 	 * the size of the underlying object may be changed by the
993 	 * write.
994 	 */
995 	if (error == 0) {
996 		off_t cur = ap->a_uio->uio_offset;
997 
998 		mutex_enter(&un->un_lock);
999 		if (cur > un->un_uppersz)
1000 			union_newsize(ap->a_vp, cur, VNOVAL);
1001 		else
1002 			mutex_exit(&un->un_lock);
1003 	}
1004 
1005 	return (error);
1006 }
1007 
1008 int
1009 union_ioctl(void *v)
1010 {
1011 	struct vop_ioctl_args /* {
1012 		struct vnode *a_vp;
1013 		int  a_command;
1014 		void *a_data;
1015 		int  a_fflag;
1016 		kauth_cred_t a_cred;
1017 	} */ *ap = v;
1018 	struct vnode *ovp = OTHERVP(ap->a_vp);
1019 
1020 	ap->a_vp = ovp;
1021 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1022 }
1023 
1024 int
1025 union_poll(void *v)
1026 {
1027 	struct vop_poll_args /* {
1028 		struct vnode *a_vp;
1029 		int a_events;
1030 	} */ *ap = v;
1031 	struct vnode *ovp = OTHERVP(ap->a_vp);
1032 
1033 	ap->a_vp = ovp;
1034 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1035 }
1036 
/*
 * Revoke access to the union node by revoking both underlying
 * vnodes (when present) and then destroying the union vnode itself.
 */
int
union_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	if (UPPERVP(vp))
		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
	if (LOWERVP(vp))
		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
	vgone(vp);	/* XXXAD?? */
	return (0);
}
1054 
1055 int
1056 union_mmap(void *v)
1057 {
1058 	struct vop_mmap_args /* {
1059 		struct vnode *a_vp;
1060 		vm_prot_t a_prot;
1061 		kauth_cred_t a_cred;
1062 	} */ *ap = v;
1063 	struct vnode *ovp = OTHERVP(ap->a_vp);
1064 
1065 	ap->a_vp = ovp;
1066 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1067 }
1068 
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		/* Device nodes: flush the special vnode's buffers first. */
		error = spec_fsync(v);
		if (error)
			return error;
	}

	/* The "shallow fsync" case described above: skip the lower layers. */
	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/*
		 * The lower vnode is not locked by our caller, so take
		 * its lock around the call ourselves.
		 */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1112 
1113 int
1114 union_seek(void *v)
1115 {
1116 	struct vop_seek_args /* {
1117 		struct vnode *a_vp;
1118 		off_t  a_oldoff;
1119 		off_t  a_newoff;
1120 		kauth_cred_t a_cred;
1121 	} */ *ap = v;
1122 	struct vnode *ovp = OTHERVP(ap->a_vp);
1123 
1124 	ap->a_vp = ovp;
1125 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1126 }
1127 
int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* Removal always happens in the upper layer; it must exist. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		/*
		 * The object exists in the upper layer: remove it there,
		 * leaving a whiteout if the name would otherwise become
		 * visible again from the lower layer.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_REMOVE to vrele dvp and vp.
		 * Note: VOP_REMOVE will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		/* Drop the union-layer references VOP_REMOVE did not see. */
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/*
		 * Only the lower layer has the object: hide it by
		 * creating a whiteout entry in the upper directory.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1171 
int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Link source is not a union vnode; use it directly. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * If the target dir and the source's parent share
			 * the same upper vnode, we already hold its lock
			 * and must drop it around the copyup to avoid
			 * deadlocking against ourselves.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* Link to the (possibly just copied-up) upper vnode. */
		vp = un->un_uppervp;
	}

	/* The link itself can only be made in the writable upper layer. */
	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Account for VOP_LINK to vrele dvp.
	 * Note: VOP_LINK will unlock dvp.
	 */
	vref(dvp);
	error = VOP_LINK(dvp, vp, cnp);
	vrele(ap->a_dvp);

	return error;
}
1261 
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 *
	 * Strategy: swap each union vnode for its upper-layer vnode
	 * (taking an extra reference each time), then hand the whole
	 * operation to the upper filesystem's VOP_RENAME.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* Renaming away from a lower-layer shadow: leave a whiteout. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* May legitimately be NULLVP if the target only exists below. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Error before VOP_RENAME: release the caller's nodes ourselves. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the extra references taken for each substituted vnode. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1368 
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		/* Create the directory in the writable upper layer. */
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper vnode in a union node for the caller. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	/* No upper layer: the mount is effectively read-only. */
	return (EROFS);
}
1401 
int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* Removal always happens in the upper layer; it must exist. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Refuse to remove a directory that is not empty across layers. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		/*
		 * The directory exists in the upper layer: remove it
		 * there, leaving a whiteout if the name would otherwise
		 * reappear from the lower layer.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_RMDIR to vrele dvp and vp.
		 * Note: VOP_RMDIR will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		/* Drop the union-layer references VOP_RMDIR did not see. */
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/*
		 * Only the lower layer has the directory: hide it by
		 * creating a whiteout entry in the upper directory.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1452 
1453 int
1454 union_symlink(void *v)
1455 {
1456 	struct vop_symlink_v3_args /* {
1457 		struct vnode *a_dvp;
1458 		struct vnode **a_vpp;
1459 		struct componentname *a_cnp;
1460 		struct vattr *a_vap;
1461 		char *a_target;
1462 	} */ *ap = v;
1463 	struct union_node *un = VTOUNION(ap->a_dvp);
1464 	struct vnode *dvp = un->un_uppervp;
1465 	struct componentname *cnp = ap->a_cnp;
1466 
1467 	if (dvp != NULLVP) {
1468 		int error;
1469 
1470 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1471 				    ap->a_target);
1472 		return (error);
1473 	}
1474 
1475 	return (EROFS);
1476 }
1477 
1478 /*
1479  * union_readdir works in concert with getdirentries and
1480  * readdir(3) to provide a list of entries in the unioned
1481  * directories.  getdirentries is responsible for walking
1482  * down the union stack.  readdir(3) is responsible for
1483  * eliminating duplicate names from the returned data stream.
1484  */
1485 int
1486 union_readdir(void *v)
1487 {
1488 	struct vop_readdir_args /* {
1489 		struct vnodeop_desc *a_desc;
1490 		struct vnode *a_vp;
1491 		struct uio *a_uio;
1492 		kauth_cred_t a_cred;
1493 		int *a_eofflag;
1494 		u_long *a_cookies;
1495 		int a_ncookies;
1496 	} */ *ap = v;
1497 	struct union_node *un = VTOUNION(ap->a_vp);
1498 	struct vnode *uvp = un->un_uppervp;
1499 
1500 	if (uvp == NULLVP)
1501 		return (0);
1502 
1503 	ap->a_vp = uvp;
1504 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1505 }
1506 
1507 int
1508 union_readlink(void *v)
1509 {
1510 	struct vop_readlink_args /* {
1511 		struct vnode *a_vp;
1512 		struct uio *a_uio;
1513 		kauth_cred_t a_cred;
1514 	} */ *ap = v;
1515 	int error;
1516 	struct vnode *vp = OTHERVP(ap->a_vp);
1517 	int dolock = (vp == LOWERVP(ap->a_vp));
1518 
1519 	if (dolock)
1520 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1521 	ap->a_vp = vp;
1522 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1523 	if (dolock)
1524 		VOP_UNLOCK(vp);
1525 
1526 	return (error);
1527 }
1528 
1529 int
1530 union_abortop(void *v)
1531 {
1532 	struct vop_abortop_args /* {
1533 		struct vnode *a_dvp;
1534 		struct componentname *a_cnp;
1535 	} */ *ap = v;
1536 
1537 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1538 
1539 	ap->a_dvp = UPPERVP(ap->a_dvp);
1540 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1541 }
1542 
int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release the NULLVP-terminated dircache vnode array, if any. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Ask for recycling once we are no longer in the union node cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}
1580 
1581 int
1582 union_reclaim(void *v)
1583 {
1584 	struct vop_reclaim_args /* {
1585 		struct vnode *a_vp;
1586 	} */ *ap = v;
1587 
1588 	union_freevp(ap->a_vp);
1589 
1590 	return (0);
1591 }
1592 
1593 static int
1594 union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1595 {
1596 	struct vop_lock_args ap;
1597 
1598 	if (lockvp == vp) {
1599 		ap.a_vp = vp;
1600 		ap.a_flags = flags;
1601 		return genfs_lock(&ap);
1602 	} else
1603 		return VOP_LOCK(lockvp, flags);
1604 }
1605 
1606 static int
1607 union_unlock1(struct vnode *vp, struct vnode *lockvp)
1608 {
1609 	struct vop_unlock_args ap;
1610 
1611 	if (lockvp == vp) {
1612 		ap.a_vp = vp;
1613 		return genfs_unlock(&ap);
1614 	} else
1615 		return VOP_UNLOCK(lockvp);
1616 }
1617 
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	if ((flags & LK_NOWAIT) != 0) {
		/* Non-blocking path: every step must be try-only. */
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Fail with EBUSY if the vnode is dying; we may not sleep. */
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/*
	 * The vnode that carries our lock (LOCKVP) may change while we
	 * sleep waiting for it (presumably when an upper vnode gets
	 * attached -- TODO confirm).  Retry until the lock we obtained
	 * is still the current lock vnode.
	 */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/*
	 * If the vnode is being reclaimed, release the lock and wait
	 * for the reclaim to finish; the blocking vdead_check() must
	 * then report the vnode as dead.
	 */
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1672 
1673 int
1674 union_unlock(void *v)
1675 {
1676 	struct vop_unlock_args /* {
1677 		struct vnode *a_vp;
1678 		int a_flags;
1679 	} */ *ap = v;
1680 	struct vnode *vp = ap->a_vp, *lockvp;
1681 
1682 	lockvp = LOCKVP(vp);
1683 	union_unlock1(vp, lockvp);
1684 
1685 	return 0;
1686 }
1687 
1688 int
1689 union_bmap(void *v)
1690 {
1691 	struct vop_bmap_args /* {
1692 		struct vnode *a_vp;
1693 		daddr_t  a_bn;
1694 		struct vnode **a_vpp;
1695 		daddr_t *a_bnp;
1696 		int *a_runp;
1697 	} */ *ap = v;
1698 	int error;
1699 	struct vnode *vp = OTHERVP(ap->a_vp);
1700 	int dolock = (vp == LOWERVP(ap->a_vp));
1701 
1702 	if (dolock)
1703 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1704 	ap->a_vp = vp;
1705 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1706 	if (dolock)
1707 		VOP_UNLOCK(vp);
1708 
1709 	return (error);
1710 }
1711 
1712 int
1713 union_print(void *v)
1714 {
1715 	struct vop_print_args /* {
1716 		struct vnode *a_vp;
1717 	} */ *ap = v;
1718 	struct vnode *vp = ap->a_vp;
1719 
1720 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1721 			vp, UPPERVP(vp), LOWERVP(vp));
1722 	if (UPPERVP(vp) != NULLVP)
1723 		vprint("union: upper", UPPERVP(vp));
1724 	if (LOWERVP(vp) != NULLVP)
1725 		vprint("union: lower", LOWERVP(vp));
1726 	if (VTOUNION(vp)->un_dircache) {
1727 		struct vnode **vpp;
1728 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1729 			vprint("dircache:", *vpp);
1730 	}
1731 
1732 	return (0);
1733 }
1734 
1735 int
1736 union_islocked(void *v)
1737 {
1738 	struct vop_islocked_args /* {
1739 		struct vnode *a_vp;
1740 	} */ *ap = v;
1741 	struct vnode *vp;
1742 	struct union_node *un;
1743 
1744 	un = VTOUNION(ap->a_vp);
1745 	mutex_enter(&un->un_lock);
1746 	vp = LOCKVP(ap->a_vp);
1747 	mutex_exit(&un->un_lock);
1748 
1749 	if (vp == ap->a_vp)
1750 		return genfs_islocked(ap);
1751 	else
1752 		return VOP_ISLOCKED(vp);
1753 }
1754 
1755 int
1756 union_pathconf(void *v)
1757 {
1758 	struct vop_pathconf_args /* {
1759 		struct vnode *a_vp;
1760 		int a_name;
1761 		int *a_retval;
1762 	} */ *ap = v;
1763 	int error;
1764 	struct vnode *vp = OTHERVP(ap->a_vp);
1765 	int dolock = (vp == LOWERVP(ap->a_vp));
1766 
1767 	if (dolock)
1768 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1769 	ap->a_vp = vp;
1770 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1771 	if (dolock)
1772 		VOP_UNLOCK(vp);
1773 
1774 	return (error);
1775 }
1776 
1777 int
1778 union_advlock(void *v)
1779 {
1780 	struct vop_advlock_args /* {
1781 		struct vnode *a_vp;
1782 		void *a_id;
1783 		int  a_op;
1784 		struct flock *a_fl;
1785 		int  a_flags;
1786 	} */ *ap = v;
1787 	struct vnode *ovp = OTHERVP(ap->a_vp);
1788 
1789 	ap->a_vp = ovp;
1790 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1791 }
1792 
1793 int
1794 union_strategy(void *v)
1795 {
1796 	struct vop_strategy_args /* {
1797 		struct vnode *a_vp;
1798 		struct buf *a_bp;
1799 	} */ *ap = v;
1800 	struct vnode *ovp = OTHERVP(ap->a_vp);
1801 	struct buf *bp = ap->a_bp;
1802 
1803 	KASSERT(ovp != NULLVP);
1804 	if (!NODE_IS_SPECIAL(ovp))
1805 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1806 
1807 	return (VOP_STRATEGY(ovp, bp));
1808 }
1809 
1810 int
1811 union_bwrite(void *v)
1812 {
1813 	struct vop_bwrite_args /* {
1814 		struct vnode *a_vp;
1815 		struct buf *a_bp;
1816 	} */ *ap = v;
1817 	struct vnode *ovp = OTHERVP(ap->a_vp);
1818 	struct buf *bp = ap->a_bp;
1819 
1820 	KASSERT(ovp != NULLVP);
1821 	if (!NODE_IS_SPECIAL(ovp))
1822 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1823 
1824 	return (VOP_BWRITE(ovp, bp));
1825 }
1826 
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/* Caller must hold the vnode's interlock on entry. */
	KASSERT(mutex_owned(vp->v_interlock));

	/*
	 * Refuse PGO_LOCKED requests; EBUSY presumably makes the
	 * caller retry without PGO_LOCKED -- TODO confirm against
	 * the genfs getpages contract.
	 */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the underlying vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1853 
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/* Caller must hold the vnode's interlock on entry. */
	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the underlying vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/*
	 * On reclaim of the union vnode there is nothing to flush
	 * here; drop the interlock (as VOP_PUTPAGES would) and
	 * report success.
	 */
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1878 
1879 int
1880 union_kqfilter(void *v)
1881 {
1882 	struct vop_kqfilter_args /* {
1883 		struct vnode	*a_vp;
1884 		struct knote	*a_kn;
1885 	} */ *ap = v;
1886 	int error;
1887 
1888 	/*
1889 	 * We watch either the upper layer file (if it already exists),
1890 	 * or the lower layer one. If there is lower layer file only
1891 	 * at this moment, we will keep watching that lower layer file
1892 	 * even if upper layer file would be created later on.
1893 	 */
1894 	if (UPPERVP(ap->a_vp))
1895 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1896 	else if (LOWERVP(ap->a_vp))
1897 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1898 	else {
1899 		/* panic? */
1900 		error = EOPNOTSUPP;
1901 	}
1902 
1903 	return (error);
1904 }
1905