xref: /netbsd-src/sys/fs/union/union_vnops.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$NetBSD: union_vnops.c,v 1.47 2011/10/18 09:22:53 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.47 2011/10/18 09:22:53 hannken Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95 
/*
 * Vnode operation entry points of the union file system.  Every routine
 * receives its operation-specific argument structure as an opaque
 * pointer and returns an errno-style status.
 */

/* Name space operations. */
int	union_lookup(void *);
int	union_create(void *);
int	union_whiteout(void *);
int	union_mknod(void *);
int	union_remove(void *);
int	union_link(void *);
int	union_rename(void *);
int	union_mkdir(void *);
int	union_rmdir(void *);
int	union_symlink(void *);
int	union_readdir(void *);
int	union_readlink(void *);
int	union_abortop(void *);

/* Open/close, permissions and attributes. */
int	union_open(void *);
int	union_close(void *);
int	union_access(void *);
int	union_getattr(void *);
int	union_setattr(void *);
int	union_pathconf(void *);
int	union_advlock(void *);

/* Data access and event notification. */
int	union_read(void *);
int	union_write(void *);
int	union_ioctl(void *);
int	union_poll(void *);
int	union_kqfilter(void *);
int	union_revoke(void *);
int	union_mmap(void *);
int	union_fsync(void *);
int	union_seek(void *);

/* Vnode life cycle, locking and debugging. */
int	union_inactive(void *);
int	union_reclaim(void *);
int	union_lock(void *);
int	union_unlock(void *);
int	union_islocked(void *);
int	union_print(void *);

/* Block and page level I/O. */
int	union_bmap(void *);
int	union_strategy(void *);
int	union_bwrite(void *);
int	union_getpages(void *);
int	union_putpages(void *);

/* File-local helpers. */
static void	union_fixup(struct union_node *);
static int	union_lookup1(struct vnode *, struct vnode **,
		    struct vnode **, struct componentname *);
140 
141 
142 /*
143  * Global vfs data structures
144  */
145 int (**union_vnodeop_p)(void *);
146 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
147 	{ &vop_default_desc, vn_default_error },
148 	{ &vop_lookup_desc, union_lookup },		/* lookup */
149 	{ &vop_create_desc, union_create },		/* create */
150 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
151 	{ &vop_mknod_desc, union_mknod },		/* mknod */
152 	{ &vop_open_desc, union_open },			/* open */
153 	{ &vop_close_desc, union_close },		/* close */
154 	{ &vop_access_desc, union_access },		/* access */
155 	{ &vop_getattr_desc, union_getattr },		/* getattr */
156 	{ &vop_setattr_desc, union_setattr },		/* setattr */
157 	{ &vop_read_desc, union_read },			/* read */
158 	{ &vop_write_desc, union_write },		/* write */
159 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
160 	{ &vop_poll_desc, union_poll },			/* select */
161 	{ &vop_revoke_desc, union_revoke },		/* revoke */
162 	{ &vop_mmap_desc, union_mmap },			/* mmap */
163 	{ &vop_fsync_desc, union_fsync },		/* fsync */
164 	{ &vop_seek_desc, union_seek },			/* seek */
165 	{ &vop_remove_desc, union_remove },		/* remove */
166 	{ &vop_link_desc, union_link },			/* link */
167 	{ &vop_rename_desc, union_rename },		/* rename */
168 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
169 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
170 	{ &vop_symlink_desc, union_symlink },		/* symlink */
171 	{ &vop_readdir_desc, union_readdir },		/* readdir */
172 	{ &vop_readlink_desc, union_readlink },		/* readlink */
173 	{ &vop_abortop_desc, union_abortop },		/* abortop */
174 	{ &vop_inactive_desc, union_inactive },		/* inactive */
175 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
176 	{ &vop_lock_desc, union_lock },			/* lock */
177 	{ &vop_unlock_desc, union_unlock },		/* unlock */
178 	{ &vop_bmap_desc, union_bmap },			/* bmap */
179 	{ &vop_strategy_desc, union_strategy },		/* strategy */
180 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
181 	{ &vop_print_desc, union_print },		/* print */
182 	{ &vop_islocked_desc, union_islocked },		/* islocked */
183 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
184 	{ &vop_advlock_desc, union_advlock },		/* advlock */
185 	{ &vop_getpages_desc, union_getpages },		/* getpages */
186 	{ &vop_putpages_desc, union_putpages },		/* putpages */
187 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
188 	{ NULL, NULL }
189 };
190 const struct vnodeopv_desc union_vnodeop_opv_desc =
191 	{ &union_vnodeop_p, union_vnodeop_entries };
192 
/*
 * Make sure the upper vnode of union node `un' is locked: when the
 * UN_ULOCK flag is not set in un_flags, union_fixup() is called to take
 * the lock and set the flag.
 *
 * The body is wrapped in do { } while (0) so that `FIXUP(un);' expands
 * to exactly one statement and may safely be used as the unbraced body
 * of an if that is followed by an else.  Note that `un' is evaluated
 * more than once.
 */
#define FIXUP(un) do { \
	if (((un)->un_flags & UN_ULOCK) == 0) { \
		union_fixup(un); \
	} \
} while (/*CONSTCOND*/ 0)
/*
 * Non-zero when vnode `vp' is a block device, character device, socket
 * or fifo.  `vp' is evaluated more than once.
 */
#define NODE_IS_SPECIAL(vp)		\
	((vp)->v_type == VBLK ||	\
	 (vp)->v_type == VCHR ||	\
	 (vp)->v_type == VSOCK ||	\
	 (vp)->v_type == VFIFO)
201 
202 static void
203 union_fixup(struct union_node *un)
204 {
205 
206 	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
207 	un->un_flags |= UN_ULOCK;
208 }
209 
210 static int
211 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
212 	struct componentname *cnp)
213 {
214 	int error;
215 	struct vnode *tdvp;
216 	struct vnode *dvp;
217 	struct mount *mp;
218 
219 	dvp = *dvpp;
220 
221 	/*
222 	 * If stepping up the directory tree, check for going
223 	 * back across the mount point, in which case do what
224 	 * lookup would do by stepping back down the mount
225 	 * hierarchy.
226 	 */
227 	if (cnp->cn_flags & ISDOTDOT) {
228 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
229 			/*
230 			 * Don't do the NOCROSSMOUNT check
231 			 * at this level.  By definition,
232 			 * union fs deals with namespaces, not
233 			 * filesystems.
234 			 */
235 			tdvp = dvp;
236 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
237 			VOP_UNLOCK(tdvp);
238 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
239 		}
240 	}
241 
242         error = VOP_LOOKUP(dvp, &tdvp, cnp);
243 	if (error)
244 		return (error);
245 
246 	dvp = tdvp;
247 
248 	/*
249 	 * Lastly check if the current node is a mount point in
250 	 * which case walk up the mount hierarchy making sure not to
251 	 * bump into the root of the mount tree (ie. dvp != udvp).
252 	 */
253 	while (dvp != udvp && (dvp->v_type == VDIR) &&
254 	       (mp = dvp->v_mountedhere)) {
255 		if (vfs_busy(mp, NULL))
256 			continue;
257 		vput(dvp);
258 		error = VFS_ROOT(mp, &tdvp);
259 		vfs_unbusy(mp, false, NULL);
260 		if (error) {
261 			return (error);
262 		}
263 		dvp = tdvp;
264 	}
265 
266 	*vpp = dvp;
267 	return (0);
268 }
269 
270 int
271 union_lookup(void *v)
272 {
273 	struct vop_lookup_args /* {
274 		struct vnodeop_desc *a_desc;
275 		struct vnode *a_dvp;
276 		struct vnode **a_vpp;
277 		struct componentname *a_cnp;
278 	} */ *ap = v;
279 	int error;
280 	int uerror, lerror;
281 	struct vnode *uppervp, *lowervp;
282 	struct vnode *upperdvp, *lowerdvp;
283 	struct vnode *dvp = ap->a_dvp;
284 	struct union_node *dun = VTOUNION(dvp);
285 	struct componentname *cnp = ap->a_cnp;
286 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
287 	kauth_cred_t saved_cred = NULL;
288 	int iswhiteout;
289 	struct vattr va;
290 
291 #ifdef notyet
292 	if (cnp->cn_namelen == 3 &&
293 			cnp->cn_nameptr[2] == '.' &&
294 			cnp->cn_nameptr[1] == '.' &&
295 			cnp->cn_nameptr[0] == '.') {
296 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
297 		if (dvp == NULLVP)
298 			return (ENOENT);
299 		vref(dvp);
300 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
301 		return (0);
302 	}
303 #endif
304 
305 	if ((cnp->cn_flags & ISLASTCN) &&
306 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
307 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
308 		return (EROFS);
309 
310 start:
311 	upperdvp = dun->un_uppervp;
312 	lowerdvp = dun->un_lowervp;
313 	uppervp = NULLVP;
314 	lowervp = NULLVP;
315 	iswhiteout = 0;
316 
317 	/*
318 	 * do the lookup in the upper level.
319 	 * if that level comsumes additional pathnames,
320 	 * then assume that something special is going
321 	 * on and just return that vnode.
322 	 */
323 	if (upperdvp != NULLVP) {
324 		FIXUP(dun);
325 		/*
326 		 * If we're doing `..' in the underlying filesystem,
327 		 * we must drop our lock on the union node before
328 		 * going up the tree in the lower file system--if we block
329 		 * on the lowervp lock, and that's held by someone else
330 		 * coming down the tree and who's waiting for our lock,
331 		 * we would be hosed.
332 		 */
333 		if (cnp->cn_flags & ISDOTDOT) {
334 			/* retain lock on underlying VP */
335 			dun->un_flags |= UN_KLOCK;
336 			VOP_UNLOCK(dvp);
337 		}
338 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
339 					&uppervp, cnp);
340 
341 		if (cnp->cn_flags & ISDOTDOT) {
342 			if (dun->un_uppervp == upperdvp) {
343 				/*
344 				 * we got the underlying bugger back locked...
345 				 * now take back the union node lock.  Since we
346 				 *  hold the uppervp lock, we can diddle union
347 				 * locking flags at will. :)
348 				 */
349 				dun->un_flags |= UN_ULOCK;
350 			}
351 			/*
352 			 * if upperdvp got swapped out, it means we did
353 			 * some mount point magic, and we do not have
354 			 * dun->un_uppervp locked currently--so we get it
355 			 * locked here (don't set the UN_ULOCK flag).
356 			 */
357 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
358 		}
359 		if (cnp->cn_consume != 0) {
360 			*ap->a_vpp = uppervp;
361 			return (uerror);
362 		}
363 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
364 			if (cnp->cn_flags & ISWHITEOUT) {
365 				iswhiteout = 1;
366 			} else if (lowerdvp != NULLVP) {
367 				lerror = VOP_GETATTR(upperdvp, &va,
368 					cnp->cn_cred);
369 				if (lerror == 0 && (va.va_flags & OPAQUE))
370 					iswhiteout = 1;
371 			}
372 		}
373 	} else {
374 		uerror = ENOENT;
375 	}
376 
377 	/*
378 	 * in a similar way to the upper layer, do the lookup
379 	 * in the lower layer.   this time, if there is some
380 	 * component magic going on, then vput whatever we got
381 	 * back from the upper layer and return the lower vnode
382 	 * instead.
383 	 */
384 	if (lowerdvp != NULLVP && !iswhiteout) {
385 		int nameiop;
386 
387 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
388 
389 		/*
390 		 * Only do a LOOKUP on the bottom node, since
391 		 * we won't be making changes to it anyway.
392 		 */
393 		nameiop = cnp->cn_nameiop;
394 		cnp->cn_nameiop = LOOKUP;
395 		if (um->um_op == UNMNT_BELOW) {
396 			saved_cred = cnp->cn_cred;
397 			cnp->cn_cred = um->um_cred;
398 		}
399 
400 		/*
401 		 * we shouldn't have to worry about locking interactions
402 		 * between the lower layer and our union layer (w.r.t.
403 		 * `..' processing) because we don't futz with lowervp
404 		 * locks in the union-node instantiation code path.
405 		 */
406 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
407 				&lowervp, cnp);
408 		if (um->um_op == UNMNT_BELOW)
409 			cnp->cn_cred = saved_cred;
410 		cnp->cn_nameiop = nameiop;
411 
412 		if (lowervp != lowerdvp)
413 			VOP_UNLOCK(lowerdvp);
414 
415 		if (cnp->cn_consume != 0) {
416 			if (uppervp != NULLVP) {
417 				if (uppervp == upperdvp)
418 					vrele(uppervp);
419 				else
420 					vput(uppervp);
421 				uppervp = NULLVP;
422 			}
423 			*ap->a_vpp = lowervp;
424 			return (lerror);
425 		}
426 	} else {
427 		lerror = ENOENT;
428 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
429 			lowervp = LOWERVP(dun->un_pvp);
430 			if (lowervp != NULLVP) {
431 				vref(lowervp);
432 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
433 				lerror = 0;
434 			}
435 		}
436 	}
437 
438 	/*
439 	 * EJUSTRETURN is used by underlying filesystems to indicate that
440 	 * a directory modification op was started successfully.
441 	 * This will only happen in the upper layer, since
442 	 * the lower layer only does LOOKUPs.
443 	 * If this union is mounted read-only, bounce it now.
444 	 */
445 
446 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
447 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
448 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
449 		uerror = EROFS;
450 
451 	/*
452 	 * at this point, we have uerror and lerror indicating
453 	 * possible errors with the lookups in the upper and lower
454 	 * layers.  additionally, uppervp and lowervp are (locked)
455 	 * references to existing vnodes in the upper and lower layers.
456 	 *
457 	 * there are now three cases to consider.
458 	 * 1. if both layers returned an error, then return whatever
459 	 *    error the upper layer generated.
460 	 *
461 	 * 2. if the top layer failed and the bottom layer succeeded
462 	 *    then two subcases occur.
463 	 *    a.  the bottom vnode is not a directory, in which
464 	 *	  case just return a new union vnode referencing
465 	 *	  an empty top layer and the existing bottom layer.
466 	 *    b.  the bottom vnode is a directory, in which case
467 	 *	  create a new directory in the top-level and
468 	 *	  continue as in case 3.
469 	 *
470 	 * 3. if the top layer succeeded then return a new union
471 	 *    vnode referencing whatever the new top layer and
472 	 *    whatever the bottom layer returned.
473 	 */
474 
475 	*ap->a_vpp = NULLVP;
476 
477 
478 	/* case 1. */
479 	if ((uerror != 0) && (lerror != 0)) {
480 		return (uerror);
481 	}
482 
483 	/* case 2. */
484 	if (uerror != 0 /* && (lerror == 0) */ ) {
485 		if (lowervp->v_type == VDIR) { /* case 2b. */
486 			/*
487 			 * We may be racing another process to make the
488 			 * upper-level shadow directory.  Be careful with
489 			 * locks/etc!
490 			 * If we have to create a shadow directory and want
491 			 * to commit the node we have to restart the lookup
492 			 * to get the componentname right.
493 			 */
494 			if (upperdvp) {
495 				dun->un_flags &= ~UN_ULOCK;
496 				VOP_UNLOCK(upperdvp);
497 				uerror = union_mkshadow(um, upperdvp, cnp,
498 				    &uppervp);
499 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
500 				dun->un_flags |= UN_ULOCK;
501 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
502 					vput(uppervp);
503 					if (lowervp != NULLVP)
504 						vput(lowervp);
505 					goto start;
506 				}
507 			}
508 			if (uerror) {
509 				if (lowervp != NULLVP) {
510 					vput(lowervp);
511 					lowervp = NULLVP;
512 				}
513 				return (uerror);
514 			}
515 		}
516 	}
517 
518 	if (lowervp != NULLVP)
519 		VOP_UNLOCK(lowervp);
520 
521 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
522 			      uppervp, lowervp, 1);
523 
524 	if (error) {
525 		if (uppervp != NULLVP)
526 			vput(uppervp);
527 		if (lowervp != NULLVP)
528 			vrele(lowervp);
529 	}
530 
531 	return (error);
532 }
533 
534 int
535 union_create(void *v)
536 {
537 	struct vop_create_args /* {
538 		struct vnode *a_dvp;
539 		struct vnode **a_vpp;
540 		struct componentname *a_cnp;
541 		struct vattr *a_vap;
542 	} */ *ap = v;
543 	struct union_node *un = VTOUNION(ap->a_dvp);
544 	struct vnode *dvp = un->un_uppervp;
545 	struct componentname *cnp = ap->a_cnp;
546 
547 	if (dvp != NULLVP) {
548 		int error;
549 		struct vnode *vp;
550 		struct mount *mp;
551 
552 		FIXUP(un);
553 
554 		vref(dvp);
555 		un->un_flags |= UN_KLOCK;
556 		mp = ap->a_dvp->v_mount;
557 		vput(ap->a_dvp);
558 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
559 		if (error)
560 			return (error);
561 
562 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
563 				NULLVP, 1);
564 		if (error)
565 			vput(vp);
566 		return (error);
567 	}
568 
569 	vput(ap->a_dvp);
570 	return (EROFS);
571 }
572 
573 int
574 union_whiteout(void *v)
575 {
576 	struct vop_whiteout_args /* {
577 		struct vnode *a_dvp;
578 		struct componentname *a_cnp;
579 		int a_flags;
580 	} */ *ap = v;
581 	struct union_node *un = VTOUNION(ap->a_dvp);
582 	struct componentname *cnp = ap->a_cnp;
583 
584 	if (un->un_uppervp == NULLVP)
585 		return (EOPNOTSUPP);
586 
587 	FIXUP(un);
588 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
589 }
590 
591 int
592 union_mknod(void *v)
593 {
594 	struct vop_mknod_args /* {
595 		struct vnode *a_dvp;
596 		struct vnode **a_vpp;
597 		struct componentname *a_cnp;
598 		struct vattr *a_vap;
599 	} */ *ap = v;
600 	struct union_node *un = VTOUNION(ap->a_dvp);
601 	struct vnode *dvp = un->un_uppervp;
602 	struct componentname *cnp = ap->a_cnp;
603 
604 	if (dvp != NULLVP) {
605 		int error;
606 		struct vnode *vp;
607 		struct mount *mp;
608 
609 		FIXUP(un);
610 
611 		vref(dvp);
612 		un->un_flags |= UN_KLOCK;
613 		mp = ap->a_dvp->v_mount;
614 		vput(ap->a_dvp);
615 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
616 		if (error)
617 			return (error);
618 
619 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
620 				      cnp, vp, NULLVP, 1);
621 		if (error)
622 		    vput(vp);
623 		return (error);
624 	}
625 
626 	vput(ap->a_dvp);
627 	return (EROFS);
628 }
629 
630 int
631 union_open(void *v)
632 {
633 	struct vop_open_args /* {
634 		struct vnodeop_desc *a_desc;
635 		struct vnode *a_vp;
636 		int a_mode;
637 		kauth_cred_t a_cred;
638 	} */ *ap = v;
639 	struct union_node *un = VTOUNION(ap->a_vp);
640 	struct vnode *tvp;
641 	int mode = ap->a_mode;
642 	kauth_cred_t cred = ap->a_cred;
643 	struct lwp *l = curlwp;
644 	int error;
645 
646 	/*
647 	 * If there is an existing upper vp then simply open that.
648 	 */
649 	tvp = un->un_uppervp;
650 	if (tvp == NULLVP) {
651 		/*
652 		 * If the lower vnode is being opened for writing, then
653 		 * copy the file contents to the upper vnode and open that,
654 		 * otherwise can simply open the lower vnode.
655 		 */
656 		tvp = un->un_lowervp;
657 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
658 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
659 			if (error == 0)
660 				error = VOP_OPEN(un->un_uppervp, mode, cred);
661 			return (error);
662 		}
663 
664 		/*
665 		 * Just open the lower vnode, but check for nodev mount flag
666 		 */
667 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
668 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
669 			return ENXIO;
670 		un->un_openl++;
671 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
672 		error = VOP_OPEN(tvp, mode, cred);
673 		VOP_UNLOCK(tvp);
674 
675 		return (error);
676 	}
677 	/*
678 	 * Just open the upper vnode, checking for nodev mount flag first
679 	 */
680 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
681 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
682 		return ENXIO;
683 
684 	FIXUP(un);
685 
686 	error = VOP_OPEN(tvp, mode, cred);
687 
688 	return (error);
689 }
690 
691 int
692 union_close(void *v)
693 {
694 	struct vop_close_args /* {
695 		struct vnode *a_vp;
696 		int  a_fflag;
697 		kauth_cred_t a_cred;
698 	} */ *ap = v;
699 	struct union_node *un = VTOUNION(ap->a_vp);
700 	struct vnode *vp;
701 	int error;
702 	bool do_lock;
703 
704 	vp = un->un_uppervp;
705 	if (vp != NULLVP) {
706 		do_lock = false;
707 	} else {
708 		KASSERT(un->un_openl > 0);
709 		--un->un_openl;
710 		vp = un->un_lowervp;
711 		do_lock = true;
712 	}
713 
714 	KASSERT(vp != NULLVP);
715 	ap->a_vp = vp;
716 	if (do_lock)
717 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
718 	error = VCALL(vp, VOFFSET(vop_close), ap);
719 	if (do_lock)
720 		VOP_UNLOCK(vp);
721 
722 	return error;
723 }
724 
725 /*
726  * Check access permission on the union vnode.
727  * The access check being enforced is to check
728  * against both the underlying vnode, and any
729  * copied vnode.  This ensures that no additional
730  * file permissions are given away simply because
731  * the user caused an implicit file copy.
732  */
733 int
734 union_access(void *v)
735 {
736 	struct vop_access_args /* {
737 		struct vnodeop_desc *a_desc;
738 		struct vnode *a_vp;
739 		int a_mode;
740 		kauth_cred_t a_cred;
741 	} */ *ap = v;
742 	struct vnode *vp = ap->a_vp;
743 	struct union_node *un = VTOUNION(vp);
744 	int error = EACCES;
745 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
746 
747 	/*
748 	 * Disallow write attempts on read-only file systems;
749 	 * unless the file is a socket, fifo, or a block or
750 	 * character device resident on the file system.
751 	 */
752 	if (ap->a_mode & VWRITE) {
753 		switch (vp->v_type) {
754 		case VDIR:
755 		case VLNK:
756 		case VREG:
757 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
758 				return (EROFS);
759 			break;
760 		case VBAD:
761 		case VBLK:
762 		case VCHR:
763 		case VSOCK:
764 		case VFIFO:
765 		case VNON:
766 		default:
767 			break;
768 		}
769 	}
770 
771 
772 	if ((vp = un->un_uppervp) != NULLVP) {
773 		FIXUP(un);
774 		ap->a_vp = vp;
775 		return (VCALL(vp, VOFFSET(vop_access), ap));
776 	}
777 
778 	if ((vp = un->un_lowervp) != NULLVP) {
779 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
780 		ap->a_vp = vp;
781 		error = VCALL(vp, VOFFSET(vop_access), ap);
782 		if (error == 0) {
783 			if (um->um_op == UNMNT_BELOW) {
784 				ap->a_cred = um->um_cred;
785 				error = VCALL(vp, VOFFSET(vop_access), ap);
786 			}
787 		}
788 		VOP_UNLOCK(vp);
789 		if (error)
790 			return (error);
791 	}
792 
793 	return (error);
794 }
795 
796 /*
797  * We handle getattr only to change the fsid and
798  * track object sizes
799  */
800 int
801 union_getattr(void *v)
802 {
803 	struct vop_getattr_args /* {
804 		struct vnode *a_vp;
805 		struct vattr *a_vap;
806 		kauth_cred_t a_cred;
807 	} */ *ap = v;
808 	int error;
809 	struct union_node *un = VTOUNION(ap->a_vp);
810 	struct vnode *vp = un->un_uppervp;
811 	struct vattr *vap;
812 	struct vattr va;
813 
814 
815 	/*
816 	 * Some programs walk the filesystem hierarchy by counting
817 	 * links to directories to avoid stat'ing all the time.
818 	 * This means the link count on directories needs to be "correct".
819 	 * The only way to do that is to call getattr on both layers
820 	 * and fix up the link count.  The link count will not necessarily
821 	 * be accurate but will be large enough to defeat the tree walkers.
822 	 *
823 	 * To make life more interesting, some filesystems don't keep
824 	 * track of link counts in the expected way, and return a
825 	 * link count of `1' for those directories; if either of the
826 	 * component directories returns a link count of `1', we return a 1.
827 	 */
828 
829 	vap = ap->a_vap;
830 
831 	vp = un->un_uppervp;
832 	if (vp != NULLVP) {
833 		if (un->un_flags & UN_LOCKED)
834 			FIXUP(un);
835 
836 		error = VOP_GETATTR(vp, vap, ap->a_cred);
837 		if (error)
838 			return (error);
839 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
840 	}
841 
842 	if (vp == NULLVP) {
843 		vp = un->un_lowervp;
844 	} else if (vp->v_type == VDIR) {
845 		vp = un->un_lowervp;
846 		if (vp != NULLVP)
847 			vap = &va;
848 	} else {
849 		vp = NULLVP;
850 	}
851 
852 	if (vp != NULLVP) {
853 		if (vp == un->un_lowervp)
854 			vn_lock(vp, LK_SHARED | LK_RETRY);
855 		error = VOP_GETATTR(vp, vap, ap->a_cred);
856 		if (vp == un->un_lowervp)
857 			VOP_UNLOCK(vp);
858 		if (error)
859 			return (error);
860 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
861 	}
862 
863 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
864 		/*
865 		 * Link count manipulation:
866 		 *	- If both return "2", return 2 (no subdirs)
867 		 *	- If one or the other return "1", return "1" (ENOCLUE)
868 		 */
869 		if ((ap->a_vap->va_nlink == 2) &&
870 		    (vap->va_nlink == 2))
871 			;
872 		else if (ap->a_vap->va_nlink != 1) {
873 			if (vap->va_nlink == 1)
874 				ap->a_vap->va_nlink = 1;
875 			else
876 				ap->a_vap->va_nlink += vap->va_nlink;
877 		}
878 	}
879 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
880 	return (0);
881 }
882 
883 int
884 union_setattr(void *v)
885 {
886 	struct vop_setattr_args /* {
887 		struct vnode *a_vp;
888 		struct vattr *a_vap;
889 		kauth_cred_t a_cred;
890 	} */ *ap = v;
891 	struct vattr *vap = ap->a_vap;
892 	struct vnode *vp = ap->a_vp;
893 	struct union_node *un = VTOUNION(vp);
894 	bool size_only;		/* All but va_size are VNOVAL. */
895 	int error;
896 
897 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
898 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
899 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
900 
901 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
902 		return (EROFS);
903 	if (vap->va_size != VNOVAL) {
904  		switch (vp->v_type) {
905  		case VDIR:
906  			return (EISDIR);
907  		case VCHR:
908  		case VBLK:
909  		case VSOCK:
910  		case VFIFO:
911 			break;
912 		case VREG:
913 		case VLNK:
914  		default:
915 			/*
916 			 * Disallow write attempts if the filesystem is
917 			 * mounted read-only.
918 			 */
919 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
920 				return (EROFS);
921 		}
922 	}
923 
924 	/*
925 	 * Handle case of truncating lower object to zero size,
926 	 * by creating a zero length upper object.  This is to
927 	 * handle the case of open with O_TRUNC and O_CREAT.
928 	 */
929 	if ((un->un_uppervp == NULLVP) &&
930 	    /* assert(un->un_lowervp != NULLVP) */
931 	    (un->un_lowervp->v_type == VREG)) {
932 		error = union_copyup(un, (vap->va_size != 0),
933 						ap->a_cred, curlwp);
934 		if (error)
935 			return (error);
936 	}
937 
938 	/*
939 	 * Try to set attributes in upper layer, ignore size change to zero
940 	 * for devices to handle O_TRUNC and return read-only filesystem error
941 	 * otherwise.
942 	 */
943 	if (un->un_uppervp != NULLVP) {
944 		FIXUP(un);
945 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
946 		if ((error == 0) && (vap->va_size != VNOVAL))
947 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
948 	} else {
949 		KASSERT(un->un_lowervp != NULLVP);
950 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
951 			if (size_only &&
952 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
953 				error = 0;
954 			else
955 				error = EROFS;
956 		} else {
957 			error = EROFS;
958 		}
959 	}
960 
961 	return (error);
962 }
963 
964 int
965 union_read(void *v)
966 {
967 	struct vop_read_args /* {
968 		struct vnode *a_vp;
969 		struct uio *a_uio;
970 		int  a_ioflag;
971 		kauth_cred_t a_cred;
972 	} */ *ap = v;
973 	int error;
974 	struct vnode *vp = OTHERVP(ap->a_vp);
975 	int dolock = (vp == LOWERVP(ap->a_vp));
976 
977 	if (dolock)
978 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
979 	else
980 		FIXUP(VTOUNION(ap->a_vp));
981 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
982 	if (dolock)
983 		VOP_UNLOCK(vp);
984 
985 	/*
986 	 * XXX
987 	 * perhaps the size of the underlying object has changed under
988 	 * our feet.  take advantage of the offset information present
989 	 * in the uio structure.
990 	 */
991 	if (error == 0) {
992 		struct union_node *un = VTOUNION(ap->a_vp);
993 		off_t cur = ap->a_uio->uio_offset;
994 
995 		if (vp == un->un_uppervp) {
996 			if (cur > un->un_uppersz)
997 				union_newsize(ap->a_vp, cur, VNOVAL);
998 		} else {
999 			if (cur > un->un_lowersz)
1000 				union_newsize(ap->a_vp, VNOVAL, cur);
1001 		}
1002 	}
1003 
1004 	return (error);
1005 }
1006 
1007 int
1008 union_write(void *v)
1009 {
1010 	struct vop_read_args /* {
1011 		struct vnode *a_vp;
1012 		struct uio *a_uio;
1013 		int  a_ioflag;
1014 		kauth_cred_t a_cred;
1015 	} */ *ap = v;
1016 	int error;
1017 	struct vnode *vp;
1018 	struct union_node *un = VTOUNION(ap->a_vp);
1019 
1020 	vp = UPPERVP(ap->a_vp);
1021 	if (vp == NULLVP) {
1022 		vp = LOWERVP(ap->a_vp);
1023 		if (NODE_IS_SPECIAL(vp)) {
1024 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1025 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1026 			    ap->a_cred);
1027 			VOP_UNLOCK(vp);
1028 			return error;
1029 		}
1030 		panic("union: missing upper layer in write");
1031 	}
1032 
1033 	FIXUP(un);
1034 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1035 
1036 	/*
1037 	 * the size of the underlying object may be changed by the
1038 	 * write.
1039 	 */
1040 	if (error == 0) {
1041 		off_t cur = ap->a_uio->uio_offset;
1042 
1043 		if (cur > un->un_uppersz)
1044 			union_newsize(ap->a_vp, cur, VNOVAL);
1045 	}
1046 
1047 	return (error);
1048 }
1049 
1050 int
1051 union_ioctl(void *v)
1052 {
1053 	struct vop_ioctl_args /* {
1054 		struct vnode *a_vp;
1055 		int  a_command;
1056 		void *a_data;
1057 		int  a_fflag;
1058 		kauth_cred_t a_cred;
1059 	} */ *ap = v;
1060 	struct vnode *ovp = OTHERVP(ap->a_vp);
1061 
1062 	ap->a_vp = ovp;
1063 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1064 }
1065 
1066 int
1067 union_poll(void *v)
1068 {
1069 	struct vop_poll_args /* {
1070 		struct vnode *a_vp;
1071 		int a_events;
1072 	} */ *ap = v;
1073 	struct vnode *ovp = OTHERVP(ap->a_vp);
1074 
1075 	ap->a_vp = ovp;
1076 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1077 }
1078 
1079 int
1080 union_revoke(void *v)
1081 {
1082 	struct vop_revoke_args /* {
1083 		struct vnode *a_vp;
1084 		int a_flags;
1085 		struct proc *a_p;
1086 	} */ *ap = v;
1087 	struct vnode *vp = ap->a_vp;
1088 
1089 	if (UPPERVP(vp))
1090 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1091 	if (LOWERVP(vp))
1092 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1093 	vgone(vp);	/* XXXAD?? */
1094 	return (0);
1095 }
1096 
1097 int
1098 union_mmap(void *v)
1099 {
1100 	struct vop_mmap_args /* {
1101 		struct vnode *a_vp;
1102 		vm_prot_t a_prot;
1103 		kauth_cred_t a_cred;
1104 	} */ *ap = v;
1105 	struct vnode *ovp = OTHERVP(ap->a_vp);
1106 
1107 	ap->a_vp = ovp;
1108 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1109 }
1110 
1111 int
1112 union_fsync(void *v)
1113 {
1114 	struct vop_fsync_args /* {
1115 		struct vnode *a_vp;
1116 		kauth_cred_t a_cred;
1117 		int  a_flags;
1118 		off_t offhi;
1119 		off_t offlo;
1120 	} */ *ap = v;
1121 	int error = 0;
1122 	struct vnode *targetvp;
1123 
1124 	/*
1125 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1126 	 * bother syncing the underlying vnodes, since (a) they'll be
1127 	 * fsync'ed when reclaimed and (b) we could deadlock if
1128 	 * they're locked; otherwise, pass it through to the
1129 	 * underlying layer.
1130 	 */
1131 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1132 		error = spec_fsync(v);
1133 		if (error)
1134 			return error;
1135 	}
1136 
1137 	if (ap->a_flags & FSYNC_RECLAIM)
1138 		return 0;
1139 
1140 	targetvp = OTHERVP(ap->a_vp);
1141 	if (targetvp != NULLVP) {
1142 		int dolock = (targetvp == LOWERVP(ap->a_vp));
1143 
1144 		if (dolock)
1145 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1146 		else
1147 			FIXUP(VTOUNION(ap->a_vp));
1148 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1149 			    ap->a_offlo, ap->a_offhi);
1150 		if (dolock)
1151 			VOP_UNLOCK(targetvp);
1152 	}
1153 
1154 	return (error);
1155 }
1156 
1157 int
1158 union_seek(void *v)
1159 {
1160 	struct vop_seek_args /* {
1161 		struct vnode *a_vp;
1162 		off_t  a_oldoff;
1163 		off_t  a_newoff;
1164 		kauth_cred_t a_cred;
1165 	} */ *ap = v;
1166 	struct vnode *ovp = OTHERVP(ap->a_vp);
1167 
1168 	ap->a_vp = ovp;
1169 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1170 }
1171 
1172 int
1173 union_remove(void *v)
1174 {
1175 	struct vop_remove_args /* {
1176 		struct vnode *a_dvp;
1177 		struct vnode *a_vp;
1178 		struct componentname *a_cnp;
1179 	} */ *ap = v;
1180 	int error;
1181 	struct union_node *dun = VTOUNION(ap->a_dvp);
1182 	struct union_node *un = VTOUNION(ap->a_vp);
1183 	struct componentname *cnp = ap->a_cnp;
1184 
1185 	if (dun->un_uppervp == NULLVP)
1186 		panic("union remove: null upper vnode");
1187 
1188 	if (un->un_uppervp != NULLVP) {
1189 		struct vnode *dvp = dun->un_uppervp;
1190 		struct vnode *vp = un->un_uppervp;
1191 
1192 		FIXUP(dun);
1193 		vref(dvp);
1194 		dun->un_flags |= UN_KLOCK;
1195 		vput(ap->a_dvp);
1196 		FIXUP(un);
1197 		vref(vp);
1198 		un->un_flags |= UN_KLOCK;
1199 		vput(ap->a_vp);
1200 
1201 		if (union_dowhiteout(un, cnp->cn_cred))
1202 			cnp->cn_flags |= DOWHITEOUT;
1203 		error = VOP_REMOVE(dvp, vp, cnp);
1204 		if (!error)
1205 			union_removed_upper(un);
1206 	} else {
1207 		FIXUP(dun);
1208 		error = union_mkwhiteout(
1209 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1210 			dun->un_uppervp, ap->a_cnp, un);
1211 		vput(ap->a_dvp);
1212 		vput(ap->a_vp);
1213 	}
1214 
1215 	return (error);
1216 }
1217 
1218 int
1219 union_link(void *v)
1220 {
1221 	struct vop_link_args /* {
1222 		struct vnode *a_dvp;
1223 		struct vnode *a_vp;
1224 		struct componentname *a_cnp;
1225 	} */ *ap = v;
1226 	int error = 0;
1227 	struct componentname *cnp = ap->a_cnp;
1228 	struct union_node *dun;
1229 	struct vnode *vp;
1230 	struct vnode *dvp;
1231 
1232 	dun = VTOUNION(ap->a_dvp);
1233 
1234 	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1235 
1236 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1237 		vp = ap->a_vp;
1238 	} else {
1239 		struct union_node *un = VTOUNION(ap->a_vp);
1240 		if (un->un_uppervp == NULLVP) {
1241 			/*
1242 			 * Needs to be copied before we can link it.
1243 			 */
1244 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1245 			if (dun->un_uppervp == un->un_dirvp) {
1246 				dun->un_flags &= ~UN_ULOCK;
1247 				VOP_UNLOCK(dun->un_uppervp);
1248 			}
1249 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1250 			if (dun->un_uppervp == un->un_dirvp) {
1251 				/*
1252 				 * During copyup, we dropped the lock on the
1253 				 * dir and invalidated any saved namei lookup
1254 				 * state for the directory we'll be entering
1255 				 * the link in.  We need to re-run the lookup
1256 				 * in that directory to reset any state needed
1257 				 * for VOP_LINK.
1258 				 * Call relookup on the union-layer to reset
1259 				 * the state.
1260 				 */
1261 				vp  = NULLVP;
1262 				if (dun->un_uppervp == NULLVP)
1263 					 panic("union: null upperdvp?");
1264 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1265 				if (error) {
1266 					VOP_UNLOCK(ap->a_vp);
1267 					return EROFS;	/* ? */
1268 				}
1269 				if (vp != NULLVP) {
1270 					/*
1271 					 * The name we want to create has
1272 					 * mysteriously appeared (a race?)
1273 					 */
1274 					error = EEXIST;
1275 					VOP_UNLOCK(ap->a_vp);
1276 					vput(ap->a_dvp);
1277 					vput(vp);
1278 					return (error);
1279 				}
1280 			}
1281 			VOP_UNLOCK(ap->a_vp);
1282 		}
1283 		vp = un->un_uppervp;
1284 	}
1285 
1286 	dvp = dun->un_uppervp;
1287 	if (dvp == NULLVP)
1288 		error = EROFS;
1289 
1290 	if (error) {
1291 		vput(ap->a_dvp);
1292 		return (error);
1293 	}
1294 
1295 	FIXUP(dun);
1296 	vref(dvp);
1297 	dun->un_flags |= UN_KLOCK;
1298 	vput(ap->a_dvp);
1299 
1300 	return (VOP_LINK(dvp, vp, cnp));
1301 }
1302 
1303 int
1304 union_rename(void *v)
1305 {
1306 	struct vop_rename_args  /* {
1307 		struct vnode *a_fdvp;
1308 		struct vnode *a_fvp;
1309 		struct componentname *a_fcnp;
1310 		struct vnode *a_tdvp;
1311 		struct vnode *a_tvp;
1312 		struct componentname *a_tcnp;
1313 	} */ *ap = v;
1314 	int error;
1315 
1316 	struct vnode *fdvp = ap->a_fdvp;
1317 	struct vnode *fvp = ap->a_fvp;
1318 	struct vnode *tdvp = ap->a_tdvp;
1319 	struct vnode *tvp = ap->a_tvp;
1320 
1321 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1322 		struct union_node *un = VTOUNION(fdvp);
1323 		if (un->un_uppervp == NULLVP) {
1324 			/*
1325 			 * this should never happen in normal
1326 			 * operation but might if there was
1327 			 * a problem creating the top-level shadow
1328 			 * directory.
1329 			 */
1330 			error = EXDEV;
1331 			goto bad;
1332 		}
1333 
1334 		fdvp = un->un_uppervp;
1335 		vref(fdvp);
1336 	}
1337 
1338 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1339 		struct union_node *un = VTOUNION(fvp);
1340 		if (un->un_uppervp == NULLVP) {
1341 			/* XXX: should do a copyup */
1342 			error = EXDEV;
1343 			goto bad;
1344 		}
1345 
1346 		if (un->un_lowervp != NULLVP)
1347 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1348 
1349 		fvp = un->un_uppervp;
1350 		vref(fvp);
1351 	}
1352 
1353 	if (tdvp->v_op == union_vnodeop_p) {
1354 		struct union_node *un = VTOUNION(tdvp);
1355 		if (un->un_uppervp == NULLVP) {
1356 			/*
1357 			 * this should never happen in normal
1358 			 * operation but might if there was
1359 			 * a problem creating the top-level shadow
1360 			 * directory.
1361 			 */
1362 			error = EXDEV;
1363 			goto bad;
1364 		}
1365 
1366 		tdvp = un->un_uppervp;
1367 		vref(tdvp);
1368 		un->un_flags |= UN_KLOCK;
1369 		vput(ap->a_tdvp);
1370 	}
1371 
1372 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1373 		struct union_node *un = VTOUNION(tvp);
1374 
1375 		tvp = un->un_uppervp;
1376 		if (tvp != NULLVP) {
1377 			vref(tvp);
1378 			un->un_flags |= UN_KLOCK;
1379 		}
1380 		vput(ap->a_tvp);
1381 	}
1382 
1383 	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1384 	goto out;
1385 
1386 bad:
1387 	vput(tdvp);
1388 	if (tvp != NULLVP)
1389 		vput(tvp);
1390 	vrele(fdvp);
1391 	vrele(fvp);
1392 
1393 out:
1394 	if (fdvp != ap->a_fdvp) {
1395 		vrele(ap->a_fdvp);
1396 	}
1397 	if (fvp != ap->a_fvp) {
1398 		vrele(ap->a_fvp);
1399 	}
1400 	return (error);
1401 }
1402 
1403 int
1404 union_mkdir(void *v)
1405 {
1406 	struct vop_mkdir_args /* {
1407 		struct vnode *a_dvp;
1408 		struct vnode **a_vpp;
1409 		struct componentname *a_cnp;
1410 		struct vattr *a_vap;
1411 	} */ *ap = v;
1412 	struct union_node *un = VTOUNION(ap->a_dvp);
1413 	struct vnode *dvp = un->un_uppervp;
1414 	struct componentname *cnp = ap->a_cnp;
1415 
1416 	if (dvp != NULLVP) {
1417 		int error;
1418 		struct vnode *vp;
1419 
1420 		FIXUP(un);
1421 		vref(dvp);
1422 		un->un_flags |= UN_KLOCK;
1423 		VOP_UNLOCK(ap->a_dvp);
1424 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1425 		if (error) {
1426 			vrele(ap->a_dvp);
1427 			return (error);
1428 		}
1429 
1430 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1431 				NULLVP, cnp, vp, NULLVP, 1);
1432 		if (error)
1433 			vput(vp);
1434 		vrele(ap->a_dvp);
1435 		return (error);
1436 	}
1437 
1438 	vput(ap->a_dvp);
1439 	return (EROFS);
1440 }
1441 
1442 int
1443 union_rmdir(void *v)
1444 {
1445 	struct vop_rmdir_args /* {
1446 		struct vnode *a_dvp;
1447 		struct vnode *a_vp;
1448 		struct componentname *a_cnp;
1449 	} */ *ap = v;
1450 	int error;
1451 	struct union_node *dun = VTOUNION(ap->a_dvp);
1452 	struct union_node *un = VTOUNION(ap->a_vp);
1453 	struct componentname *cnp = ap->a_cnp;
1454 
1455 	if (dun->un_uppervp == NULLVP)
1456 		panic("union rmdir: null upper vnode");
1457 
1458 	error = union_check_rmdir(un, cnp->cn_cred);
1459 	if (error) {
1460 		vput(ap->a_dvp);
1461 		vput(ap->a_vp);
1462 		return error;
1463 	}
1464 
1465 	if (un->un_uppervp != NULLVP) {
1466 		struct vnode *dvp = dun->un_uppervp;
1467 		struct vnode *vp = un->un_uppervp;
1468 
1469 		FIXUP(dun);
1470 		vref(dvp);
1471 		dun->un_flags |= UN_KLOCK;
1472 		vput(ap->a_dvp);
1473 		FIXUP(un);
1474 		vref(vp);
1475 		un->un_flags |= UN_KLOCK;
1476 		vput(ap->a_vp);
1477 
1478 		if (union_dowhiteout(un, cnp->cn_cred))
1479 			cnp->cn_flags |= DOWHITEOUT;
1480 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1481 		if (!error)
1482 			union_removed_upper(un);
1483 	} else {
1484 		FIXUP(dun);
1485 		error = union_mkwhiteout(
1486 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1487 			dun->un_uppervp, ap->a_cnp, un);
1488 		vput(ap->a_dvp);
1489 		vput(ap->a_vp);
1490 	}
1491 
1492 	return (error);
1493 }
1494 
1495 int
1496 union_symlink(void *v)
1497 {
1498 	struct vop_symlink_args /* {
1499 		struct vnode *a_dvp;
1500 		struct vnode **a_vpp;
1501 		struct componentname *a_cnp;
1502 		struct vattr *a_vap;
1503 		char *a_target;
1504 	} */ *ap = v;
1505 	struct union_node *un = VTOUNION(ap->a_dvp);
1506 	struct vnode *dvp = un->un_uppervp;
1507 	struct componentname *cnp = ap->a_cnp;
1508 
1509 	if (dvp != NULLVP) {
1510 		int error;
1511 
1512 		FIXUP(un);
1513 		vref(dvp);
1514 		un->un_flags |= UN_KLOCK;
1515 		vput(ap->a_dvp);
1516 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1517 				    ap->a_target);
1518 		return (error);
1519 	}
1520 
1521 	vput(ap->a_dvp);
1522 	return (EROFS);
1523 }
1524 
1525 /*
1526  * union_readdir works in concert with getdirentries and
1527  * readdir(3) to provide a list of entries in the unioned
1528  * directories.  getdirentries is responsible for walking
1529  * down the union stack.  readdir(3) is responsible for
1530  * eliminating duplicate names from the returned data stream.
1531  */
1532 int
1533 union_readdir(void *v)
1534 {
1535 	struct vop_readdir_args /* {
1536 		struct vnodeop_desc *a_desc;
1537 		struct vnode *a_vp;
1538 		struct uio *a_uio;
1539 		kauth_cred_t a_cred;
1540 		int *a_eofflag;
1541 		u_long *a_cookies;
1542 		int a_ncookies;
1543 	} */ *ap = v;
1544 	struct union_node *un = VTOUNION(ap->a_vp);
1545 	struct vnode *uvp = un->un_uppervp;
1546 
1547 	if (uvp == NULLVP)
1548 		return (0);
1549 
1550 	FIXUP(un);
1551 	ap->a_vp = uvp;
1552 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1553 }
1554 
1555 int
1556 union_readlink(void *v)
1557 {
1558 	struct vop_readlink_args /* {
1559 		struct vnode *a_vp;
1560 		struct uio *a_uio;
1561 		kauth_cred_t a_cred;
1562 	} */ *ap = v;
1563 	int error;
1564 	struct vnode *vp = OTHERVP(ap->a_vp);
1565 	int dolock = (vp == LOWERVP(ap->a_vp));
1566 
1567 	if (dolock)
1568 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1569 	else
1570 		FIXUP(VTOUNION(ap->a_vp));
1571 	ap->a_vp = vp;
1572 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1573 	if (dolock)
1574 		VOP_UNLOCK(vp);
1575 
1576 	return (error);
1577 }
1578 
1579 int
1580 union_abortop(void *v)
1581 {
1582 	struct vop_abortop_args /* {
1583 		struct vnode *a_dvp;
1584 		struct componentname *a_cnp;
1585 	} */ *ap = v;
1586 	int error;
1587 	struct vnode *vp = OTHERVP(ap->a_dvp);
1588 	struct union_node *un = VTOUNION(ap->a_dvp);
1589 	int islocked = un->un_flags & UN_LOCKED;
1590 	int dolock = (vp == LOWERVP(ap->a_dvp));
1591 
1592 	if (islocked) {
1593 		if (dolock)
1594 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1595 		else
1596 			FIXUP(VTOUNION(ap->a_dvp));
1597 	}
1598 	ap->a_dvp = vp;
1599 	error = VCALL(vp, VOFFSET(vop_abortop), ap);
1600 	if (islocked && dolock)
1601 		VOP_UNLOCK(vp);
1602 
1603 	return (error);
1604 }
1605 
1606 int
1607 union_inactive(void *v)
1608 {
1609 	struct vop_inactive_args /* {
1610 		const struct vnodeop_desc *a_desc;
1611 		struct vnode *a_vp;
1612 		bool *a_recycle;
1613 	} */ *ap = v;
1614 	struct vnode *vp = ap->a_vp;
1615 	struct union_node *un = VTOUNION(vp);
1616 	struct vnode **vpp;
1617 
1618 	/*
1619 	 * Do nothing (and _don't_ bypass).
1620 	 * Wait to vrele lowervp until reclaim,
1621 	 * so that until then our union_node is in the
1622 	 * cache and reusable.
1623 	 *
1624 	 * NEEDSWORK: Someday, consider inactive'ing
1625 	 * the lowervp and then trying to reactivate it
1626 	 * with capabilities (v_id)
1627 	 * like they do in the name lookup cache code.
1628 	 * That's too much work for now.
1629 	 */
1630 
1631 	if (un->un_dircache != 0) {
1632 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1633 			vrele(*vpp);
1634 		free(un->un_dircache, M_TEMP);
1635 		un->un_dircache = 0;
1636 	}
1637 
1638 	*ap->a_recycle = ((un->un_flags & UN_CACHED) == 0);
1639 	VOP_UNLOCK(vp);
1640 
1641 	return (0);
1642 }
1643 
1644 int
1645 union_reclaim(void *v)
1646 {
1647 	struct vop_reclaim_args /* {
1648 		struct vnode *a_vp;
1649 	} */ *ap = v;
1650 
1651 	union_freevp(ap->a_vp);
1652 
1653 	return (0);
1654 }
1655 
1656 int
1657 union_lock(void *v)
1658 {
1659 	struct vop_lock_args /* {
1660 		struct vnode *a_vp;
1661 		int a_flags;
1662 	} */ *ap = v;
1663 	struct vnode *vp = ap->a_vp;
1664 	int flags = ap->a_flags;
1665 	struct union_node *un;
1666 	int error;
1667 
1668 	/* XXX unionfs can't handle shared locks yet */
1669 	if ((flags & LK_SHARED) != 0) {
1670 		flags = (flags & ~LK_SHARED) | LK_EXCLUSIVE;
1671 	}
1672 
1673 start:
1674 	un = VTOUNION(vp);
1675 
1676 	if (un->un_uppervp != NULLVP) {
1677 		if (((un->un_flags & UN_ULOCK) == 0) &&
1678 		    (vp->v_usecount != 0)) {
1679 			/*
1680 			 * We MUST always use the order of: take upper
1681 			 * vp lock, manipulate union node flags, drop
1682 			 * upper vp lock.  This code must not be an
1683 			 * exception.
1684 			 */
1685 			error = vn_lock(un->un_uppervp, flags);
1686 			if (error)
1687 				return (error);
1688 			un->un_flags |= UN_ULOCK;
1689 		}
1690 #ifdef DIAGNOSTIC
1691 		if (un->un_flags & UN_KLOCK) {
1692 			vprint("union: dangling klock", vp);
1693 			panic("union: dangling upper lock (%p)", vp);
1694 		}
1695 #endif
1696 	}
1697 
1698 	/* XXX ignores LK_NOWAIT */
1699 	if (un->un_flags & UN_LOCKED) {
1700 		KASSERT(curlwp == NULL || un->un_lwp == NULL ||
1701 		    un->un_lwp != curlwp);
1702 		un->un_flags |= UN_WANTED;
1703 		tsleep(&un->un_flags, PINOD, "unionlk2", 0);
1704 		goto start;
1705 	}
1706 
1707 	un->un_lwp = curlwp;
1708 
1709 	un->un_flags |= UN_LOCKED;
1710 	return (0);
1711 }
1712 
1713 /*
1714  * When operations want to vput() a union node yet retain a lock on
1715  * the upper vnode (say, to do some further operations like link(),
1716  * mkdir(), ...), they set UN_KLOCK on the union node, then call
1717  * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1718  * unlocks the union node (leaving the upper vnode alone), clears the
1719  * KLOCK flag, and then returns to vput().  The caller then does whatever
1720  * is left to do with the upper vnode, and ensures that it gets unlocked.
1721  *
1722  * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1723  */
1724 int
1725 union_unlock(void *v)
1726 {
1727 	struct vop_unlock_args /* {
1728 		struct vnode *a_vp;
1729 		int a_flags;
1730 	} */ *ap = v;
1731 	struct union_node *un = VTOUNION(ap->a_vp);
1732 
1733 	KASSERT((un->un_flags & UN_LOCKED) != 0);
1734 	KASSERT(curlwp == NULL || un->un_lwp == NULL ||
1735 	    un->un_lwp == curlwp);
1736 
1737 	un->un_flags &= ~UN_LOCKED;
1738 
1739 	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1740 		VOP_UNLOCK(un->un_uppervp);
1741 
1742 	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1743 
1744 	if (un->un_flags & UN_WANTED) {
1745 		un->un_flags &= ~UN_WANTED;
1746 		wakeup( &un->un_flags);
1747 	}
1748 
1749 	un->un_lwp = NULL;
1750 
1751 	return (0);
1752 }
1753 
1754 int
1755 union_bmap(void *v)
1756 {
1757 	struct vop_bmap_args /* {
1758 		struct vnode *a_vp;
1759 		daddr_t  a_bn;
1760 		struct vnode **a_vpp;
1761 		daddr_t *a_bnp;
1762 		int *a_runp;
1763 	} */ *ap = v;
1764 	int error;
1765 	struct vnode *vp = OTHERVP(ap->a_vp);
1766 	int dolock = (vp == LOWERVP(ap->a_vp));
1767 
1768 	if (dolock)
1769 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1770 	else
1771 		FIXUP(VTOUNION(ap->a_vp));
1772 	ap->a_vp = vp;
1773 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1774 	if (dolock)
1775 		VOP_UNLOCK(vp);
1776 
1777 	return (error);
1778 }
1779 
1780 int
1781 union_print(void *v)
1782 {
1783 	struct vop_print_args /* {
1784 		struct vnode *a_vp;
1785 	} */ *ap = v;
1786 	struct vnode *vp = ap->a_vp;
1787 
1788 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1789 			vp, UPPERVP(vp), LOWERVP(vp));
1790 	if (UPPERVP(vp) != NULLVP)
1791 		vprint("union: upper", UPPERVP(vp));
1792 	if (LOWERVP(vp) != NULLVP)
1793 		vprint("union: lower", LOWERVP(vp));
1794 	if (VTOUNION(vp)->un_dircache) {
1795 		struct vnode **vpp;
1796 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1797 			vprint("dircache:", *vpp);
1798 	}
1799 
1800 	return (0);
1801 }
1802 
1803 int
1804 union_islocked(void *v)
1805 {
1806 	struct vop_islocked_args /* {
1807 		struct vnode *a_vp;
1808 	} */ *ap = v;
1809 
1810 	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? LK_EXCLUSIVE : 0);
1811 }
1812 
1813 int
1814 union_pathconf(void *v)
1815 {
1816 	struct vop_pathconf_args /* {
1817 		struct vnode *a_vp;
1818 		int a_name;
1819 		int *a_retval;
1820 	} */ *ap = v;
1821 	int error;
1822 	struct vnode *vp = OTHERVP(ap->a_vp);
1823 	int dolock = (vp == LOWERVP(ap->a_vp));
1824 
1825 	if (dolock)
1826 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1827 	else
1828 		FIXUP(VTOUNION(ap->a_vp));
1829 	ap->a_vp = vp;
1830 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1831 	if (dolock)
1832 		VOP_UNLOCK(vp);
1833 
1834 	return (error);
1835 }
1836 
1837 int
1838 union_advlock(void *v)
1839 {
1840 	struct vop_advlock_args /* {
1841 		struct vnode *a_vp;
1842 		void *a_id;
1843 		int  a_op;
1844 		struct flock *a_fl;
1845 		int  a_flags;
1846 	} */ *ap = v;
1847 	struct vnode *ovp = OTHERVP(ap->a_vp);
1848 
1849 	ap->a_vp = ovp;
1850 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1851 }
1852 
1853 int
1854 union_strategy(void *v)
1855 {
1856 	struct vop_strategy_args /* {
1857 		struct vnode *a_vp;
1858 		struct buf *a_bp;
1859 	} */ *ap = v;
1860 	struct vnode *ovp = OTHERVP(ap->a_vp);
1861 	struct buf *bp = ap->a_bp;
1862 
1863 	KASSERT(ovp != NULLVP);
1864 	if (!NODE_IS_SPECIAL(ovp))
1865 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1866 
1867 	return (VOP_STRATEGY(ovp, bp));
1868 }
1869 
1870 int
1871 union_bwrite(void *v)
1872 {
1873 	struct vop_bwrite_args /* {
1874 		struct vnode *a_vp;
1875 		struct buf *a_bp;
1876 	} */ *ap = v;
1877 	struct vnode *ovp = OTHERVP(ap->a_vp);
1878 	struct buf *bp = ap->a_bp;
1879 
1880 	KASSERT(ovp != NULLVP);
1881 	if (!NODE_IS_SPECIAL(ovp))
1882 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1883 
1884 	return (VOP_BWRITE(ovp, bp));
1885 }
1886 
1887 int
1888 union_getpages(void *v)
1889 {
1890 	struct vop_getpages_args /* {
1891 		struct vnode *a_vp;
1892 		voff_t a_offset;
1893 		struct vm_page **a_m;
1894 		int *a_count;
1895 		int a_centeridx;
1896 		vm_prot_t a_access_type;
1897 		int a_advice;
1898 		int a_flags;
1899 	} */ *ap = v;
1900 	struct vnode *vp = ap->a_vp;
1901 
1902 	KASSERT(mutex_owned(vp->v_interlock));
1903 
1904 	if (ap->a_flags & PGO_LOCKED) {
1905 		return EBUSY;
1906 	}
1907 	ap->a_vp = OTHERVP(vp);
1908 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1909 
1910 	/* Just pass the request on to the underlying layer. */
1911 	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1912 }
1913 
1914 int
1915 union_putpages(void *v)
1916 {
1917 	struct vop_putpages_args /* {
1918 		struct vnode *a_vp;
1919 		voff_t a_offlo;
1920 		voff_t a_offhi;
1921 		int a_flags;
1922 	} */ *ap = v;
1923 	struct vnode *vp = ap->a_vp;
1924 
1925 	KASSERT(mutex_owned(vp->v_interlock));
1926 
1927 	ap->a_vp = OTHERVP(vp);
1928 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1929 
1930 	if (ap->a_flags & PGO_RECLAIM) {
1931 		mutex_exit(vp->v_interlock);
1932 		return 0;
1933 	}
1934 
1935 	/* Just pass the request on to the underlying layer. */
1936 	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1937 }
1938 
1939 int
1940 union_kqfilter(void *v)
1941 {
1942 	struct vop_kqfilter_args /* {
1943 		struct vnode	*a_vp;
1944 		struct knote	*a_kn;
1945 	} */ *ap = v;
1946 	int error;
1947 
1948 	/*
1949 	 * We watch either the upper layer file (if it already exists),
1950 	 * or the lower layer one. If there is lower layer file only
1951 	 * at this moment, we will keep watching that lower layer file
1952 	 * even if upper layer file would be created later on.
1953 	 */
1954 	if (UPPERVP(ap->a_vp))
1955 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1956 	else if (LOWERVP(ap->a_vp))
1957 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1958 	else {
1959 		/* panic? */
1960 		error = EOPNOTSUPP;
1961 	}
1962 
1963 	return (error);
1964 }
1965