xref: /netbsd-src/sys/fs/union/union_vnops.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*	$NetBSD: union_vnops.c,v 1.39 2011/01/02 05:09:31 dholland Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.39 2011/01/02 05:09:31 dholland Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 
/*
 * Vnode operation entry points implemented by the union file system.
 * Each one receives its operation-specific argument structure through
 * an untyped pointer.  Listed alphabetically.
 */
int	union_abortop(void *);
int	union_access(void *);
int	union_advlock(void *);
int	union_bmap(void *);
int	union_close(void *);
int	union_create(void *);
int	union_fsync(void *);
int	union_getattr(void *);
int	union_getpages(void *);
int	union_inactive(void *);
int	union_ioctl(void *);
int	union_islocked(void *);
int	union_kqfilter(void *);
int	union_link(void *);
int	union_lock(void *);
int	union_lookup(void *);
int	union_mkdir(void *);
int	union_mknod(void *);
int	union_mmap(void *);
int	union_open(void *);
int	union_pathconf(void *);
int	union_poll(void *);
int	union_print(void *);
int	union_putpages(void *);
int	union_read(void *);
int	union_readdir(void *);
int	union_readlink(void *);
int	union_reclaim(void *);
int	union_remove(void *);
int	union_rename(void *);
int	union_revoke(void *);
int	union_rmdir(void *);
int	union_seek(void *);
int	union_setattr(void *);
int	union_strategy(void *);
int	union_symlink(void *);
int	union_unlock(void *);
int	union_whiteout(void *);
int	union_write(void *);
134 
/* Helpers private to this file. */
static void	union_fixup(struct union_node *un);
static int	union_lookup1(struct vnode *udvp, struct vnode **dvpp,
		    struct vnode **vpp, struct componentname *cnp);
138 
139 
140 /*
141  * Global vfs data structures
142  */
143 int (**union_vnodeop_p)(void *);
144 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
145 	{ &vop_default_desc, vn_default_error },
146 	{ &vop_lookup_desc, union_lookup },		/* lookup */
147 	{ &vop_create_desc, union_create },		/* create */
148 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
149 	{ &vop_mknod_desc, union_mknod },		/* mknod */
150 	{ &vop_open_desc, union_open },			/* open */
151 	{ &vop_close_desc, union_close },		/* close */
152 	{ &vop_access_desc, union_access },		/* access */
153 	{ &vop_getattr_desc, union_getattr },		/* getattr */
154 	{ &vop_setattr_desc, union_setattr },		/* setattr */
155 	{ &vop_read_desc, union_read },			/* read */
156 	{ &vop_write_desc, union_write },		/* write */
157 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
158 	{ &vop_poll_desc, union_poll },			/* select */
159 	{ &vop_revoke_desc, union_revoke },		/* revoke */
160 	{ &vop_mmap_desc, union_mmap },			/* mmap */
161 	{ &vop_fsync_desc, union_fsync },		/* fsync */
162 	{ &vop_seek_desc, union_seek },			/* seek */
163 	{ &vop_remove_desc, union_remove },		/* remove */
164 	{ &vop_link_desc, union_link },			/* link */
165 	{ &vop_rename_desc, union_rename },		/* rename */
166 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
167 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
168 	{ &vop_symlink_desc, union_symlink },		/* symlink */
169 	{ &vop_readdir_desc, union_readdir },		/* readdir */
170 	{ &vop_readlink_desc, union_readlink },		/* readlink */
171 	{ &vop_abortop_desc, union_abortop },		/* abortop */
172 	{ &vop_inactive_desc, union_inactive },		/* inactive */
173 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
174 	{ &vop_lock_desc, union_lock },			/* lock */
175 	{ &vop_unlock_desc, union_unlock },		/* unlock */
176 	{ &vop_bmap_desc, union_bmap },			/* bmap */
177 	{ &vop_strategy_desc, union_strategy },		/* strategy */
178 	{ &vop_print_desc, union_print },		/* print */
179 	{ &vop_islocked_desc, union_islocked },		/* islocked */
180 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
181 	{ &vop_advlock_desc, union_advlock },		/* advlock */
182 	{ &vop_getpages_desc, union_getpages },		/* getpages */
183 	{ &vop_putpages_desc, union_putpages },		/* putpages */
184 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
185 #ifdef notdef
186 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
187 #endif
188 	{ NULL, NULL }
189 };
190 const struct vnodeopv_desc union_vnodeop_opv_desc =
191 	{ &union_vnodeop_p, union_vnodeop_entries };
192 
/*
 * FIXUP(un): if the UN_ULOCK flag is not set on union node `un', call
 * union_fixup() to lock the upper vnode and set the flag.
 *
 * The body is wrapped in do { } while (0) so that `FIXUP(un);' is a
 * single statement and can be used safely as the body of an if/else.
 * Note that `un' is evaluated more than once; pass an expression
 * without side effects.
 */
#define FIXUP(un) do { \
	if (((un)->un_flags & UN_ULOCK) == 0) { \
		union_fixup(un); \
	} \
} while (/*CONSTCOND*/0)
198 
199 static void
200 union_fixup(struct union_node *un)
201 {
202 
203 	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
204 	un->un_flags |= UN_ULOCK;
205 }
206 
207 static int
208 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
209 	struct componentname *cnp)
210 {
211 	int error;
212 	struct vnode *tdvp;
213 	struct vnode *dvp;
214 	struct mount *mp;
215 
216 	dvp = *dvpp;
217 
218 	/*
219 	 * If stepping up the directory tree, check for going
220 	 * back across the mount point, in which case do what
221 	 * lookup would do by stepping back down the mount
222 	 * hierarchy.
223 	 */
224 	if (cnp->cn_flags & ISDOTDOT) {
225 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
226 			/*
227 			 * Don't do the NOCROSSMOUNT check
228 			 * at this level.  By definition,
229 			 * union fs deals with namespaces, not
230 			 * filesystems.
231 			 */
232 			tdvp = dvp;
233 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
234 			VOP_UNLOCK(tdvp);
235 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
236 		}
237 	}
238 
239         error = VOP_LOOKUP(dvp, &tdvp, cnp);
240 	if (error)
241 		return (error);
242 
243 	dvp = tdvp;
244 
245 	/*
246 	 * Lastly check if the current node is a mount point in
247 	 * which case walk up the mount hierarchy making sure not to
248 	 * bump into the root of the mount tree (ie. dvp != udvp).
249 	 */
250 	while (dvp != udvp && (dvp->v_type == VDIR) &&
251 	       (mp = dvp->v_mountedhere)) {
252 		if (vfs_busy(mp, NULL))
253 			continue;
254 		vput(dvp);
255 		error = VFS_ROOT(mp, &tdvp);
256 		vfs_unbusy(mp, false, NULL);
257 		if (error) {
258 			return (error);
259 		}
260 		dvp = tdvp;
261 	}
262 
263 	*vpp = dvp;
264 	return (0);
265 }
266 
267 int
268 union_lookup(void *v)
269 {
270 	struct vop_lookup_args /* {
271 		struct vnodeop_desc *a_desc;
272 		struct vnode *a_dvp;
273 		struct vnode **a_vpp;
274 		struct componentname *a_cnp;
275 	} */ *ap = v;
276 	int error;
277 	int uerror, lerror;
278 	struct vnode *uppervp, *lowervp;
279 	struct vnode *upperdvp, *lowerdvp;
280 	struct vnode *dvp = ap->a_dvp;
281 	struct union_node *dun = VTOUNION(dvp);
282 	struct componentname *cnp = ap->a_cnp;
283 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
284 	kauth_cred_t saved_cred = NULL;
285 	int iswhiteout;
286 	struct vattr va;
287 
288 #ifdef notyet
289 	if (cnp->cn_namelen == 3 &&
290 			cnp->cn_nameptr[2] == '.' &&
291 			cnp->cn_nameptr[1] == '.' &&
292 			cnp->cn_nameptr[0] == '.') {
293 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
294 		if (dvp == NULLVP)
295 			return (ENOENT);
296 		vref(dvp);
297 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
298 		return (0);
299 	}
300 #endif
301 
302 	if ((cnp->cn_flags & ISLASTCN) &&
303 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
304 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
305 		return (EROFS);
306 
307 	upperdvp = dun->un_uppervp;
308 	lowerdvp = dun->un_lowervp;
309 	uppervp = NULLVP;
310 	lowervp = NULLVP;
311 	iswhiteout = 0;
312 
313 	/*
314 	 * do the lookup in the upper level.
315 	 * if that level comsumes additional pathnames,
316 	 * then assume that something special is going
317 	 * on and just return that vnode.
318 	 */
319 	if (upperdvp != NULLVP) {
320 		FIXUP(dun);
321 		/*
322 		 * If we're doing `..' in the underlying filesystem,
323 		 * we must drop our lock on the union node before
324 		 * going up the tree in the lower file system--if we block
325 		 * on the lowervp lock, and that's held by someone else
326 		 * coming down the tree and who's waiting for our lock,
327 		 * we would be hosed.
328 		 */
329 		if (cnp->cn_flags & ISDOTDOT) {
330 			/* retain lock on underlying VP */
331 			dun->un_flags |= UN_KLOCK;
332 			VOP_UNLOCK(dvp);
333 		}
334 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
335 					&uppervp, cnp);
336 
337 		if (cnp->cn_flags & ISDOTDOT) {
338 			if (dun->un_uppervp == upperdvp) {
339 				/*
340 				 * we got the underlying bugger back locked...
341 				 * now take back the union node lock.  Since we
342 				 *  hold the uppervp lock, we can diddle union
343 				 * locking flags at will. :)
344 				 */
345 				dun->un_flags |= UN_ULOCK;
346 			}
347 			/*
348 			 * if upperdvp got swapped out, it means we did
349 			 * some mount point magic, and we do not have
350 			 * dun->un_uppervp locked currently--so we get it
351 			 * locked here (don't set the UN_ULOCK flag).
352 			 */
353 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
354 		}
355 		if (cnp->cn_consume != 0) {
356 			*ap->a_vpp = uppervp;
357 			return (uerror);
358 		}
359 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
360 			if (cnp->cn_flags & ISWHITEOUT) {
361 				iswhiteout = 1;
362 			} else if (lowerdvp != NULLVP) {
363 				lerror = VOP_GETATTR(upperdvp, &va,
364 					cnp->cn_cred);
365 				if (lerror == 0 && (va.va_flags & OPAQUE))
366 					iswhiteout = 1;
367 			}
368 		}
369 	} else {
370 		uerror = ENOENT;
371 	}
372 
373 	/*
374 	 * in a similar way to the upper layer, do the lookup
375 	 * in the lower layer.   this time, if there is some
376 	 * component magic going on, then vput whatever we got
377 	 * back from the upper layer and return the lower vnode
378 	 * instead.
379 	 */
380 	if (lowerdvp != NULLVP && !iswhiteout) {
381 		int nameiop;
382 
383 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
384 
385 		/*
386 		 * Only do a LOOKUP on the bottom node, since
387 		 * we won't be making changes to it anyway.
388 		 */
389 		nameiop = cnp->cn_nameiop;
390 		cnp->cn_nameiop = LOOKUP;
391 		if (um->um_op == UNMNT_BELOW) {
392 			saved_cred = cnp->cn_cred;
393 			cnp->cn_cred = um->um_cred;
394 		}
395 
396 		/*
397 		 * we shouldn't have to worry about locking interactions
398 		 * between the lower layer and our union layer (w.r.t.
399 		 * `..' processing) because we don't futz with lowervp
400 		 * locks in the union-node instantiation code path.
401 		 */
402 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
403 				&lowervp, cnp);
404 		if (um->um_op == UNMNT_BELOW)
405 			cnp->cn_cred = saved_cred;
406 		cnp->cn_nameiop = nameiop;
407 
408 		if (lowervp != lowerdvp)
409 			VOP_UNLOCK(lowerdvp);
410 
411 		if (cnp->cn_consume != 0) {
412 			if (uppervp != NULLVP) {
413 				if (uppervp == upperdvp)
414 					vrele(uppervp);
415 				else
416 					vput(uppervp);
417 				uppervp = NULLVP;
418 			}
419 			*ap->a_vpp = lowervp;
420 			return (lerror);
421 		}
422 	} else {
423 		lerror = ENOENT;
424 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
425 			lowervp = LOWERVP(dun->un_pvp);
426 			if (lowervp != NULLVP) {
427 				vref(lowervp);
428 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
429 				lerror = 0;
430 			}
431 		}
432 	}
433 
434 	/*
435 	 * EJUSTRETURN is used by underlying filesystems to indicate that
436 	 * a directory modification op was started successfully.
437 	 * This will only happen in the upper layer, since
438 	 * the lower layer only does LOOKUPs.
439 	 * If this union is mounted read-only, bounce it now.
440 	 */
441 
442 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
443 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
444 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
445 		uerror = EROFS;
446 
447 	/*
448 	 * at this point, we have uerror and lerror indicating
449 	 * possible errors with the lookups in the upper and lower
450 	 * layers.  additionally, uppervp and lowervp are (locked)
451 	 * references to existing vnodes in the upper and lower layers.
452 	 *
453 	 * there are now three cases to consider.
454 	 * 1. if both layers returned an error, then return whatever
455 	 *    error the upper layer generated.
456 	 *
457 	 * 2. if the top layer failed and the bottom layer succeeded
458 	 *    then two subcases occur.
459 	 *    a.  the bottom vnode is not a directory, in which
460 	 *	  case just return a new union vnode referencing
461 	 *	  an empty top layer and the existing bottom layer.
462 	 *    b.  the bottom vnode is a directory, in which case
463 	 *	  create a new directory in the top-level and
464 	 *	  continue as in case 3.
465 	 *
466 	 * 3. if the top layer succeeded then return a new union
467 	 *    vnode referencing whatever the new top layer and
468 	 *    whatever the bottom layer returned.
469 	 */
470 
471 	*ap->a_vpp = NULLVP;
472 
473 
474 	/* case 1. */
475 	if ((uerror != 0) && (lerror != 0)) {
476 		return (uerror);
477 	}
478 
479 	/* case 2. */
480 	if (uerror != 0 /* && (lerror == 0) */ ) {
481 		if (lowervp->v_type == VDIR) { /* case 2b. */
482 			/*
483 			 * We may be racing another process to make the
484 			 * upper-level shadow directory.  Be careful with
485 			 * locks/etc!
486 			 */
487 			if (upperdvp) {
488 				dun->un_flags &= ~UN_ULOCK;
489 				VOP_UNLOCK(upperdvp);
490 				uerror = union_mkshadow(um, upperdvp, cnp,
491 				    &uppervp);
492 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
493 				dun->un_flags |= UN_ULOCK;
494 			}
495 			if (uerror) {
496 				if (lowervp != NULLVP) {
497 					vput(lowervp);
498 					lowervp = NULLVP;
499 				}
500 				return (uerror);
501 			}
502 		}
503 	}
504 
505 	if (lowervp != NULLVP)
506 		VOP_UNLOCK(lowervp);
507 
508 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
509 			      uppervp, lowervp, 1);
510 
511 	if (error) {
512 		if (uppervp != NULLVP)
513 			vput(uppervp);
514 		if (lowervp != NULLVP)
515 			vrele(lowervp);
516 	}
517 
518 	return (error);
519 }
520 
521 int
522 union_create(void *v)
523 {
524 	struct vop_create_args /* {
525 		struct vnode *a_dvp;
526 		struct vnode **a_vpp;
527 		struct componentname *a_cnp;
528 		struct vattr *a_vap;
529 	} */ *ap = v;
530 	struct union_node *un = VTOUNION(ap->a_dvp);
531 	struct vnode *dvp = un->un_uppervp;
532 	struct componentname *cnp = ap->a_cnp;
533 
534 	if (dvp != NULLVP) {
535 		int error;
536 		struct vnode *vp;
537 		struct mount *mp;
538 
539 		FIXUP(un);
540 
541 		vref(dvp);
542 		un->un_flags |= UN_KLOCK;
543 		mp = ap->a_dvp->v_mount;
544 		vput(ap->a_dvp);
545 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
546 		if (error)
547 			return (error);
548 
549 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
550 				NULLVP, 1);
551 		if (error)
552 			vput(vp);
553 		return (error);
554 	}
555 
556 	vput(ap->a_dvp);
557 	return (EROFS);
558 }
559 
560 int
561 union_whiteout(void *v)
562 {
563 	struct vop_whiteout_args /* {
564 		struct vnode *a_dvp;
565 		struct componentname *a_cnp;
566 		int a_flags;
567 	} */ *ap = v;
568 	struct union_node *un = VTOUNION(ap->a_dvp);
569 	struct componentname *cnp = ap->a_cnp;
570 
571 	if (un->un_uppervp == NULLVP)
572 		return (EOPNOTSUPP);
573 
574 	FIXUP(un);
575 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
576 }
577 
578 int
579 union_mknod(void *v)
580 {
581 	struct vop_mknod_args /* {
582 		struct vnode *a_dvp;
583 		struct vnode **a_vpp;
584 		struct componentname *a_cnp;
585 		struct vattr *a_vap;
586 	} */ *ap = v;
587 	struct union_node *un = VTOUNION(ap->a_dvp);
588 	struct vnode *dvp = un->un_uppervp;
589 	struct componentname *cnp = ap->a_cnp;
590 
591 	if (dvp != NULLVP) {
592 		int error;
593 		struct vnode *vp;
594 		struct mount *mp;
595 
596 		FIXUP(un);
597 
598 		vref(dvp);
599 		un->un_flags |= UN_KLOCK;
600 		mp = ap->a_dvp->v_mount;
601 		vput(ap->a_dvp);
602 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
603 		if (error)
604 			return (error);
605 
606 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
607 				      cnp, vp, NULLVP, 1);
608 		if (error)
609 		    vput(vp);
610 		return (error);
611 	}
612 
613 	vput(ap->a_dvp);
614 	return (EROFS);
615 }
616 
617 int
618 union_open(void *v)
619 {
620 	struct vop_open_args /* {
621 		struct vnodeop_desc *a_desc;
622 		struct vnode *a_vp;
623 		int a_mode;
624 		kauth_cred_t a_cred;
625 	} */ *ap = v;
626 	struct union_node *un = VTOUNION(ap->a_vp);
627 	struct vnode *tvp;
628 	int mode = ap->a_mode;
629 	kauth_cred_t cred = ap->a_cred;
630 	struct lwp *l = curlwp;
631 	int error;
632 
633 	/*
634 	 * If there is an existing upper vp then simply open that.
635 	 */
636 	tvp = un->un_uppervp;
637 	if (tvp == NULLVP) {
638 		/*
639 		 * If the lower vnode is being opened for writing, then
640 		 * copy the file contents to the upper vnode and open that,
641 		 * otherwise can simply open the lower vnode.
642 		 */
643 		tvp = un->un_lowervp;
644 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
645 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
646 			if (error == 0)
647 				error = VOP_OPEN(un->un_uppervp, mode, cred);
648 			return (error);
649 		}
650 
651 		/*
652 		 * Just open the lower vnode, but check for nodev mount flag
653 		 */
654 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
655 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
656 			return ENXIO;
657 		un->un_openl++;
658 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
659 		error = VOP_OPEN(tvp, mode, cred);
660 		VOP_UNLOCK(tvp);
661 
662 		return (error);
663 	}
664 	/*
665 	 * Just open the upper vnode, checking for nodev mount flag first
666 	 */
667 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
668 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
669 		return ENXIO;
670 
671 	FIXUP(un);
672 
673 	error = VOP_OPEN(tvp, mode, cred);
674 
675 	return (error);
676 }
677 
678 int
679 union_close(void *v)
680 {
681 	struct vop_close_args /* {
682 		struct vnode *a_vp;
683 		int  a_fflag;
684 		kauth_cred_t a_cred;
685 	} */ *ap = v;
686 	struct union_node *un = VTOUNION(ap->a_vp);
687 	struct vnode *vp;
688 
689 	vp = un->un_uppervp;
690 	if (vp == NULLVP) {
691 #ifdef UNION_DIAGNOSTIC
692 		if (un->un_openl <= 0)
693 			panic("union: un_openl cnt");
694 #endif
695 		--un->un_openl;
696 		vp = un->un_lowervp;
697 	}
698 
699 #ifdef DIAGNOSTIC
700 	if (vp == NULLVP)
701 		panic("union_close empty union vnode");
702 #endif
703 
704 	ap->a_vp = vp;
705 	return (VCALL(vp, VOFFSET(vop_close), ap));
706 }
707 
708 /*
709  * Check access permission on the union vnode.
710  * The access check being enforced is to check
711  * against both the underlying vnode, and any
712  * copied vnode.  This ensures that no additional
713  * file permissions are given away simply because
714  * the user caused an implicit file copy.
715  */
716 int
717 union_access(void *v)
718 {
719 	struct vop_access_args /* {
720 		struct vnodeop_desc *a_desc;
721 		struct vnode *a_vp;
722 		int a_mode;
723 		kauth_cred_t a_cred;
724 	} */ *ap = v;
725 	struct vnode *vp = ap->a_vp;
726 	struct union_node *un = VTOUNION(vp);
727 	int error = EACCES;
728 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
729 
730 	/*
731 	 * Disallow write attempts on read-only file systems;
732 	 * unless the file is a socket, fifo, or a block or
733 	 * character device resident on the file system.
734 	 */
735 	if (ap->a_mode & VWRITE) {
736 		switch (vp->v_type) {
737 		case VDIR:
738 		case VLNK:
739 		case VREG:
740 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
741 				return (EROFS);
742 			break;
743 		case VBAD:
744 		case VBLK:
745 		case VCHR:
746 		case VSOCK:
747 		case VFIFO:
748 		case VNON:
749 		default:
750 			break;
751 		}
752 	}
753 
754 
755 	if ((vp = un->un_uppervp) != NULLVP) {
756 		FIXUP(un);
757 		ap->a_vp = vp;
758 		return (VCALL(vp, VOFFSET(vop_access), ap));
759 	}
760 
761 	if ((vp = un->un_lowervp) != NULLVP) {
762 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
763 		ap->a_vp = vp;
764 		error = VCALL(vp, VOFFSET(vop_access), ap);
765 		if (error == 0) {
766 			if (um->um_op == UNMNT_BELOW) {
767 				ap->a_cred = um->um_cred;
768 				error = VCALL(vp, VOFFSET(vop_access), ap);
769 			}
770 		}
771 		VOP_UNLOCK(vp);
772 		if (error)
773 			return (error);
774 	}
775 
776 	return (error);
777 }
778 
779 /*
780  * We handle getattr only to change the fsid and
781  * track object sizes
782  */
783 int
784 union_getattr(void *v)
785 {
786 	struct vop_getattr_args /* {
787 		struct vnode *a_vp;
788 		struct vattr *a_vap;
789 		kauth_cred_t a_cred;
790 	} */ *ap = v;
791 	int error;
792 	struct union_node *un = VTOUNION(ap->a_vp);
793 	struct vnode *vp = un->un_uppervp;
794 	struct vattr *vap;
795 	struct vattr va;
796 
797 
798 	/*
799 	 * Some programs walk the filesystem hierarchy by counting
800 	 * links to directories to avoid stat'ing all the time.
801 	 * This means the link count on directories needs to be "correct".
802 	 * The only way to do that is to call getattr on both layers
803 	 * and fix up the link count.  The link count will not necessarily
804 	 * be accurate but will be large enough to defeat the tree walkers.
805 	 *
806 	 * To make life more interesting, some filesystems don't keep
807 	 * track of link counts in the expected way, and return a
808 	 * link count of `1' for those directories; if either of the
809 	 * component directories returns a link count of `1', we return a 1.
810 	 */
811 
812 	vap = ap->a_vap;
813 
814 	vp = un->un_uppervp;
815 	if (vp != NULLVP) {
816 		/*
817 		 * It's not clear whether VOP_GETATTR is to be
818 		 * called with the vnode locked or not.  stat() calls
819 		 * it with (vp) locked, and fstat calls it with
820 		 * (vp) unlocked.
821 		 * In the mean time, compensate here by checking
822 		 * the union_node's lock flag.
823 		 */
824 		if (un->un_flags & UN_LOCKED)
825 			FIXUP(un);
826 
827 		error = VOP_GETATTR(vp, vap, ap->a_cred);
828 		if (error)
829 			return (error);
830 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
831 	}
832 
833 	if (vp == NULLVP) {
834 		vp = un->un_lowervp;
835 	} else if (vp->v_type == VDIR) {
836 		vp = un->un_lowervp;
837 		if (vp != NULLVP)
838 			vap = &va;
839 	} else {
840 		vp = NULLVP;
841 	}
842 
843 	if (vp != NULLVP) {
844 		error = VOP_GETATTR(vp, vap, ap->a_cred);
845 		if (error)
846 			return (error);
847 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
848 	}
849 
850 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
851 		/*
852 		 * Link count manipulation:
853 		 *	- If both return "2", return 2 (no subdirs)
854 		 *	- If one or the other return "1", return "1" (ENOCLUE)
855 		 */
856 		if ((ap->a_vap->va_nlink == 2) &&
857 		    (vap->va_nlink == 2))
858 			;
859 		else if (ap->a_vap->va_nlink != 1) {
860 			if (vap->va_nlink == 1)
861 				ap->a_vap->va_nlink = 1;
862 			else
863 				ap->a_vap->va_nlink += vap->va_nlink;
864 		}
865 	}
866 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
867 	return (0);
868 }
869 
870 int
871 union_setattr(void *v)
872 {
873 	struct vop_setattr_args /* {
874 		struct vnode *a_vp;
875 		struct vattr *a_vap;
876 		kauth_cred_t a_cred;
877 	} */ *ap = v;
878 	struct vattr *vap = ap->a_vap;
879 	struct vnode *vp = ap->a_vp;
880 	struct union_node *un = VTOUNION(vp);
881 	int error;
882 
883   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
884 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
885 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
886 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
887 		return (EROFS);
888 	if (vap->va_size != VNOVAL) {
889  		switch (vp->v_type) {
890  		case VDIR:
891  			return (EISDIR);
892  		case VCHR:
893  		case VBLK:
894  		case VSOCK:
895  		case VFIFO:
896 			break;
897 		case VREG:
898 		case VLNK:
899  		default:
900 			/*
901 			 * Disallow write attempts if the filesystem is
902 			 * mounted read-only.
903 			 */
904 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
905 				return (EROFS);
906 		}
907 	}
908 
909 	/*
910 	 * Handle case of truncating lower object to zero size,
911 	 * by creating a zero length upper object.  This is to
912 	 * handle the case of open with O_TRUNC and O_CREAT.
913 	 */
914 	if ((un->un_uppervp == NULLVP) &&
915 	    /* assert(un->un_lowervp != NULLVP) */
916 	    (un->un_lowervp->v_type == VREG)) {
917 		error = union_copyup(un, (vap->va_size != 0),
918 						ap->a_cred, curlwp);
919 		if (error)
920 			return (error);
921 	}
922 
923 	/*
924 	 * Try to set attributes in upper layer,
925 	 * otherwise return read-only filesystem error.
926 	 */
927 	if (un->un_uppervp != NULLVP) {
928 		FIXUP(un);
929 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
930 		if ((error == 0) && (vap->va_size != VNOVAL))
931 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
932 	} else {
933 		error = EROFS;
934 	}
935 
936 	return (error);
937 }
938 
939 int
940 union_read(void *v)
941 {
942 	struct vop_read_args /* {
943 		struct vnode *a_vp;
944 		struct uio *a_uio;
945 		int  a_ioflag;
946 		kauth_cred_t a_cred;
947 	} */ *ap = v;
948 	int error;
949 	struct vnode *vp = OTHERVP(ap->a_vp);
950 	int dolock = (vp == LOWERVP(ap->a_vp));
951 
952 	if (dolock)
953 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
954 	else
955 		FIXUP(VTOUNION(ap->a_vp));
956 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
957 	if (dolock)
958 		VOP_UNLOCK(vp);
959 
960 	/*
961 	 * XXX
962 	 * perhaps the size of the underlying object has changed under
963 	 * our feet.  take advantage of the offset information present
964 	 * in the uio structure.
965 	 */
966 	if (error == 0) {
967 		struct union_node *un = VTOUNION(ap->a_vp);
968 		off_t cur = ap->a_uio->uio_offset;
969 
970 		if (vp == un->un_uppervp) {
971 			if (cur > un->un_uppersz)
972 				union_newsize(ap->a_vp, cur, VNOVAL);
973 		} else {
974 			if (cur > un->un_lowersz)
975 				union_newsize(ap->a_vp, VNOVAL, cur);
976 		}
977 	}
978 
979 	return (error);
980 }
981 
982 int
983 union_write(void *v)
984 {
985 	struct vop_read_args /* {
986 		struct vnode *a_vp;
987 		struct uio *a_uio;
988 		int  a_ioflag;
989 		kauth_cred_t a_cred;
990 	} */ *ap = v;
991 	int error;
992 	struct vnode *vp;
993 	struct union_node *un = VTOUNION(ap->a_vp);
994 
995 	vp = UPPERVP(ap->a_vp);
996 	if (vp == NULLVP)
997 		panic("union: missing upper layer in write");
998 
999 	FIXUP(un);
1000 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1001 
1002 	/*
1003 	 * the size of the underlying object may be changed by the
1004 	 * write.
1005 	 */
1006 	if (error == 0) {
1007 		off_t cur = ap->a_uio->uio_offset;
1008 
1009 		if (cur > un->un_uppersz)
1010 			union_newsize(ap->a_vp, cur, VNOVAL);
1011 	}
1012 
1013 	return (error);
1014 }
1015 
1016 int
1017 union_ioctl(void *v)
1018 {
1019 	struct vop_ioctl_args /* {
1020 		struct vnode *a_vp;
1021 		int  a_command;
1022 		void *a_data;
1023 		int  a_fflag;
1024 		kauth_cred_t a_cred;
1025 	} */ *ap = v;
1026 	struct vnode *ovp = OTHERVP(ap->a_vp);
1027 
1028 	ap->a_vp = ovp;
1029 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1030 }
1031 
1032 int
1033 union_poll(void *v)
1034 {
1035 	struct vop_poll_args /* {
1036 		struct vnode *a_vp;
1037 		int a_events;
1038 	} */ *ap = v;
1039 	struct vnode *ovp = OTHERVP(ap->a_vp);
1040 
1041 	ap->a_vp = ovp;
1042 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1043 }
1044 
1045 int
1046 union_revoke(void *v)
1047 {
1048 	struct vop_revoke_args /* {
1049 		struct vnode *a_vp;
1050 		int a_flags;
1051 		struct proc *a_p;
1052 	} */ *ap = v;
1053 	struct vnode *vp = ap->a_vp;
1054 
1055 	if (UPPERVP(vp))
1056 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1057 	if (LOWERVP(vp))
1058 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1059 	vgone(vp);	/* XXXAD?? */
1060 	return (0);
1061 }
1062 
1063 int
1064 union_mmap(void *v)
1065 {
1066 	struct vop_mmap_args /* {
1067 		struct vnode *a_vp;
1068 		vm_prot_t a_prot;
1069 		kauth_cred_t a_cred;
1070 	} */ *ap = v;
1071 	struct vnode *ovp = OTHERVP(ap->a_vp);
1072 
1073 	ap->a_vp = ovp;
1074 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1075 }
1076 
1077 int
1078 union_fsync(void *v)
1079 {
1080 	struct vop_fsync_args /* {
1081 		struct vnode *a_vp;
1082 		kauth_cred_t a_cred;
1083 		int  a_flags;
1084 		off_t offhi;
1085 		off_t offlo;
1086 	} */ *ap = v;
1087 	int error = 0;
1088 	struct vnode *targetvp;
1089 
1090 	/*
1091 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1092 	 * bother syncing the underlying vnodes, since (a) they'll be
1093 	 * fsync'ed when reclaimed and (b) we could deadlock if
1094 	 * they're locked; otherwise, pass it through to the
1095 	 * underlying layer.
1096 	 */
1097 	if (ap->a_flags & FSYNC_RECLAIM)
1098 		return 0;
1099 
1100 	targetvp = OTHERVP(ap->a_vp);
1101 
1102 	if (targetvp != NULLVP) {
1103 		int dolock = (targetvp == LOWERVP(ap->a_vp));
1104 
1105 		if (dolock)
1106 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1107 		else
1108 			FIXUP(VTOUNION(ap->a_vp));
1109 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1110 			    ap->a_offlo, ap->a_offhi);
1111 		if (dolock)
1112 			VOP_UNLOCK(targetvp);
1113 	}
1114 
1115 	return (error);
1116 }
1117 
1118 int
1119 union_seek(void *v)
1120 {
1121 	struct vop_seek_args /* {
1122 		struct vnode *a_vp;
1123 		off_t  a_oldoff;
1124 		off_t  a_newoff;
1125 		kauth_cred_t a_cred;
1126 	} */ *ap = v;
1127 	struct vnode *ovp = OTHERVP(ap->a_vp);
1128 
1129 	ap->a_vp = ovp;
1130 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1131 }
1132 
1133 int
1134 union_remove(void *v)
1135 {
1136 	struct vop_remove_args /* {
1137 		struct vnode *a_dvp;
1138 		struct vnode *a_vp;
1139 		struct componentname *a_cnp;
1140 	} */ *ap = v;
1141 	int error;
1142 	struct union_node *dun = VTOUNION(ap->a_dvp);
1143 	struct union_node *un = VTOUNION(ap->a_vp);
1144 	struct componentname *cnp = ap->a_cnp;
1145 
1146 	if (dun->un_uppervp == NULLVP)
1147 		panic("union remove: null upper vnode");
1148 
1149 	if (un->un_uppervp != NULLVP) {
1150 		struct vnode *dvp = dun->un_uppervp;
1151 		struct vnode *vp = un->un_uppervp;
1152 
1153 		FIXUP(dun);
1154 		vref(dvp);
1155 		dun->un_flags |= UN_KLOCK;
1156 		vput(ap->a_dvp);
1157 		FIXUP(un);
1158 		vref(vp);
1159 		un->un_flags |= UN_KLOCK;
1160 		vput(ap->a_vp);
1161 
1162 		if (union_dowhiteout(un, cnp->cn_cred))
1163 			cnp->cn_flags |= DOWHITEOUT;
1164 		error = VOP_REMOVE(dvp, vp, cnp);
1165 		if (!error)
1166 			union_removed_upper(un);
1167 	} else {
1168 		FIXUP(dun);
1169 		error = union_mkwhiteout(
1170 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1171 			dun->un_uppervp, ap->a_cnp, un->un_path);
1172 		vput(ap->a_dvp);
1173 		vput(ap->a_vp);
1174 	}
1175 
1176 	return (error);
1177 }
1178 
1179 int
1180 union_link(void *v)
1181 {
1182 	struct vop_link_args /* {
1183 		struct vnode *a_dvp;
1184 		struct vnode *a_vp;
1185 		struct componentname *a_cnp;
1186 	} */ *ap = v;
1187 	int error = 0;
1188 	struct componentname *cnp = ap->a_cnp;
1189 	struct union_node *dun;
1190 	struct vnode *vp;
1191 	struct vnode *dvp;
1192 
1193 	dun = VTOUNION(ap->a_dvp);
1194 
1195 #ifdef DIAGNOSTIC
1196 	if (!(ap->a_cnp->cn_flags & LOCKPARENT)) {
1197 		printf("union_link called without LOCKPARENT set!\n");
1198 		error = EIO; /* need some error code for "caller is a bozo" */
1199 	} else
1200 #endif
1201 
1202 
1203 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1204 		vp = ap->a_vp;
1205 	} else {
1206 		struct union_node *un = VTOUNION(ap->a_vp);
1207 		if (un->un_uppervp == NULLVP) {
1208 			/*
1209 			 * Needs to be copied before we can link it.
1210 			 */
1211 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1212 			if (dun->un_uppervp == un->un_dirvp) {
1213 				dun->un_flags &= ~UN_ULOCK;
1214 				VOP_UNLOCK(dun->un_uppervp);
1215 			}
1216 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1217 			if (dun->un_uppervp == un->un_dirvp) {
1218 				/*
1219 				 * During copyup, we dropped the lock on the
1220 				 * dir and invalidated any saved namei lookup
1221 				 * state for the directory we'll be entering
1222 				 * the link in.  We need to re-run the lookup
1223 				 * in that directory to reset any state needed
1224 				 * for VOP_LINK.
1225 				 * Call relookup on the union-layer to reset
1226 				 * the state.
1227 				 */
1228 				vp  = NULLVP;
1229 				if (dun->un_uppervp == NULLVP)
1230 					 panic("union: null upperdvp?");
1231 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1232 				if (error) {
1233 					VOP_UNLOCK(ap->a_vp);
1234 					return EROFS;	/* ? */
1235 				}
1236 				if (vp != NULLVP) {
1237 					/*
1238 					 * The name we want to create has
1239 					 * mysteriously appeared (a race?)
1240 					 */
1241 					error = EEXIST;
1242 					VOP_UNLOCK(ap->a_vp);
1243 					vput(ap->a_dvp);
1244 					vput(vp);
1245 					return (error);
1246 				}
1247 			}
1248 			VOP_UNLOCK(ap->a_vp);
1249 		}
1250 		vp = un->un_uppervp;
1251 	}
1252 
1253 	dvp = dun->un_uppervp;
1254 	if (dvp == NULLVP)
1255 		error = EROFS;
1256 
1257 	if (error) {
1258 		vput(ap->a_dvp);
1259 		return (error);
1260 	}
1261 
1262 	FIXUP(dun);
1263 	vref(dvp);
1264 	dun->un_flags |= UN_KLOCK;
1265 	vput(ap->a_dvp);
1266 
1267 	return (VOP_LINK(dvp, vp, cnp));
1268 }
1269 
1270 int
1271 union_rename(void *v)
1272 {
1273 	struct vop_rename_args  /* {
1274 		struct vnode *a_fdvp;
1275 		struct vnode *a_fvp;
1276 		struct componentname *a_fcnp;
1277 		struct vnode *a_tdvp;
1278 		struct vnode *a_tvp;
1279 		struct componentname *a_tcnp;
1280 	} */ *ap = v;
1281 	int error;
1282 
1283 	struct vnode *fdvp = ap->a_fdvp;
1284 	struct vnode *fvp = ap->a_fvp;
1285 	struct vnode *tdvp = ap->a_tdvp;
1286 	struct vnode *tvp = ap->a_tvp;
1287 
1288 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1289 		struct union_node *un = VTOUNION(fdvp);
1290 		if (un->un_uppervp == NULLVP) {
1291 			/*
1292 			 * this should never happen in normal
1293 			 * operation but might if there was
1294 			 * a problem creating the top-level shadow
1295 			 * directory.
1296 			 */
1297 			error = EXDEV;
1298 			goto bad;
1299 		}
1300 
1301 		fdvp = un->un_uppervp;
1302 		vref(fdvp);
1303 	}
1304 
1305 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1306 		struct union_node *un = VTOUNION(fvp);
1307 		if (un->un_uppervp == NULLVP) {
1308 			/* XXX: should do a copyup */
1309 			error = EXDEV;
1310 			goto bad;
1311 		}
1312 
1313 		if (un->un_lowervp != NULLVP)
1314 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1315 
1316 		fvp = un->un_uppervp;
1317 		vref(fvp);
1318 	}
1319 
1320 	if (tdvp->v_op == union_vnodeop_p) {
1321 		struct union_node *un = VTOUNION(tdvp);
1322 		if (un->un_uppervp == NULLVP) {
1323 			/*
1324 			 * this should never happen in normal
1325 			 * operation but might if there was
1326 			 * a problem creating the top-level shadow
1327 			 * directory.
1328 			 */
1329 			error = EXDEV;
1330 			goto bad;
1331 		}
1332 
1333 		tdvp = un->un_uppervp;
1334 		vref(tdvp);
1335 		un->un_flags |= UN_KLOCK;
1336 		vput(ap->a_tdvp);
1337 	}
1338 
1339 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1340 		struct union_node *un = VTOUNION(tvp);
1341 
1342 		tvp = un->un_uppervp;
1343 		if (tvp != NULLVP) {
1344 			vref(tvp);
1345 			un->un_flags |= UN_KLOCK;
1346 		}
1347 		vput(ap->a_tvp);
1348 	}
1349 
1350 	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1351 	goto out;
1352 
1353 bad:
1354 	vput(tdvp);
1355 	if (tvp != NULLVP)
1356 		vput(tvp);
1357 	vrele(fdvp);
1358 	vrele(fvp);
1359 
1360 out:
1361 	if (fdvp != ap->a_fdvp) {
1362 		vrele(ap->a_fdvp);
1363 	}
1364 	if (fvp != ap->a_fvp) {
1365 		vrele(ap->a_fvp);
1366 	}
1367 	return (error);
1368 }
1369 
1370 int
1371 union_mkdir(void *v)
1372 {
1373 	struct vop_mkdir_args /* {
1374 		struct vnode *a_dvp;
1375 		struct vnode **a_vpp;
1376 		struct componentname *a_cnp;
1377 		struct vattr *a_vap;
1378 	} */ *ap = v;
1379 	struct union_node *un = VTOUNION(ap->a_dvp);
1380 	struct vnode *dvp = un->un_uppervp;
1381 	struct componentname *cnp = ap->a_cnp;
1382 
1383 	if (dvp != NULLVP) {
1384 		int error;
1385 		struct vnode *vp;
1386 
1387 		FIXUP(un);
1388 		vref(dvp);
1389 		un->un_flags |= UN_KLOCK;
1390 		VOP_UNLOCK(ap->a_dvp);
1391 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1392 		if (error) {
1393 			vrele(ap->a_dvp);
1394 			return (error);
1395 		}
1396 
1397 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1398 				NULLVP, cnp, vp, NULLVP, 1);
1399 		if (error)
1400 			vput(vp);
1401 		vrele(ap->a_dvp);
1402 		return (error);
1403 	}
1404 
1405 	vput(ap->a_dvp);
1406 	return (EROFS);
1407 }
1408 
1409 int
1410 union_rmdir(void *v)
1411 {
1412 	struct vop_rmdir_args /* {
1413 		struct vnode *a_dvp;
1414 		struct vnode *a_vp;
1415 		struct componentname *a_cnp;
1416 	} */ *ap = v;
1417 	int error;
1418 	struct union_node *dun = VTOUNION(ap->a_dvp);
1419 	struct union_node *un = VTOUNION(ap->a_vp);
1420 	struct componentname *cnp = ap->a_cnp;
1421 
1422 	if (dun->un_uppervp == NULLVP)
1423 		panic("union rmdir: null upper vnode");
1424 
1425 	if (un->un_uppervp != NULLVP) {
1426 		struct vnode *dvp = dun->un_uppervp;
1427 		struct vnode *vp = un->un_uppervp;
1428 
1429 		FIXUP(dun);
1430 		vref(dvp);
1431 		dun->un_flags |= UN_KLOCK;
1432 		vput(ap->a_dvp);
1433 		FIXUP(un);
1434 		vref(vp);
1435 		un->un_flags |= UN_KLOCK;
1436 		vput(ap->a_vp);
1437 
1438 		if (union_dowhiteout(un, cnp->cn_cred))
1439 			cnp->cn_flags |= DOWHITEOUT;
1440 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1441 		if (!error)
1442 			union_removed_upper(un);
1443 	} else {
1444 		FIXUP(dun);
1445 		error = union_mkwhiteout(
1446 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1447 			dun->un_uppervp, ap->a_cnp, un->un_path);
1448 		vput(ap->a_dvp);
1449 		vput(ap->a_vp);
1450 	}
1451 
1452 	return (error);
1453 }
1454 
1455 int
1456 union_symlink(void *v)
1457 {
1458 	struct vop_symlink_args /* {
1459 		struct vnode *a_dvp;
1460 		struct vnode **a_vpp;
1461 		struct componentname *a_cnp;
1462 		struct vattr *a_vap;
1463 		char *a_target;
1464 	} */ *ap = v;
1465 	struct union_node *un = VTOUNION(ap->a_dvp);
1466 	struct vnode *dvp = un->un_uppervp;
1467 	struct componentname *cnp = ap->a_cnp;
1468 
1469 	if (dvp != NULLVP) {
1470 		int error;
1471 
1472 		FIXUP(un);
1473 		vref(dvp);
1474 		un->un_flags |= UN_KLOCK;
1475 		vput(ap->a_dvp);
1476 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1477 				    ap->a_target);
1478 		return (error);
1479 	}
1480 
1481 	vput(ap->a_dvp);
1482 	return (EROFS);
1483 }
1484 
1485 /*
1486  * union_readdir works in concert with getdirentries and
1487  * readdir(3) to provide a list of entries in the unioned
1488  * directories.  getdirentries is responsible for walking
1489  * down the union stack.  readdir(3) is responsible for
1490  * eliminating duplicate names from the returned data stream.
1491  */
1492 int
1493 union_readdir(void *v)
1494 {
1495 	struct vop_readdir_args /* {
1496 		struct vnodeop_desc *a_desc;
1497 		struct vnode *a_vp;
1498 		struct uio *a_uio;
1499 		kauth_cred_t a_cred;
1500 		int *a_eofflag;
1501 		u_long *a_cookies;
1502 		int a_ncookies;
1503 	} */ *ap = v;
1504 	struct union_node *un = VTOUNION(ap->a_vp);
1505 	struct vnode *uvp = un->un_uppervp;
1506 
1507 	if (uvp == NULLVP)
1508 		return (0);
1509 
1510 	FIXUP(un);
1511 	ap->a_vp = uvp;
1512 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1513 }
1514 
1515 int
1516 union_readlink(void *v)
1517 {
1518 	struct vop_readlink_args /* {
1519 		struct vnode *a_vp;
1520 		struct uio *a_uio;
1521 		kauth_cred_t a_cred;
1522 	} */ *ap = v;
1523 	int error;
1524 	struct vnode *vp = OTHERVP(ap->a_vp);
1525 	int dolock = (vp == LOWERVP(ap->a_vp));
1526 
1527 	if (dolock)
1528 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1529 	else
1530 		FIXUP(VTOUNION(ap->a_vp));
1531 	ap->a_vp = vp;
1532 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1533 	if (dolock)
1534 		VOP_UNLOCK(vp);
1535 
1536 	return (error);
1537 }
1538 
1539 int
1540 union_abortop(void *v)
1541 {
1542 	struct vop_abortop_args /* {
1543 		struct vnode *a_dvp;
1544 		struct componentname *a_cnp;
1545 	} */ *ap = v;
1546 	int error;
1547 	struct vnode *vp = OTHERVP(ap->a_dvp);
1548 	struct union_node *un = VTOUNION(ap->a_dvp);
1549 	int islocked = un->un_flags & UN_LOCKED;
1550 	int dolock = (vp == LOWERVP(ap->a_dvp));
1551 
1552 	if (islocked) {
1553 		if (dolock)
1554 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1555 		else
1556 			FIXUP(VTOUNION(ap->a_dvp));
1557 	}
1558 	ap->a_dvp = vp;
1559 	error = VCALL(vp, VOFFSET(vop_abortop), ap);
1560 	if (islocked && dolock)
1561 		VOP_UNLOCK(vp);
1562 
1563 	return (error);
1564 }
1565 
1566 int
1567 union_inactive(void *v)
1568 {
1569 	struct vop_inactive_args /* {
1570 		const struct vnodeop_desc *a_desc;
1571 		struct vnode *a_vp;
1572 		bool *a_recycle;
1573 	} */ *ap = v;
1574 	struct vnode *vp = ap->a_vp;
1575 	struct union_node *un = VTOUNION(vp);
1576 	struct vnode **vpp;
1577 
1578 	/*
1579 	 * Do nothing (and _don't_ bypass).
1580 	 * Wait to vrele lowervp until reclaim,
1581 	 * so that until then our union_node is in the
1582 	 * cache and reusable.
1583 	 *
1584 	 * NEEDSWORK: Someday, consider inactive'ing
1585 	 * the lowervp and then trying to reactivate it
1586 	 * with capabilities (v_id)
1587 	 * like they do in the name lookup cache code.
1588 	 * That's too much work for now.
1589 	 */
1590 
1591 	if (un->un_dircache != 0) {
1592 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1593 			vrele(*vpp);
1594 		free(un->un_dircache, M_TEMP);
1595 		un->un_dircache = 0;
1596 	}
1597 
1598 	*ap->a_recycle = ((un->un_flags & UN_CACHED) == 0);
1599 	VOP_UNLOCK(vp);
1600 
1601 	return (0);
1602 }
1603 
1604 int
1605 union_reclaim(void *v)
1606 {
1607 	struct vop_reclaim_args /* {
1608 		struct vnode *a_vp;
1609 	} */ *ap = v;
1610 
1611 	union_freevp(ap->a_vp);
1612 
1613 	return (0);
1614 }
1615 
1616 int
1617 union_lock(void *v)
1618 {
1619 	struct vop_lock_args /* {
1620 		struct vnode *a_vp;
1621 		int a_flags;
1622 	} */ *ap = v;
1623 	struct vnode *vp = ap->a_vp;
1624 	int flags = ap->a_flags;
1625 	struct union_node *un;
1626 	int error;
1627 
1628 	/* XXX unionfs can't handle shared locks yet */
1629 	if ((flags & LK_SHARED) != 0) {
1630 		flags = (flags & ~LK_SHARED) | LK_EXCLUSIVE;
1631 	}
1632 
1633 	/*
1634 	 * Need to do real lockmgr-style locking here.
1635 	 * in the mean time, draining won't work quite right,
1636 	 * which could lead to a few race conditions.
1637 	 * the following test was here, but is not quite right, we
1638 	 * still need to take the lock:
1639 	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1640 		return (0);
1641 	 */
1642 
1643 	un = VTOUNION(vp);
1644 start:
1645 	un = VTOUNION(vp);
1646 
1647 	if (un->un_uppervp != NULLVP) {
1648 		if (((un->un_flags & UN_ULOCK) == 0) &&
1649 		    (vp->v_usecount != 0)) {
1650 			/*
1651 			 * We MUST always use the order of: take upper
1652 			 * vp lock, manipulate union node flags, drop
1653 			 * upper vp lock.  This code must not be an
1654 			 * exception.
1655 			 */
1656 			error = vn_lock(un->un_uppervp, flags);
1657 			if (error)
1658 				return (error);
1659 			un->un_flags |= UN_ULOCK;
1660 		}
1661 #ifdef DIAGNOSTIC
1662 		if (un->un_flags & UN_KLOCK) {
1663 			vprint("union: dangling klock", vp);
1664 			panic("union: dangling upper lock (%p)", vp);
1665 		}
1666 #endif
1667 	}
1668 
1669 	/* XXX ignores LK_NOWAIT */
1670 	if (un->un_flags & UN_LOCKED) {
1671 #ifdef DIAGNOSTIC
1672 		if (curproc && un->un_pid == curproc->p_pid &&
1673 			    un->un_pid > -1 && curproc->p_pid > -1)
1674 			panic("union: locking against myself");
1675 #endif
1676 		un->un_flags |= UN_WANTED;
1677 		tsleep(&un->un_flags, PINOD, "unionlk2", 0);
1678 		goto start;
1679 	}
1680 
1681 #ifdef DIAGNOSTIC
1682 	if (curproc)
1683 		un->un_pid = curproc->p_pid;
1684 	else
1685 		un->un_pid = -1;
1686 #endif
1687 
1688 	un->un_flags |= UN_LOCKED;
1689 	return (0);
1690 }
1691 
1692 /*
1693  * When operations want to vput() a union node yet retain a lock on
1694  * the upper vnode (say, to do some further operations like link(),
1695  * mkdir(), ...), they set UN_KLOCK on the union node, then call
1696  * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1697  * unlocks the union node (leaving the upper vnode alone), clears the
1698  * KLOCK flag, and then returns to vput().  The caller then does whatever
1699  * is left to do with the upper vnode, and ensures that it gets unlocked.
1700  *
1701  * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1702  */
1703 int
1704 union_unlock(void *v)
1705 {
1706 	struct vop_unlock_args /* {
1707 		struct vnode *a_vp;
1708 		int a_flags;
1709 	} */ *ap = v;
1710 	struct union_node *un = VTOUNION(ap->a_vp);
1711 
1712 #ifdef DIAGNOSTIC
1713 	if ((un->un_flags & UN_LOCKED) == 0)
1714 		panic("union: unlock unlocked node");
1715 	if (curproc && un->un_pid != curproc->p_pid &&
1716 			curproc->p_pid > -1 && un->un_pid > -1)
1717 		panic("union: unlocking other process's union node");
1718 #endif
1719 
1720 	un->un_flags &= ~UN_LOCKED;
1721 
1722 	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1723 		VOP_UNLOCK(un->un_uppervp);
1724 
1725 	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1726 
1727 	if (un->un_flags & UN_WANTED) {
1728 		un->un_flags &= ~UN_WANTED;
1729 		wakeup( &un->un_flags);
1730 	}
1731 
1732 #ifdef DIAGNOSTIC
1733 	un->un_pid = 0;
1734 #endif
1735 
1736 	return (0);
1737 }
1738 
1739 int
1740 union_bmap(void *v)
1741 {
1742 	struct vop_bmap_args /* {
1743 		struct vnode *a_vp;
1744 		daddr_t  a_bn;
1745 		struct vnode **a_vpp;
1746 		daddr_t *a_bnp;
1747 		int *a_runp;
1748 	} */ *ap = v;
1749 	int error;
1750 	struct vnode *vp = OTHERVP(ap->a_vp);
1751 	int dolock = (vp == LOWERVP(ap->a_vp));
1752 
1753 	if (dolock)
1754 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1755 	else
1756 		FIXUP(VTOUNION(ap->a_vp));
1757 	ap->a_vp = vp;
1758 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1759 	if (dolock)
1760 		VOP_UNLOCK(vp);
1761 
1762 	return (error);
1763 }
1764 
1765 int
1766 union_print(void *v)
1767 {
1768 	struct vop_print_args /* {
1769 		struct vnode *a_vp;
1770 	} */ *ap = v;
1771 	struct vnode *vp = ap->a_vp;
1772 
1773 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1774 			vp, UPPERVP(vp), LOWERVP(vp));
1775 	if (UPPERVP(vp) != NULLVP)
1776 		vprint("union: upper", UPPERVP(vp));
1777 	if (LOWERVP(vp) != NULLVP)
1778 		vprint("union: lower", LOWERVP(vp));
1779 	if (VTOUNION(vp)->un_dircache) {
1780 		struct vnode **vpp;
1781 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1782 			vprint("dircache:", *vpp);
1783 	}
1784 
1785 	return (0);
1786 }
1787 
1788 int
1789 union_islocked(void *v)
1790 {
1791 	struct vop_islocked_args /* {
1792 		struct vnode *a_vp;
1793 	} */ *ap = v;
1794 
1795 	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? LK_EXCLUSIVE : 0);
1796 }
1797 
1798 int
1799 union_pathconf(void *v)
1800 {
1801 	struct vop_pathconf_args /* {
1802 		struct vnode *a_vp;
1803 		int a_name;
1804 		int *a_retval;
1805 	} */ *ap = v;
1806 	int error;
1807 	struct vnode *vp = OTHERVP(ap->a_vp);
1808 	int dolock = (vp == LOWERVP(ap->a_vp));
1809 
1810 	if (dolock)
1811 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1812 	else
1813 		FIXUP(VTOUNION(ap->a_vp));
1814 	ap->a_vp = vp;
1815 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1816 	if (dolock)
1817 		VOP_UNLOCK(vp);
1818 
1819 	return (error);
1820 }
1821 
1822 int
1823 union_advlock(void *v)
1824 {
1825 	struct vop_advlock_args /* {
1826 		struct vnode *a_vp;
1827 		void *a_id;
1828 		int  a_op;
1829 		struct flock *a_fl;
1830 		int  a_flags;
1831 	} */ *ap = v;
1832 	struct vnode *ovp = OTHERVP(ap->a_vp);
1833 
1834 	ap->a_vp = ovp;
1835 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1836 }
1837 
1838 
1839 /*
1840  * XXX - vop_strategy must be hand coded because it has no
1841  * vnode in its arguments.
1842  * This goes away with a merged VM/buffer cache.
1843  */
1844 int
1845 union_strategy(void *v)
1846 {
1847 	struct vop_strategy_args /* {
1848 		struct vnode *a_vp;
1849 		struct buf *a_bp;
1850 	} */ *ap = v;
1851 	struct vnode *ovp = OTHERVP(ap->a_vp);
1852 	struct buf *bp = ap->a_bp;
1853 
1854 #ifdef DIAGNOSTIC
1855 	if (ovp == NULLVP)
1856 		panic("union_strategy: nil vp");
1857 	if (((bp->b_flags & B_READ) == 0) &&
1858 	    (ovp == LOWERVP(bp->b_vp)))
1859 		panic("union_strategy: writing to lowervp");
1860 #endif
1861 
1862 	return (VOP_STRATEGY(ovp, bp));
1863 }
1864 
1865 int
1866 union_getpages(void *v)
1867 {
1868 	struct vop_getpages_args /* {
1869 		struct vnode *a_vp;
1870 		voff_t a_offset;
1871 		struct vm_page **a_m;
1872 		int *a_count;
1873 		int a_centeridx;
1874 		vm_prot_t a_access_type;
1875 		int a_advice;
1876 		int a_flags;
1877 	} */ *ap = v;
1878 	struct vnode *vp = ap->a_vp;
1879 	int error;
1880 
1881 	/*
1882 	 * just pass the request on to the underlying layer.
1883 	 */
1884 
1885 	if (ap->a_flags & PGO_LOCKED) {
1886 		return EBUSY;
1887 	}
1888 	ap->a_vp = OTHERVP(vp);
1889 	mutex_exit(&vp->v_interlock);
1890 	mutex_enter(&ap->a_vp->v_interlock);
1891 	error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1892 	return error;
1893 }
1894 
1895 int
1896 union_putpages(void *v)
1897 {
1898 	struct vop_putpages_args /* {
1899 		struct vnode *a_vp;
1900 		voff_t a_offlo;
1901 		voff_t a_offhi;
1902 		int a_flags;
1903 	} */ *ap = v;
1904 	struct vnode *vp = ap->a_vp;
1905 	int error;
1906 
1907 	/*
1908 	 * just pass the request on to the underlying layer.
1909 	 */
1910 
1911 	ap->a_vp = OTHERVP(vp);
1912 	mutex_exit(&vp->v_interlock);
1913 	if (ap->a_flags & PGO_RECLAIM) {
1914 		return 0;
1915 	}
1916 	mutex_enter(&ap->a_vp->v_interlock);
1917 	error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1918 	return error;
1919 }
1920 
1921 int
1922 union_kqfilter(void *v)
1923 {
1924 	struct vop_kqfilter_args /* {
1925 		struct vnode	*a_vp;
1926 		struct knote	*a_kn;
1927 	} */ *ap = v;
1928 	int error;
1929 
1930 	/*
1931 	 * We watch either the upper layer file (if it already exists),
1932 	 * or the lower layer one. If there is lower layer file only
1933 	 * at this moment, we will keep watching that lower layer file
1934 	 * even if upper layer file would be created later on.
1935 	 */
1936 	if (UPPERVP(ap->a_vp))
1937 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1938 	else if (LOWERVP(ap->a_vp))
1939 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1940 	else {
1941 		/* panic? */
1942 		error = EOPNOTSUPP;
1943 	}
1944 
1945 	return (error);
1946 }
1947