xref: /dflybsd-src/sys/kern/vfs_syscalls.c (revision ee173d09dc3fba168bf56a31bffd0468b38f06ef)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/sysent.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mountctl.h>
46 #include <sys/sysproto.h>
47 #include <sys/filedesc.h>
48 #include <sys/kernel.h>
49 #include <sys/fcntl.h>
50 #include <sys/file.h>
51 #include <sys/linker.h>
52 #include <sys/stat.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/priv.h>
57 #include <sys/jail.h>
58 #include <sys/namei.h>
59 #include <sys/nlookup.h>
60 #include <sys/dirent.h>
61 #include <sys/extattr.h>
62 #include <sys/spinlock.h>
63 #include <sys/kern_syscall.h>
64 #include <sys/objcache.h>
65 #include <sys/sysctl.h>
66 
67 #include <sys/buf2.h>
68 #include <sys/file2.h>
69 #include <sys/spinlock2.h>
70 #include <sys/mplock2.h>
71 
72 #include <vm/vm.h>
73 #include <vm/vm_object.h>
74 #include <vm/vm_page.h>
75 
76 #include <machine/limits.h>
77 #include <machine/stdarg.h>
78 
79 #include <vfs/union/union.h>
80 
81 static void mount_warning(struct mount *mp, const char *ctl, ...)
82 		__printflike(2, 3);
83 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
84 static int checkvp_chdir (struct vnode *vn, struct thread *td);
85 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
86 static int chroot_refuse_vdir_fds (struct filedesc *fdp);
87 static int chroot_visible_mnt(struct mount *mp, struct proc *p);
88 static int getutimes (const struct timeval *, struct timespec *);
89 static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
90 static int setfmode (struct vnode *, int);
91 static int setfflags (struct vnode *, int);
92 static int setutimes (struct vnode *, struct vattr *,
93 			const struct timespec *, int);
94 static int	usermount = 0;	/* if 1, non-root can mount fs. */
95 
96 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);
97 
98 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
99     "Allow non-root users to mount filesystems");
100 
101 /*
102  * Virtual File System System Calls
103  */
104 
105 /*
106  * Mount a file system.
107  *
108  * mount_args(char *type, char *path, int flags, caddr_t data)
109  *
110  * MPALMOSTSAFE
111  */
112 int
113 sys_mount(struct mount_args *uap)
114 {
115 	struct thread *td = curthread;
116 	struct vnode *vp;
117 	struct nchandle nch;
118 	struct mount *mp, *nullmp;
119 	struct vfsconf *vfsp;
120 	int error, flag = 0, flag2 = 0;
121 	int hasmount;
122 	struct vattr va;
123 	struct nlookupdata nd;
124 	char fstypename[MFSNAMELEN];
125 	struct ucred *cred;
126 
127 	cred = td->td_ucred;
128 	if (jailed(cred)) {
129 		error = EPERM;
130 		goto done;
131 	}
132 	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
133 		goto done;
134 
135 	/*
136 	 * Do not allow NFS export by non-root users.
137 	 */
138 	if (uap->flags & MNT_EXPORTED) {
139 		error = priv_check(td, PRIV_ROOT);
140 		if (error)
141 			goto done;
142 	}
143 	/*
144 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
145 	 */
146 	if (priv_check(td, PRIV_ROOT))
147 		uap->flags |= MNT_NOSUID | MNT_NODEV;
148 
149 	/*
150 	 * Lookup the requested path and extract the nch and vnode.
151 	 */
152 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
153 	if (error == 0) {
154 		if ((error = nlookup(&nd)) == 0) {
155 			if (nd.nl_nch.ncp->nc_vp == NULL)
156 				error = ENOENT;
157 		}
158 	}
159 	if (error) {
160 		nlookup_done(&nd);
161 		goto done;
162 	}
163 
164 	/*
165 	 * If the target filesystem is resolved via a nullfs mount, then
166 	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
167 	 * instead of the target file system. We need it in case we are
168 	 * doing an update.
169 	 */
170 	nullmp = nd.nl_nch.mount;
171 
172 	/*
173 	 * Extract the locked+refd ncp and cleanup the nd structure
174 	 */
175 	nch = nd.nl_nch;
176 	cache_zero(&nd.nl_nch);
177 	nlookup_done(&nd);
178 
179 	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
180 	    (mp = cache_findmount(&nch)) != NULL) {
181 		cache_dropmount(mp);
182 		hasmount = 1;
183 	} else {
184 		hasmount = 0;
185 	}
186 
187 
188 	/*
189 	 * now we have the locked ref'd nch and unreferenced vnode.
190 	 */
191 	vp = nch.ncp->nc_vp;
192 	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
193 		cache_put(&nch);
194 		goto done;
195 	}
196 	cache_unlock(&nch);
197 
198 	/*
199 	 * Extract the file system type. We need to know this early, to take
200 	 * appropriate actions if we are dealing with a nullfs.
201 	 */
202 	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
203 		cache_drop(&nch);
204 		vput(vp);
205 		goto done;
206 	}
207 
208 	/*
209 	 * Now we have an unlocked ref'd nch and a locked ref'd vp
210 	 */
211 	if (uap->flags & MNT_UPDATE) {
212 		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
213 			cache_drop(&nch);
214 			vput(vp);
215 			error = EINVAL;
216 			goto done;
217 		}
218 
219 		if (strncmp(fstypename, "null", 5) == 0) {
220 			KKASSERT(nullmp);
221 			mp = nullmp;
222 		} else {
223 			mp = vp->v_mount;
224 		}
225 
226 		flag = mp->mnt_flag;
227 		flag2 = mp->mnt_kern_flag;
228 		/*
229 		 * We only allow the filesystem to be reloaded if it
230 		 * is currently mounted read-only.
231 		 */
232 		if ((uap->flags & MNT_RELOAD) &&
233 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
234 			cache_drop(&nch);
235 			vput(vp);
236 			error = EOPNOTSUPP;	/* Needs translation */
237 			goto done;
238 		}
239 		/*
240 		 * Only root, or the user that did the original mount is
241 		 * permitted to update it.
242 		 */
243 		if (mp->mnt_stat.f_owner != cred->cr_uid &&
244 		    (error = priv_check(td, PRIV_ROOT))) {
245 			cache_drop(&nch);
246 			vput(vp);
247 			goto done;
248 		}
249 		if (vfs_busy(mp, LK_NOWAIT)) {
250 			cache_drop(&nch);
251 			vput(vp);
252 			error = EBUSY;
253 			goto done;
254 		}
255 		if (hasmount) {
256 			cache_drop(&nch);
257 			vfs_unbusy(mp);
258 			vput(vp);
259 			error = EBUSY;
260 			goto done;
261 		}
262 		mp->mnt_flag |=
263 		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
264 		lwkt_gettoken(&mp->mnt_token);
265 		vn_unlock(vp);
266 		goto update;
267 	}
268 
269 	/*
270 	 * If the user is not root, ensure that they own the directory
271 	 * onto which we are attempting to mount.
272 	 */
273 	if ((error = VOP_GETATTR(vp, &va)) ||
274 	    (va.va_uid != cred->cr_uid &&
275 	     (error = priv_check(td, PRIV_ROOT)))) {
276 		cache_drop(&nch);
277 		vput(vp);
278 		goto done;
279 	}
280 	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
281 		cache_drop(&nch);
282 		vput(vp);
283 		goto done;
284 	}
285 	if (vp->v_type != VDIR) {
286 		cache_drop(&nch);
287 		vput(vp);
288 		error = ENOTDIR;
289 		goto done;
290 	}
291 	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
292 		cache_drop(&nch);
293 		vput(vp);
294 		error = EPERM;
295 		goto done;
296 	}
297 	vfsp = vfsconf_find_by_name(fstypename);
298 	if (vfsp == NULL) {
299 		linker_file_t lf;
300 
301 		/* Only load modules for root (very important!) */
302 		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
303 			cache_drop(&nch);
304 			vput(vp);
305 			goto done;
306 		}
307 		error = linker_load_file(fstypename, &lf);
308 		if (error || lf == NULL) {
309 			cache_drop(&nch);
310 			vput(vp);
311 			if (lf == NULL)
312 				error = ENODEV;
313 			goto done;
314 		}
315 		lf->userrefs++;
316 		/* lookup again, see if the VFS was loaded */
317 		vfsp = vfsconf_find_by_name(fstypename);
318 		if (vfsp == NULL) {
319 			lf->userrefs--;
320 			linker_file_unload(lf);
321 			cache_drop(&nch);
322 			vput(vp);
323 			error = ENODEV;
324 			goto done;
325 		}
326 	}
327 	if (hasmount) {
328 		cache_drop(&nch);
329 		vput(vp);
330 		error = EBUSY;
331 		goto done;
332 	}
333 
334 	/*
335 	 * Allocate and initialize the filesystem.
336 	 */
337 	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
338 	mount_init(mp);
339 	vfs_busy(mp, LK_NOWAIT);
340 	mp->mnt_op = vfsp->vfc_vfsops;
341 	mp->mnt_vfc = vfsp;
342 	vfsp->vfc_refcount++;
343 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
344 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
345 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
346 	mp->mnt_stat.f_owner = cred->cr_uid;
347 	lwkt_gettoken(&mp->mnt_token);
348 	vn_unlock(vp);
349 update:
350 	/*
351 	 * (per-mount token acquired at this point)
352 	 *
353 	 * Set the mount level flags.
354 	 */
355 	if (uap->flags & MNT_RDONLY)
356 		mp->mnt_flag |= MNT_RDONLY;
357 	else if (mp->mnt_flag & MNT_RDONLY)
358 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
359 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
360 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
361 	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
362 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
363 	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
364 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
365 	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
366 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
367 	/*
368 	 * Mount the filesystem.
369 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
370 	 * get.
371 	 */
372 	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
373 	if (mp->mnt_flag & MNT_UPDATE) {
374 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
375 			mp->mnt_flag &= ~MNT_RDONLY;
376 		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
377 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
378 		if (error) {
379 			mp->mnt_flag = flag;
380 			mp->mnt_kern_flag = flag2;
381 		}
382 		lwkt_reltoken(&mp->mnt_token);
383 		vfs_unbusy(mp);
384 		vrele(vp);
385 		cache_drop(&nch);
386 		goto done;
387 	}
388 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
389 
390 	/*
391 	 * Put the new filesystem on the mount list after root.  The mount
392 	 * point gets its own mnt_ncmountpt (unless the VFS already set one
393 	 * up) which represents the root of the mount.  The lookup code
394 	 * detects the mount point going forward and checks the root of
395 	 * the mount going backwards.
396 	 *
397 	 * It is not necessary to invalidate or purge the vnode underneath
398 	 * because elements under the mount will be given their own glue
399 	 * namecache record.
400 	 */
401 	if (!error) {
402 		if (mp->mnt_ncmountpt.ncp == NULL) {
403 			/*
404 			 * allocate, then unlock, but leave the ref intact
405 			 */
406 			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
407 			cache_unlock(&mp->mnt_ncmountpt);
408 		}
409 		mp->mnt_ncmounton = nch;		/* inherits ref */
410 		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
411 		cache_ismounting(mp);
412 
413 		mountlist_insert(mp, MNTINS_LAST);
414 		vn_unlock(vp);
415 		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
416 		error = vfs_allocate_syncvnode(mp);
417 		lwkt_reltoken(&mp->mnt_token);
418 		vfs_unbusy(mp);
419 		error = VFS_START(mp, 0);
420 		vrele(vp);
421 	} else {
422 		vn_syncer_thr_stop(mp);
423 		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
424 		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
425 		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
426 		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
427 		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
428 		mp->mnt_vfc->vfc_refcount--;
429 		lwkt_reltoken(&mp->mnt_token);
430 		vfs_unbusy(mp);
431 		kfree(mp, M_MOUNT);
432 		cache_drop(&nch);
433 		vput(vp);
434 	}
435 done:
436 	return (error);
437 }
438 
439 /*
440  * Scan all active processes to see if any of them have a current
441  * or root directory onto which the new filesystem has just been
442  * mounted. If so, replace them with the new mount point.
443  *
444  * Both old_nch and new_nch are ref'd on call but not locked.
445  * new_nch must be temporarily locked so it can be associated with the
446  * vnode representing the root of the mount point.
447  */
448 struct checkdirs_info {
449 	struct nchandle old_nch;
450 	struct nchandle new_nch;
451 	struct vnode *old_vp;
452 	struct vnode *new_vp;
453 };
454 
455 static int checkdirs_callback(struct proc *p, void *data);
456 
457 static void
458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
459 {
460 	struct checkdirs_info info;
461 	struct vnode *olddp;
462 	struct vnode *newdp;
463 	struct mount *mp;
464 
465 	/*
466 	 * If the old mount point's vnode has a usecount of 1, it is not
467 	 * being held as a descriptor anywhere.
468 	 */
469 	olddp = old_nch->ncp->nc_vp;
470 	if (olddp == NULL || VREFCNT(olddp) == 1)
471 		return;
472 
473 	/*
474 	 * Force the root vnode of the new mount point to be resolved
475 	 * so we can update any matching processes.
476 	 */
477 	mp = new_nch->mount;
478 	if (VFS_ROOT(mp, &newdp))
479 		panic("mount: lost mount");
480 	vn_unlock(newdp);
481 	cache_lock(new_nch);
482 	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
483 	cache_setunresolved(new_nch);
484 	cache_setvp(new_nch, newdp);
485 	cache_unlock(new_nch);
486 
487 	/*
488 	 * Special handling of the root node
489 	 */
490 	if (rootvnode == olddp) {
491 		vref(newdp);
492 		vfs_cache_setroot(newdp, cache_hold(new_nch));
493 	}
494 
495 	/*
496 	 * Pass newdp separately so the callback does not have to access
497 	 * it via new_nch->ncp->nc_vp.
498 	 */
499 	info.old_nch = *old_nch;
500 	info.new_nch = *new_nch;
501 	info.new_vp = newdp;
502 	allproc_scan(checkdirs_callback, &info);
503 	vput(newdp);
504 }
505 
506 /*
507  * NOTE: callback is not MP safe because the scanned process's filedesc
508  * structure can be ripped out from under us, among other things.
509  */
510 static int
511 checkdirs_callback(struct proc *p, void *data)
512 {
513 	struct checkdirs_info *info = data;
514 	struct filedesc *fdp;
515 	struct nchandle ncdrop1;
516 	struct nchandle ncdrop2;
517 	struct vnode *vprele1;
518 	struct vnode *vprele2;
519 
520 	if ((fdp = p->p_fd) != NULL) {
521 		cache_zero(&ncdrop1);
522 		cache_zero(&ncdrop2);
523 		vprele1 = NULL;
524 		vprele2 = NULL;
525 
526 		/*
527 		 * MPUNSAFE - XXX fdp can be pulled out from under a
528 		 * foreign process.
529 		 *
530 		 * A shared filedesc is ok, we don't have to copy it
531 		 * because we are making this change globally.
532 		 */
533 		spin_lock(&fdp->fd_spin);
534 		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
535 		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
536 			vprele1 = fdp->fd_cdir;
537 			vref(info->new_vp);
538 			fdp->fd_cdir = info->new_vp;
539 			ncdrop1 = fdp->fd_ncdir;
540 			cache_copy(&info->new_nch, &fdp->fd_ncdir);
541 		}
542 		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
543 		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
544 			vprele2 = fdp->fd_rdir;
545 			vref(info->new_vp);
546 			fdp->fd_rdir = info->new_vp;
547 			ncdrop2 = fdp->fd_nrdir;
548 			cache_copy(&info->new_nch, &fdp->fd_nrdir);
549 		}
550 		spin_unlock(&fdp->fd_spin);
551 		if (ncdrop1.ncp)
552 			cache_drop(&ncdrop1);
553 		if (ncdrop2.ncp)
554 			cache_drop(&ncdrop2);
555 		if (vprele1)
556 			vrele(vprele1);
557 		if (vprele2)
558 			vrele(vprele2);
559 	}
560 	return(0);
561 }
562 
563 /*
564  * Unmount a file system.
565  *
566  * Note: unmount takes a path to the vnode mounted on as argument,
567  * not the special file (as before).
568  *
569  * umount_args(char *path, int flags)
570  *
571  * MPALMOSTSAFE
572  */
573 int
574 sys_unmount(struct unmount_args *uap)
575 {
576 	struct thread *td = curthread;
577 	struct proc *p __debugvar = td->td_proc;
578 	struct mount *mp = NULL;
579 	struct nlookupdata nd;
580 	int error;
581 
582 	KKASSERT(p);
583 	get_mplock();
584 	if (td->td_ucred->cr_prison != NULL) {
585 		error = EPERM;
586 		goto done;
587 	}
588 	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
589 		goto done;
590 
591 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
592 	if (error == 0)
593 		error = nlookup(&nd);
594 	if (error)
595 		goto out;
596 
597 	mp = nd.nl_nch.mount;
598 
599 	/*
600 	 * Only root, or the user that did the original mount is
601 	 * permitted to unmount this filesystem.
602 	 */
603 	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
604 	    (error = priv_check(td, PRIV_ROOT)))
605 		goto out;
606 
607 	/*
608 	 * Don't allow unmounting the root file system.
609 	 */
610 	if (mp->mnt_flag & MNT_ROOTFS) {
611 		error = EINVAL;
612 		goto out;
613 	}
614 
615 	/*
616 	 * Must be the root of the filesystem
617 	 */
618 	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
619 		error = EINVAL;
620 		goto out;
621 	}
622 
623 out:
624 	nlookup_done(&nd);
625 	if (error == 0)
626 		error = dounmount(mp, uap->flags);
627 done:
628 	rel_mplock();
629 	return (error);
630 }
631 
632 /*
633  * Do the actual file system unmount (dounmount() and its helpers).
634  */
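/*
 * mountlist_interlock() callback: atomically mark the mount as being
 * unmounted, or return EBUSY if an unmount is already in progress.
 */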
635 static int
636 dounmount_interlock(struct mount *mp)
637 {
638 	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
639 		return (EBUSY);
640 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
641 	return(0);
642 }
643 
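/*
 * allproc_scan() callback: drop a process's cached program-text nchandle
 * (p_textnch) if it references the mount being unmounted, so stale
 * references do not hold up the unmount.
 */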
644 static int
645 unmount_allproc_cb(struct proc *p, void *arg)
646 {
647 	struct mount *mp;
648 
649 	if (p->p_textnch.ncp == NULL)
650 		return 0;
651 
652 	mp = (struct mount *)arg;
653 	if (p->p_textnch.mount == mp)
654 		cache_drop(&p->p_textnch);
655 
656 	return 0;
657 }
658 
659 int
660 dounmount(struct mount *mp, int flags)
661 {
662 	struct namecache *ncp;
663 	struct nchandle nch;
664 	struct vnode *vp;
665 	int error;
666 	int async_flag;
667 	int lflags;
668 	int freeok = 1;
669 	int retry;
670 
671 	lwkt_gettoken(&mp->mnt_token);
672 	/*
673 	 * Exclusive access for unmounting purposes
674 	 */
675 	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
676 		goto out;
677 
678 	/*
679 	 * Allow filesystems to detect that a forced unmount is in progress.
680 	 */
681 	if (flags & MNT_FORCE)
682 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
683 	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
684 	error = lockmgr(&mp->mnt_lock, lflags);
685 	if (error) {
686 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
687 		if (mp->mnt_kern_flag & MNTK_MWAIT) {
688 			mp->mnt_kern_flag &= ~MNTK_MWAIT;
689 			wakeup(mp);
690 		}
691 		goto out;
692 	}
693 
694 	if (mp->mnt_flag & MNT_EXPUBLIC)
695 		vfs_setpublicfs(NULL, NULL, NULL);
696 
697 	vfs_msync(mp, MNT_WAIT);
698 	async_flag = mp->mnt_flag & MNT_ASYNC;
699 	mp->mnt_flag &=~ MNT_ASYNC;
700 
701 	/*
702 	 * If this filesystem isn't aliasing other filesystems,
703 	 * try to invalidate any remaining namecache entries and
704 	 * check the count afterwards.
705 	 */
706 	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
707 		cache_lock(&mp->mnt_ncmountpt);
708 		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
709 		cache_unlock(&mp->mnt_ncmountpt);
710 
711 		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
712 		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
713 			allproc_scan(&unmount_allproc_cb, mp);
714 		}
715 
716 		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
717 		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
718 
719 			if ((flags & MNT_FORCE) == 0) {
720 				error = EBUSY;
721 				mount_warning(mp, "Cannot unmount: "
722 						  "%d namecache "
723 						  "references still "
724 						  "present",
725 						  ncp->nc_refs - 1);
726 			} else {
727 				mount_warning(mp, "Forced unmount: "
728 						  "%d namecache "
729 						  "references still "
730 						  "present",
731 						  ncp->nc_refs - 1);
732 				freeok = 0;
733 			}
734 		}
735 	}
736 
737 	/*
738 	 * Decommission our special mnt_syncer vnode.  This also stops
739 	 * the vnlru code.  If we are unable to unmount we recommission
740 	 * the vnode.
741 	 *
742 	 * Then sync the filesystem.
743 	 */
744 	if ((vp = mp->mnt_syncer) != NULL) {
745 		mp->mnt_syncer = NULL;
746 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
747 		vrele(vp);
748 	}
749 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
750 		VFS_SYNC(mp, MNT_WAIT);
751 
752 	/*
753 	 * nchandle records ref the mount structure.  Expect a count of 1
754 	 * (our mount->mnt_ncmountpt).
755 	 *
756 	 * Scans can get temporary refs on a mountpoint (though really
757 	 * heavy-duty code like cache_findmount() does not).
758 	 */
759 	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
760 		cache_unmounting(mp);
761 		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
762 	}
763 	if (mp->mnt_refs != 1) {
764 		if ((flags & MNT_FORCE) == 0) {
765 			mount_warning(mp, "Cannot unmount: "
766 					  "%d mount refs still present",
767 					  mp->mnt_refs);
768 			error = EBUSY;
769 		} else {
770 			mount_warning(mp, "Forced unmount: "
771 					  "%d mount refs still present",
772 					  mp->mnt_refs);
773 			freeok = 0;
774 		}
775 	}
776 
777 	/*
778 	 * So far so good, sync the filesystem once more and
779 	 * call the VFS unmount code if the sync succeeds.
780 	 */
781 	if (error == 0) {
782 		if (((mp->mnt_flag & MNT_RDONLY) ||
783 		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
784 		    (flags & MNT_FORCE)) {
785 			error = VFS_UNMOUNT(mp, flags);
786 		}
787 	}
788 
789 	/*
790 	 * If an error occurred we can still recover, restoring the
791 	 * syncer vnode and misc flags.
792 	 */
793 	if (error) {
794 		if (mp->mnt_syncer == NULL)
795 			vfs_allocate_syncvnode(mp);
796 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
797 		mp->mnt_flag |= async_flag;
798 		lockmgr(&mp->mnt_lock, LK_RELEASE);
799 		if (mp->mnt_kern_flag & MNTK_MWAIT) {
800 			mp->mnt_kern_flag &= ~MNTK_MWAIT;
801 			wakeup(mp);
802 		}
803 		goto out;
804 	}
805 	/*
806 	 * Clean up any journals still associated with the mount after
807 	 * filesystem activity has ceased.
808 	 */
809 	journal_remove_all_journals(mp,
810 	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
811 
812 	mountlist_remove(mp);
813 
814 	/*
815 	 * Remove any installed vnode ops here so the individual VFSs don't
816 	 * have to.
817 	 */
818 	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
819 	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
820 	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
821 	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
822 	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
823 
824 	if (mp->mnt_ncmountpt.ncp != NULL) {
825 		nch = mp->mnt_ncmountpt;
826 		cache_zero(&mp->mnt_ncmountpt);
827 		cache_clrmountpt(&nch);
828 		cache_drop(&nch);
829 	}
830 	if (mp->mnt_ncmounton.ncp != NULL) {
831 		cache_unmounting(mp);
832 		nch = mp->mnt_ncmounton;
833 		cache_zero(&mp->mnt_ncmounton);
834 		cache_clrmountpt(&nch);
835 		cache_drop(&nch);
836 	}
837 
838 	mp->mnt_vfc->vfc_refcount--;
839 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
840 		panic("unmount: dangling vnode");
841 	lockmgr(&mp->mnt_lock, LK_RELEASE);
842 	if (mp->mnt_kern_flag & MNTK_MWAIT) {
843 		mp->mnt_kern_flag &= ~MNTK_MWAIT;
844 		wakeup(mp);
845 	}
846 
847 	/*
848 	 * If we reach here and freeok != 0 we must free the mount.
849 	 * If refs > 1, cycle and wait, just in case someone tried
850 	 * to busy the mount after we decided to do the unmount.
851 	 */
852 	if (freeok) {
853 		while (mp->mnt_refs > 1) {
854 			cache_unmounting(mp);
855 			wakeup(mp);
856 			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
857 		}
858 		lwkt_reltoken(&mp->mnt_token);
859 		kfree(mp, M_MOUNT);
860 		mp = NULL;
861 	}
862 	error = 0;
863 out:
864 	if (mp)
865 		lwkt_reltoken(&mp->mnt_token);
866 	return (error);
867 }
868 
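/*
 * Print an unmount-related warning, prefixed with the mount-on path
 * resolved via cache_fullpath(), or with the mount pointer (and the
 * mount-on name, if available) when the path cannot be resolved.
 */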
869 static
870 void
871 mount_warning(struct mount *mp, const char *ctl, ...)
872 {
873 	char *ptr;
874 	char *buf;
875 	__va_list va;
876 
877 	__va_start(va, ctl);
878 	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
879 			   &ptr, &buf, 0) == 0) {
880 		kprintf("unmount(%s): ", ptr);
881 		kvprintf(ctl, va);
882 		kprintf("\n");
883 		kfree(buf, M_TEMP);
884 	} else {
885 		kprintf("unmount(%p", mp);
886 		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
887 			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
888 		kprintf("): ");
889 		kvprintf(ctl, va);
890 		kprintf("\n");
891 	}
892 	__va_end(va);
893 }
894 
895 /*
896  * Shim cache_fullpath() to handle the case where a process is chrooted into
897  * a subdirectory of a mount.  In this case if the root mount matches the
898  * process root directory's mount we have to specify the process's root
899  * directory instead of the mount point, because the mount point might
900  * be above the root directory.
901  */
902 static
903 int
904 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
905 {
906 	struct nchandle *nch;
907 
908 	if (p && p->p_fd->fd_nrdir.mount == mp)
909 		nch = &p->p_fd->fd_nrdir;
910 	else
911 		nch = &mp->mnt_ncmountpt;
912 	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
913 }
914 
915 /*
916  * Sync each mounted filesystem.
917  */
918 
919 #ifdef DEBUG
920 static int syncprt = 0;
921 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
922 #endif /* DEBUG */
923 
924 static int sync_callback(struct mount *mp, void *data);
925 
926 int
927 sys_sync(struct sync_args *uap)
928 {
929 	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
930 #ifdef DEBUG
931 	/*
932 	 * print out buffer pool stat information on each sync() call.
933 	 */
934 	if (syncprt)
935 		vfs_bufstats();
936 #endif /* DEBUG */
937 	return (0);
938 }
939 
940 static
941 int
942 sync_callback(struct mount *mp, void *data __unused)
943 {
944 	int asyncflag;
945 
946 	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
947 		asyncflag = mp->mnt_flag & MNT_ASYNC;
948 		mp->mnt_flag &= ~MNT_ASYNC;
949 		vfs_msync(mp, MNT_NOWAIT);
950 		VFS_SYNC(mp, MNT_NOWAIT);
951 		mp->mnt_flag |= asyncflag;
952 	}
953 	return(0);
954 }
955 
956 /* XXX PRISON: could be per prison flag */
957 static int prison_quotas;
958 #if 0
959 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
960 #endif
961 
962 /*
963  *  quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
964  *
965  * Change filesystem quotas.
966  *
967  * MPALMOSTSAFE
968  */
969 int
970 sys_quotactl(struct quotactl_args *uap)
971 {
972 	struct nlookupdata nd;
973 	struct thread *td;
974 	struct mount *mp;
975 	int error;
976 
977 	get_mplock();
978 	td = curthread;
979 	if (td->td_ucred->cr_prison && !prison_quotas) {
980 		error = EPERM;
981 		goto done;
982 	}
983 
984 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
985 	if (error == 0)
986 		error = nlookup(&nd);
987 	if (error == 0) {
988 		mp = nd.nl_nch.mount;
989 		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
990 				    uap->arg, nd.nl_cred);
991 	}
992 	nlookup_done(&nd);
993 done:
994 	rel_mplock();
995 	return (error);
996 }
997 
998 /*
999  * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
1000  *		void *buf, int buflen)
1001  *
1002  * This function operates on a mount point and executes the specified
1003  * operation using the specified control data, and possibly returns data.
1004  *
1005  * The actual number of bytes stored in the result buffer is returned, 0
1006  * if none, otherwise an error is returned.
1007  *
1008  * MPALMOSTSAFE
1009  */
1010 int
1011 sys_mountctl(struct mountctl_args *uap)
1012 {
1013 	struct thread *td = curthread;
1014 	struct proc *p = td->td_proc;
1015 	struct file *fp;
1016 	void *ctl = NULL;
1017 	void *buf = NULL;
1018 	char *path = NULL;
1019 	int error;
1020 
1021 	/*
1022 	 * Sanity and permissions checks.  Root is required except for MOUNTCTL_MOUNTFLAGS.
1023 	 */
1024 	KKASSERT(p);
1025 	if (td->td_ucred->cr_prison != NULL)
1026 		return (EPERM);
1027 	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
1028 	    (error = priv_check(td, PRIV_ROOT)) != 0)
1029 		return (error);
1030 
1031 	/*
1032 	 * Argument length checks
1033 	 */
1034 	if (uap->ctllen < 0 || uap->ctllen > 1024)
1035 		return (EINVAL);
1036 	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
1037 		return (EINVAL);
1038 	if (uap->path == NULL)
1039 		return (EINVAL);
1040 
1041 	/*
1042 	 * Allocate the necessary buffers and copyin data
1043 	 */
1044 	path = objcache_get(namei_oc, M_WAITOK);
1045 	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1046 	if (error)
1047 		goto done;
1048 
1049 	if (uap->ctllen) {
1050 		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
1051 		error = copyin(uap->ctl, ctl, uap->ctllen);
1052 		if (error)
1053 			goto done;
1054 	}
1055 	if (uap->buflen)
1056 		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
1057 
1058 	/*
1059 	 * Validate the descriptor
1060 	 */
1061 	if (uap->fd >= 0) {
1062 		fp = holdfp(p->p_fd, uap->fd, -1);
1063 		if (fp == NULL) {
1064 			error = EBADF;
1065 			goto done;
1066 		}
1067 	} else {
1068 		fp = NULL;
1069 	}
1070 
1071 	/*
1072 	 * Execute the internal kernel function and clean up.
1073 	 */
1074 	get_mplock();
1075 	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
1076 	rel_mplock();
1077 	if (fp)
1078 		fdrop(fp);
1079 	if (error == 0 && uap->sysmsg_result > 0)
1080 		error = copyout(buf, uap->buf, uap->sysmsg_result);
1081 done:
1082 	if (path)
1083 		objcache_put(namei_oc, path);
1084 	if (ctl)
1085 		kfree(ctl, M_TEMP);
1086 	if (buf)
1087 		kfree(buf, M_TEMP);
1088 	return (error);
1089 }
1090 
1091 /*
1092  * Execute a mount control operation by resolving the path to a mount point
1093  * and calling vop_mountctl().
1094  *
1095  * Use the mount point from the nch instead of the vnode so nullfs mounts
1096  * can properly spike the VOP.
1097  */
1098 int
1099 kern_mountctl(const char *path, int op, struct file *fp,
1100 		const void *ctl, int ctllen,
1101 		void *buf, int buflen, int *res)
1102 {
1103 	struct vnode *vp;
1104 	struct mount *mp;
1105 	struct nlookupdata nd;
1106 	int error;
1107 
1108 	*res = 0;
1109 	vp = NULL;
1110 	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
1111 	if (error == 0)
1112 		error = nlookup(&nd);
1113 	if (error == 0)
1114 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
1115 	mp = nd.nl_nch.mount;
1116 	nlookup_done(&nd);
1117 	if (error)
1118 		return (error);
1119 	vn_unlock(vp);
1120 
1121 	/*
1122 	 * Must be the root of the filesystem
1123 	 */
1124 	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
1125 		vrele(vp);
1126 		return (EINVAL);
1127 	}
1128 	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
1129 			     buf, buflen, res);
1130 	vrele(vp);
1131 	return (error);
1132 }
1133 
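/*
 * Backend for statfs(2): resolve the path to its mount, refresh the
 * mount's statfs data via VFS_STATFS(), rewrite f_mntonname relative to
 * the caller's root, and zero the fsid in the copy returned to non-root
 * callers.
 */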
1134 int
1135 kern_statfs(struct nlookupdata *nd, struct statfs *buf)
1136 {
1137 	struct thread *td = curthread;
1138 	struct proc *p = td->td_proc;
1139 	struct mount *mp;
1140 	struct statfs *sp;
1141 	char *fullpath, *freepath;
1142 	int error;
1143 
1144 	if ((error = nlookup(nd)) != 0)
1145 		return (error);
1146 	mp = nd->nl_nch.mount;
1147 	sp = &mp->mnt_stat;
1148 	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
1149 		return (error);
1150 
1151 	error = mount_path(p, mp, &fullpath, &freepath);
1152 	if (error)
1153 		return(error);
1154 	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1155 	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1156 	kfree(freepath, M_TEMP);
1157 
1158 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1159 	bcopy(sp, buf, sizeof(*buf));
1160 	/* Only root should have access to the fsids. */
1161 	if (priv_check(td, PRIV_ROOT))
1162 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1163 	return (0);
1164 }
1165 
1166 /*
1167  * statfs_args(char *path, struct statfs *buf)
1168  *
1169  * Get filesystem statistics.
1170  */
1171 int
1172 sys_statfs(struct statfs_args *uap)
1173 {
1174 	struct nlookupdata nd;
1175 	struct statfs buf;
1176 	int error;
1177 
1178 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1179 	if (error == 0)
1180 		error = kern_statfs(&nd, &buf);
1181 	nlookup_done(&nd);
1182 	if (error == 0)
1183 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1184 	return (error);
1185 }
1186 
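/*
 * Backend for fstatfs(2): like kern_statfs(), but the filesystem is
 * identified by an open descriptor, preferring the overlay (nchandle)
 * mount over the underlying vnode's mount.
 */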
1187 int
1188 kern_fstatfs(int fd, struct statfs *buf)
1189 {
1190 	struct thread *td = curthread;
1191 	struct proc *p = td->td_proc;
1192 	struct file *fp;
1193 	struct mount *mp;
1194 	struct statfs *sp;
1195 	char *fullpath, *freepath;
1196 	int error;
1197 
1198 	KKASSERT(p);
1199 	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
1200 		return (error);
1201 
1202 	/*
1203 	 * Try to use mount info from any overlays rather than the
1204 	 * mount info for the underlying vnode; otherwise we will
1205 	 * fail when operating on null-mounted paths inside a chroot.
1206 	 */
1207 	if ((mp = fp->f_nchandle.mount) == NULL)
1208 		mp = ((struct vnode *)fp->f_data)->v_mount;
1209 	if (mp == NULL) {
1210 		error = EBADF;
1211 		goto done;
1212 	}
1213 	if (fp->f_cred == NULL) {
1214 		error = EINVAL;
1215 		goto done;
1216 	}
1217 	sp = &mp->mnt_stat;
1218 	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
1219 		goto done;
1220 
1221 	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
1222 		goto done;
1223 	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1224 	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1225 	kfree(freepath, M_TEMP);
1226 
1227 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1228 	bcopy(sp, buf, sizeof(*buf));
1229 
1230 	/* Only root should have access to the fsids. */
1231 	if (priv_check(td, PRIV_ROOT))
1232 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1233 	error = 0;
1234 done:
1235 	fdrop(fp);
1236 	return (error);
1237 }
1238 
1239 /*
1240  * fstatfs_args(int fd, struct statfs *buf)
1241  *
1242  * Get filesystem statistics.
1243  */
1244 int
1245 sys_fstatfs(struct fstatfs_args *uap)
1246 {
1247 	struct statfs buf;
1248 	int error;
1249 
1250 	error = kern_fstatfs(uap->fd, &buf);
1251 
1252 	if (error == 0)
1253 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1254 	return (error);
1255 }
1256 
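/*
 * Backend for statvfs(2): resolve the path to its mount, refresh the
 * statvfs data via VFS_STATVFS(), and derive f_flag (ST_RDONLY, ST_NOSUID)
 * from the mount flags.
 */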
1257 int
1258 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
1259 {
1260 	struct mount *mp;
1261 	struct statvfs *sp;
1262 	int error;
1263 
1264 	if ((error = nlookup(nd)) != 0)
1265 		return (error);
1266 	mp = nd->nl_nch.mount;
1267 	sp = &mp->mnt_vstat;
1268 	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
1269 		return (error);
1270 
1271 	sp->f_flag = 0;
1272 	if (mp->mnt_flag & MNT_RDONLY)
1273 		sp->f_flag |= ST_RDONLY;
1274 	if (mp->mnt_flag & MNT_NOSUID)
1275 		sp->f_flag |= ST_NOSUID;
1276 	bcopy(sp, buf, sizeof(*buf));
1277 	return (0);
1278 }
1279 
1280 /*
1281  * statfs_args(char *path, struct statfs *buf)
1282  *
1283  * Get filesystem statistics.
1284  */
1285 int
1286 sys_statvfs(struct statvfs_args *uap)
1287 {
1288 	struct nlookupdata nd;
1289 	struct statvfs buf;
1290 	int error;
1291 
1292 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1293 	if (error == 0)
1294 		error = kern_statvfs(&nd, &buf);
1295 	nlookup_done(&nd);
1296 	if (error == 0)
1297 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1298 	return (error);
1299 }
1300 
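/*
 * Backend for fstatvfs(2): like kern_statvfs(), but the filesystem is
 * identified by an open file descriptor.
 */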
1301 int
1302 kern_fstatvfs(int fd, struct statvfs *buf)
1303 {
1304 	struct thread *td = curthread;
1305 	struct proc *p = td->td_proc;
1306 	struct file *fp;
1307 	struct mount *mp;
1308 	struct statvfs *sp;
1309 	int error;
1310 
1311 	KKASSERT(p);
1312 	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
1313 		return (error);
1314 	if ((mp = fp->f_nchandle.mount) == NULL)
1315 		mp = ((struct vnode *)fp->f_data)->v_mount;
1316 	if (mp == NULL) {
1317 		error = EBADF;
1318 		goto done;
1319 	}
1320 	if (fp->f_cred == NULL) {
1321 		error = EINVAL;
1322 		goto done;
1323 	}
1324 	sp = &mp->mnt_vstat;
1325 	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
1326 		goto done;
1327 
1328 	sp->f_flag = 0;
1329 	if (mp->mnt_flag & MNT_RDONLY)
1330 		sp->f_flag |= ST_RDONLY;
1331 	if (mp->mnt_flag & MNT_NOSUID)
1332 		sp->f_flag |= ST_NOSUID;
1333 
1334 	bcopy(sp, buf, sizeof(*buf));
1335 	error = 0;
1336 done:
1337 	fdrop(fp);
1338 	return (error);
1339 }
1340 
1341 /*
1342  * fstatvfs_args(int fd, struct statvfs *buf)
1343  *
1344  * Get filesystem statistics.
1345  */
1346 int
1347 sys_fstatvfs(struct fstatvfs_args *uap)
1348 {
1349 	struct statvfs buf;
1350 	int error;
1351 
1352 	error = kern_fstatvfs(uap->fd, &buf);
1353 
1354 	if (error == 0)
1355 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1356 	return (error);
1357 }
1358 
1359 /*
1360  * getfsstat_args(struct statfs *buf, long bufsize, int flags)
1361  *
1362  * Get statistics on all filesystems.
1363  */
1364 
1365 struct getfsstat_info {
1366 	struct statfs *sfsp;
1367 	long count;
1368 	long maxcount;
1369 	int error;
1370 	int flags;
1371 	struct thread *td;
1372 };
1373 
1374 static int getfsstat_callback(struct mount *, void *);
1375 
1376 int
1377 sys_getfsstat(struct getfsstat_args *uap)
1378 {
1379 	struct thread *td = curthread;
1380 	struct getfsstat_info info;
1381 
1382 	bzero(&info, sizeof(info));
1383 
1384 	info.maxcount = uap->bufsize / sizeof(struct statfs);
1385 	info.sfsp = uap->buf;
1386 	info.count = 0;
1387 	info.flags = uap->flags;
1388 	info.td = td;
1389 
1390 	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
1391 	if (info.sfsp && info.count > info.maxcount)
1392 		uap->sysmsg_result = info.maxcount;
1393 	else
1394 		uap->sysmsg_result = info.count;
1395 	return (info.error);
1396 }
1397 
1398 static int
1399 getfsstat_callback(struct mount *mp, void *data)
1400 {
1401 	struct getfsstat_info *info = data;
1402 	struct statfs *sp;
1403 	char *freepath;
1404 	char *fullpath;
1405 	int error;
1406 
1407 	if (info->sfsp && info->count < info->maxcount) {
1408 		if (info->td->td_proc &&
1409 		    !chroot_visible_mnt(mp, info->td->td_proc)) {
1410 			return(0);
1411 		}
1412 		sp = &mp->mnt_stat;
1413 
1414 		/*
1415 		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1416 		 * refresh the fsstat cache unless MNT_WAIT is also
1417 		 * given (MNT_WAIT overrides MNT_NOWAIT and MNT_LAZY).
1418 		 */
1419 		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1420 		    (info->flags & MNT_WAIT)) &&
1421 		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1422 			return(0);
1423 		}
1424 		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1425 
1426 		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1427 		if (error) {
1428 			info->error = error;
1429 			return(-1);
1430 		}
1431 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1432 		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1433 		kfree(freepath, M_TEMP);
1434 
1435 		error = copyout(sp, info->sfsp, sizeof(*sp));
1436 		if (error) {
1437 			info->error = error;
1438 			return (-1);
1439 		}
1440 		++info->sfsp;
1441 	}
1442 	info->count++;
1443 	return(0);
1444 }
1445 
1446 /*
1447  * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
1448  *		   long vbufsize, int flags)
1449  *
1450  * Get statistics on all filesystems.
1451  */
1452 
1453 struct getvfsstat_info {
1454 	struct statfs *sfsp;
1455 	struct statvfs *vsfsp;
1456 	long count;
1457 	long maxcount;
1458 	int error;
1459 	int flags;
1460 	struct thread *td;
1461 };
1462 
1463 static int getvfsstat_callback(struct mount *, void *);
1464 
1465 int
1466 sys_getvfsstat(struct getvfsstat_args *uap)
1467 {
1468 	struct thread *td = curthread;
1469 	struct getvfsstat_info info;
1470 
1471 	bzero(&info, sizeof(info));
1472 
1473 	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
1474 	info.sfsp = uap->buf;
1475 	info.vsfsp = uap->vbuf;
1476 	info.count = 0;
1477 	info.flags = uap->flags;
1478 	info.td = td;
1479 
1480 	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
1481 	if (info.vsfsp && info.count > info.maxcount)
1482 		uap->sysmsg_result = info.maxcount;
1483 	else
1484 		uap->sysmsg_result = info.count;
1485 	return (info.error);
1486 }
1487 
1488 static int
1489 getvfsstat_callback(struct mount *mp, void *data)
1490 {
1491 	struct getvfsstat_info *info = data;
1492 	struct statfs *sp;
1493 	struct statvfs *vsp;
1494 	char *freepath;
1495 	char *fullpath;
1496 	int error;
1497 
1498 	if (info->vsfsp && info->count < info->maxcount) {
1499 		if (info->td->td_proc &&
1500 		    !chroot_visible_mnt(mp, info->td->td_proc)) {
1501 			return(0);
1502 		}
1503 		sp = &mp->mnt_stat;
1504 		vsp = &mp->mnt_vstat;
1505 
1506 		/*
1507 		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1508 		 * refresh the fsstat cache unless MNT_WAIT is also
1509 		 * given (MNT_WAIT overrides MNT_NOWAIT and MNT_LAZY).
1510 		 */
1511 		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1512 		    (info->flags & MNT_WAIT)) &&
1513 		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1514 			return(0);
1515 		}
1516 		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1517 
1518 		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1519 		    (info->flags & MNT_WAIT)) &&
1520 		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
1521 			return(0);
1522 		}
1523 		vsp->f_flag = 0;
1524 		if (mp->mnt_flag & MNT_RDONLY)
1525 			vsp->f_flag |= ST_RDONLY;
1526 		if (mp->mnt_flag & MNT_NOSUID)
1527 			vsp->f_flag |= ST_NOSUID;
1528 
1529 		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1530 		if (error) {
1531 			info->error = error;
1532 			return(-1);
1533 		}
1534 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1535 		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1536 		kfree(freepath, M_TEMP);
1537 
1538 		error = copyout(sp, info->sfsp, sizeof(*sp));
1539 		if (error == 0)
1540 			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
1541 		if (error) {
1542 			info->error = error;
1543 			return (-1);
1544 		}
1545 		++info->sfsp;
1546 		++info->vsfsp;
1547 	}
1548 	info->count++;
1549 	return(0);
1550 }
1551 
1552 
1553 /*
1554  * fchdir_args(int fd)
1555  *
1556  * Change current working directory to a given file descriptor.
1557  */
1558 int
1559 sys_fchdir(struct fchdir_args *uap)
1560 {
1561 	struct thread *td = curthread;
1562 	struct proc *p = td->td_proc;
1563 	struct filedesc *fdp = p->p_fd;
1564 	struct vnode *vp, *ovp;
1565 	struct mount *mp;
1566 	struct file *fp;
1567 	struct nchandle nch, onch, tnch;
1568 	int error;
1569 
1570 	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
1571 		return (error);
1572 	lwkt_gettoken(&p->p_token);
1573 	vp = (struct vnode *)fp->f_data;
1574 	vref(vp);
1575 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1576 	if (fp->f_nchandle.ncp == NULL)
1577 		error = ENOTDIR;
1578 	else
1579 		error = checkvp_chdir(vp, td);
1580 	if (error) {
1581 		vput(vp);
1582 		goto done;
1583 	}
1584 	cache_copy(&fp->f_nchandle, &nch);
1585 
1586 	/*
1587 	 * If the ncp has become a mount point, traverse through
1588 	 * the mount point.
1589 	 */
1590 
1591 	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
1592 	       (mp = cache_findmount(&nch)) != NULL
1593 	) {
1594 		error = nlookup_mp(mp, &tnch);
1595 		if (error == 0) {
1596 			cache_unlock(&tnch);	/* leave ref intact */
1597 			vput(vp);
1598 			vp = tnch.ncp->nc_vp;
1599 			error = vget(vp, LK_SHARED);
1600 			KKASSERT(error == 0);
1601 			cache_drop(&nch);
1602 			nch = tnch;
1603 		}
1604 		cache_dropmount(mp);
1605 	}
1606 	if (error == 0) {
1607 		ovp = fdp->fd_cdir;
1608 		onch = fdp->fd_ncdir;
1609 		vn_unlock(vp);		/* leave ref intact */
1610 		fdp->fd_cdir = vp;
1611 		fdp->fd_ncdir = nch;
1612 		cache_drop(&onch);
1613 		vrele(ovp);
1614 	} else {
1615 		cache_drop(&nch);
1616 		vput(vp);
1617 	}
1618 	fdrop(fp);
1619 done:
1620 	lwkt_reltoken(&p->p_token);
1621 	return (error);
1622 }
1623 
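/*
 * Backend for chdir(2): resolve the directory, validate it with
 * checkvp_chdir(), and install the resulting vnode and nchandle as the
 * process's current directory, dropping the previous references.
 */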
1624 int
1625 kern_chdir(struct nlookupdata *nd)
1626 {
1627 	struct thread *td = curthread;
1628 	struct proc *p = td->td_proc;
1629 	struct filedesc *fdp = p->p_fd;
1630 	struct vnode *vp, *ovp;
1631 	struct nchandle onch;
1632 	int error;
1633 
1634 	if ((error = nlookup(nd)) != 0)
1635 		return (error);
1636 	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
1637 		return (ENOENT);
1638 	if ((error = vget(vp, LK_SHARED)) != 0)
1639 		return (error);
1640 
1641 	lwkt_gettoken(&p->p_token);
1642 	error = checkvp_chdir(vp, td);
1643 	vn_unlock(vp);
1644 	if (error == 0) {
1645 		ovp = fdp->fd_cdir;
1646 		onch = fdp->fd_ncdir;
1647 		cache_unlock(&nd->nl_nch);	/* leave reference intact */
1648 		fdp->fd_ncdir = nd->nl_nch;
1649 		fdp->fd_cdir = vp;
1650 		cache_drop(&onch);
1651 		vrele(ovp);
1652 		cache_zero(&nd->nl_nch);
1653 	} else {
1654 		vrele(vp);
1655 	}
1656 	lwkt_reltoken(&p->p_token);
1657 	return (error);
1658 }
1659 
1660 /*
1661  * chdir_args(char *path)
1662  *
1663  * Change current working directory (``.'').
1664  */
1665 int
1666 sys_chdir(struct chdir_args *uap)
1667 {
1668 	struct nlookupdata nd;
1669 	int error;
1670 
1671 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1672 	if (error == 0)
1673 		error = kern_chdir(&nd);
1674 	nlookup_done(&nd);
1675 	return (error);
1676 }
1677 
1678 /*
1679  * Helper for the tightened chroot(2) security check:  refuse the chroot
1680  * if any file descriptors refer to open directories.
1681  */
1682 static int
1683 chroot_refuse_vdir_fds(struct filedesc *fdp)
1684 {
1685 	struct vnode *vp;
1686 	struct file *fp;
1687 	int error;
1688 	int fd;
1689 
1690 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1691 		if ((error = holdvnode(fdp, fd, &fp)) != 0)
1692 			continue;
1693 		vp = (struct vnode *)fp->f_data;
1694 		if (vp->v_type != VDIR) {
1695 			fdrop(fp);
1696 			continue;
1697 		}
1698 		fdrop(fp);
1699 		return(EPERM);
1700 	}
1701 	return (0);
1702 }
1703 
1704 /*
1705  * This sysctl determines if we will allow a process to chroot(2) if it
1706  * has a directory open:
1707  *	0: disallowed for all processes.
1708  *	1: allowed for processes that were not already chroot(2)'ed.
1709  *	2: allowed for all processes.
1710  */
1711 
1712 static int chroot_allow_open_directories = 1;
1713 
1714 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1715      &chroot_allow_open_directories, 0, "");
1716 
1717 /*
1718  * chroot to the specified namecache entry.  We obtain the vp from the
1719  * namecache data.  The passed ncp must be locked and referenced and will
1720  * remain locked and referenced on return.
1721  */
1722 int
1723 kern_chroot(struct nchandle *nch)
1724 {
1725 	struct thread *td = curthread;
1726 	struct proc *p = td->td_proc;
1727 	struct filedesc *fdp = p->p_fd;
1728 	struct vnode *vp;
1729 	int error;
1730 
1731 	/*
1732 	 * Only privileged user can chroot
1733 	 */
1734 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
1735 	if (error)
1736 		return (error);
1737 
1738 	/*
1739 	 * Disallow open directory descriptors (fchdir() breakouts).
1740 	 */
1741 	if (chroot_allow_open_directories == 0 ||
1742 	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1743 		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
1744 			return (error);
1745 	}
1746 	if ((vp = nch->ncp->nc_vp) == NULL)
1747 		return (ENOENT);
1748 
1749 	if ((error = vget(vp, LK_SHARED)) != 0)
1750 		return (error);
1751 
1752 	/*
1753 	 * Check the validity of vp as a directory to change to and
1754 	 * associate it with rdir/jdir.
1755 	 */
1756 	error = checkvp_chdir(vp, td);
1757 	vn_unlock(vp);			/* leave reference intact */
1758 	if (error == 0) {
1759 		vrele(fdp->fd_rdir);
1760 		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
1761 		cache_drop(&fdp->fd_nrdir);
1762 		cache_copy(nch, &fdp->fd_nrdir);
1763 		if (fdp->fd_jdir == NULL) {
1764 			fdp->fd_jdir = vp;
1765 			vref(fdp->fd_jdir);
1766 			cache_copy(nch, &fdp->fd_njdir);
1767 		}
1768 	} else {
1769 		vrele(vp);
1770 	}
1771 	return (error);
1772 }
1773 
1774 /*
1775  * chroot_args(char *path)
1776  *
1777  * Change notion of root (``/'') directory.
1778  */
1779 int
1780 sys_chroot(struct chroot_args *uap)
1781 {
1782 	struct thread *td __debugvar = curthread;
1783 	struct nlookupdata nd;
1784 	int error;
1785 
1786 	KKASSERT(td->td_proc);
1787 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1788 	if (error == 0) {
1789 		nd.nl_flags |= NLC_EXEC;
1790 		error = nlookup(&nd);
1791 		if (error == 0)
1792 			error = kern_chroot(&nd.nl_nch);
1793 	}
1794 	nlookup_done(&nd);
1795 	return(error);
1796 }
1797 
1798 int
1799 sys_chroot_kernel(struct chroot_kernel_args *uap)
1800 {
1801 	struct thread *td = curthread;
1802 	struct nlookupdata nd;
1803 	struct nchandle *nch;
1804 	struct vnode *vp;
1805 	int error;
1806 
1807 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1808 	if (error)
1809 		goto error_nond;
1810 
1811 	error = nlookup(&nd);
1812 	if (error)
1813 		goto error_out;
1814 
1815 	nch = &nd.nl_nch;
1816 
1817 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
1818 	if (error)
1819 		goto error_out;
1820 
1821 	if ((vp = nch->ncp->nc_vp) == NULL) {
1822 		error = ENOENT;
1823 		goto error_out;
1824 	}
1825 
1826 	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
1827 		goto error_out;
1828 
1829 	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
1830 	get_mplock();
1831 	vfs_cache_setroot(vp, cache_hold(nch));
1832 	rel_mplock();
1833 
1834 error_out:
1835 	nlookup_done(&nd);
1836 error_nond:
1837 	return(error);
1838 }
1839 
1840 /*
1841  * Common routine for chroot and chdir.  Given a locked, referenced vnode,
1842  * determine whether it is legal to chdir to the vnode.  The vnode's state
1843  * is not changed by this call.
1844  */
1845 int
1846 checkvp_chdir(struct vnode *vp, struct thread *td)
1847 {
1848 	int error;
1849 
1850 	if (vp->v_type != VDIR)
1851 		error = ENOTDIR;
1852 	else
1853 		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
1854 	return (error);
1855 }
1856 
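/*
 * Backend for open(2) and openat(2): allocate a file pointer, perform the
 * vn_open() lookup, reserve a file descriptor, apply any requested
 * O_EXLOCK/O_SHLOCK advisory lock, and install the descriptor only when
 * the open fully succeeds.  The fdopen() dup special case (lwp_dupfd) is
 * handled on ENODEV/ENXIO.
 */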
1857 int
1858 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
1859 {
1860 	struct thread *td = curthread;
1861 	struct proc *p = td->td_proc;
1862 	struct lwp *lp = td->td_lwp;
1863 	struct filedesc *fdp = p->p_fd;
1864 	int cmode, flags;
1865 	struct file *nfp;
1866 	struct file *fp;
1867 	struct vnode *vp;
1868 	int type, indx, error = 0;
1869 	struct flock lf;
1870 
1871 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1872 		return (EINVAL);
1873 	flags = FFLAGS(oflags);
1874 	error = falloc(lp, &nfp, NULL);
1875 	if (error)
1876 		return (error);
1877 	fp = nfp;
1878 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1879 
1880 	/*
1881 	 * XXX p_dupfd is a real mess.  It allows a device to return a
1882 	 * file descriptor to be duplicated rather than doing the open
1883 	 * itself.
1884 	 */
1885 	lp->lwp_dupfd = -1;
1886 
1887 	/*
1888 	 * Call vn_open() to do the lookup and assign the vnode to the
1889 	 * file pointer.  vn_open() does not change the ref count on fp,
1890 	 * and the vnode, on success, will be inherited by the file pointer
1891 	 * and unlocked.
1892 	 */
1893 	nd->nl_flags |= NLC_LOCKVP;
1894 	error = vn_open(nd, fp, flags, cmode);
1895 	nlookup_done(nd);
1896 	if (error) {
1897 		/*
1898 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1899 		 * responsible for dropping the old contents of ofiles[indx]
1900 		 * if it succeeds.
1901 		 *
1902 		 * Note that fsetfd() will add a ref to fp which represents
1903 		 * the fd_files[] assignment.  We must still drop our
1904 		 * reference.
1905 		 */
1906 		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
1907 			if (fdalloc(p, 0, &indx) == 0) {
1908 				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
1909 				if (error == 0) {
1910 					*res = indx;
1911 					fdrop(fp);	/* our ref */
1912 					return (0);
1913 				}
1914 				fsetfd(fdp, NULL, indx);
1915 			}
1916 		}
1917 		fdrop(fp);	/* our ref */
1918 		if (error == ERESTART)
1919 			error = EINTR;
1920 		return (error);
1921 	}
1922 
1923 	/*
1924 	 * ref the vnode for ourselves so it can't be ripped out from under
1925 	 * us.  XXX need an ND flag to request that the vnode be returned
1926 	 * anyway.
1927 	 *
1928 	 * Reserve a file descriptor but do not assign it until the open
1929 	 * succeeds.
1930 	 */
1931 	vp = (struct vnode *)fp->f_data;
1932 	vref(vp);
1933 	if ((error = fdalloc(p, 0, &indx)) != 0) {
1934 		fdrop(fp);
1935 		vrele(vp);
1936 		return (error);
1937 	}
1938 
1939 	/*
1940 	 * If no error occurs the vp will have been assigned to the file
1941 	 * pointer.
1942 	 */
1943 	lp->lwp_dupfd = 0;
1944 
1945 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1946 		lf.l_whence = SEEK_SET;
1947 		lf.l_start = 0;
1948 		lf.l_len = 0;
1949 		if (flags & O_EXLOCK)
1950 			lf.l_type = F_WRLCK;
1951 		else
1952 			lf.l_type = F_RDLCK;
1953 		if (flags & FNONBLOCK)
1954 			type = 0;
1955 		else
1956 			type = F_WAIT;
1957 
1958 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
1959 			/*
1960 			 * lock request failed.  Clean up the reserved
1961 			 * descriptor.
1962 			 */
1963 			vrele(vp);
1964 			fsetfd(fdp, NULL, indx);
1965 			fdrop(fp);
1966 			return (error);
1967 		}
1968 		fp->f_flag |= FHASLOCK;
1969 	}
1970 #if 0
1971 	/*
1972 	 * Assert that all regular file vnodes were created with an object.
1973 	 */
1974 	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
1975 		("open: regular file has no backing object after vn_open"));
1976 #endif
1977 
1978 	vrele(vp);
1979 
1980 	/*
1981 	 * release our private reference, leaving the one associated with the
1982 	 * descriptor table intact.
1983 	 */
1984 	fsetfd(fdp, fp, indx);
1985 	fdrop(fp);
1986 	*res = indx;
1987 	if (oflags & O_CLOEXEC)
1988 		error = fsetfdflags(fdp, *res, UF_EXCLOSE);
1989 	return (error);
1990 }
1991 
1992 /*
1993  * open_args(char *path, int flags, int mode)
1994  *
1995  * Check permissions, allocate an open file structure,
1996  * and call the device open routine if any.
1997  */
1998 int
1999 sys_open(struct open_args *uap)
2000 {
2001 	struct nlookupdata nd;
2002 	int error;
2003 
2004 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2005 	if (error == 0) {
2006 		error = kern_open(&nd, uap->flags,
2007 				    uap->mode, &uap->sysmsg_result);
2008 	}
2009 	nlookup_done(&nd);
2010 	return (error);
2011 }
2012 
2013 /*
2014  * openat_args(int fd, char *path, int flags, int mode)
2015  */
2016 int
2017 sys_openat(struct openat_args *uap)
2018 {
2019 	struct nlookupdata nd;
2020 	int error;
2021 	struct file *fp;
2022 
2023 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2024 	if (error == 0) {
2025 		error = kern_open(&nd, uap->flags, uap->mode,
2026 					&uap->sysmsg_result);
2027 	}
2028 	nlookup_done_at(&nd, fp);
2029 	return (error);
2030 }
2031 
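/*
 * Backend for mknod(2) and mknodat(2): check the caller's privilege for
 * the requested node type, then create the node via VOP_NMKNOD() (or a
 * whiteout via VOP_NWHITEOUT()).
 */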
2032 int
2033 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
2034 {
2035 	struct thread *td = curthread;
2036 	struct proc *p = td->td_proc;
2037 	struct vnode *vp;
2038 	struct vattr vattr;
2039 	int error;
2040 	int whiteout = 0;
2041 
2042 	KKASSERT(p);
2043 
2044 	VATTR_NULL(&vattr);
2045 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2046 	vattr.va_rmajor = rmajor;
2047 	vattr.va_rminor = rminor;
2048 
2049 	switch (mode & S_IFMT) {
2050 	case S_IFMT:	/* used by badsect to flag bad sectors */
2051 		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
2052 		vattr.va_type = VBAD;
2053 		break;
2054 	case S_IFCHR:
2055 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
2056 		vattr.va_type = VCHR;
2057 		break;
2058 	case S_IFBLK:
2059 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
2060 		vattr.va_type = VBLK;
2061 		break;
2062 	case S_IFWHT:
2063 		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
2064 		whiteout = 1;
2065 		break;
2066 	case S_IFDIR:	/* special directories support for HAMMER */
2067 		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
2068 		vattr.va_type = VDIR;
2069 		break;
2070 	default:
2071 		error = EINVAL;
2072 		break;
2073 	}
2074 
2075 	if (error)
2076 		return (error);
2077 
2078 	bwillinode(1);
2079 	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2080 	if ((error = nlookup(nd)) != 0)
2081 		return (error);
2082 	if (nd->nl_nch.ncp->nc_vp)
2083 		return (EEXIST);
2084 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2085 		return (error);
2086 
2087 	if (whiteout) {
2088 		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
2089 				      nd->nl_cred, NAMEI_CREATE);
2090 	} else {
2091 		vp = NULL;
2092 		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
2093 				   &vp, nd->nl_cred, &vattr);
2094 		if (error == 0)
2095 			vput(vp);
2096 	}
2097 	return (error);
2098 }
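
/*
 * Illustrative userland sketch (device path and numbers are hypothetical):
 * a character device node would typically be created with something like
 *
 *	mknod("/dev/mydev", S_IFCHR | 0600, makedev(4, 0));
 *
 * which reaches this function with vattr.va_type = VCHR and the major/minor
 * halves split out by umajor()/uminor() in sys_mknod()/sys_mknodat() below.
 */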
2099 
2100 /*
2101  * mknod_args(char *path, int mode, int dev)
2102  *
2103  * Create a special file.
2104  */
2105 int
2106 sys_mknod(struct mknod_args *uap)
2107 {
2108 	struct nlookupdata nd;
2109 	int error;
2110 
2111 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2112 	if (error == 0) {
2113 		error = kern_mknod(&nd, uap->mode,
2114 				   umajor(uap->dev), uminor(uap->dev));
2115 	}
2116 	nlookup_done(&nd);
2117 	return (error);
2118 }
2119 
2120 /*
2121  * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
2122  *
2123  * Create a special file.  The path is relative to the directory associated
2124  * with fd.
2125  */
2126 int
2127 sys_mknodat(struct mknodat_args *uap)
2128 {
2129 	struct nlookupdata nd;
2130 	struct file *fp;
2131 	int error;
2132 
2133 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2134 	if (error == 0) {
2135 		error = kern_mknod(&nd, uap->mode,
2136 				   umajor(uap->dev), uminor(uap->dev));
2137 	}
2138 	nlookup_done_at(&nd, fp);
2139 	return (error);
2140 }
2141 
2142 int
2143 kern_mkfifo(struct nlookupdata *nd, int mode)
2144 {
2145 	struct thread *td = curthread;
2146 	struct proc *p = td->td_proc;
2147 	struct vattr vattr;
2148 	struct vnode *vp;
2149 	int error;
2150 
2151 	bwillinode(1);
2152 
2153 	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2154 	if ((error = nlookup(nd)) != 0)
2155 		return (error);
2156 	if (nd->nl_nch.ncp->nc_vp)
2157 		return (EEXIST);
2158 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2159 		return (error);
2160 
2161 	VATTR_NULL(&vattr);
2162 	vattr.va_type = VFIFO;
2163 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2164 	vp = NULL;
2165 	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
2166 	if (error == 0)
2167 		vput(vp);
2168 	return (error);
2169 }
2170 
2171 /*
2172  * mkfifo_args(char *path, int mode)
2173  *
2174  * Create a named pipe.
2175  */
2176 int
2177 sys_mkfifo(struct mkfifo_args *uap)
2178 {
2179 	struct nlookupdata nd;
2180 	int error;
2181 
2182 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2183 	if (error == 0)
2184 		error = kern_mkfifo(&nd, uap->mode);
2185 	nlookup_done(&nd);
2186 	return (error);
2187 }
2188 
2189 /*
2190  * mkfifoat_args(int fd, char *path, mode_t mode)
2191  *
2192  * Create a named pipe.  The path is relative to the directory associated
2193  * with fd.
2194  */
2195 int
2196 sys_mkfifoat(struct mkfifoat_args *uap)
2197 {
2198 	struct nlookupdata nd;
2199 	struct file *fp;
2200 	int error;
2201 
2202 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2203 	if (error == 0)
2204 		error = kern_mkfifo(&nd, uap->mode);
2205 	nlookup_done_at(&nd, fp);
2206 	return (error);
2207 }
2208 
2209 static int hardlink_check_uid = 0;
2210 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
2211     &hardlink_check_uid, 0,
2212     "Unprivileged processes cannot create hard links to files owned by other "
2213     "users");
2214 static int hardlink_check_gid = 0;
2215 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
2216     &hardlink_check_gid, 0,
2217     "Unprivileged processes cannot create hard links to files owned by other "
2218     "groups");
2219 
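/*
 * Sketch of how these knobs are meant to be used (the values shown are only
 * an example): an administrator would normally enable the checks from
 * userland via sysctl(8), e.g.
 *
 *	sysctl security.hardlink_check_uid=1
 *	sysctl security.hardlink_check_gid=1
 *
 * after which can_hardlink() below rejects unprivileged hard links to files
 * owned by other users or groups with EPERM.
 */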
2220 static int
2221 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
2222 {
2223 	struct vattr va;
2224 	int error;
2225 
2226 	/*
2227 	 * Shortcut if disabled
2228 	 */
2229 	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
2230 		return (0);
2231 
2232 	/*
2233 	 * Privileged user can always hardlink
2234 	 */
2235 	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
2236 		return (0);
2237 
2238 	/*
2239 	 * Otherwise only if the originating file is owned by the
2240 	 * same user or group.  Note that any group is allowed if
2241 	 * the file is owned by the caller.
2242 	 */
2243 	error = VOP_GETATTR(vp, &va);
2244 	if (error != 0)
2245 		return (error);
2246 
2247 	if (hardlink_check_uid) {
2248 		if (cred->cr_uid != va.va_uid)
2249 			return (EPERM);
2250 	}
2251 
2252 	if (hardlink_check_gid) {
2253 		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
2254 			return (EPERM);
2255 	}
2256 
2257 	return (0);
2258 }
2259 
2260 int
2261 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
2262 {
2263 	struct thread *td = curthread;
2264 	struct vnode *vp;
2265 	int error;
2266 
2267 	/*
2268 	 * Lookup the source and obtain a locked vnode.
2269 	 *
2270 	 * You may only hardlink a file which you have write permission
2271 	 * on or which you own.
2272 	 *
2273 	 * XXX relookup on vget failure / race ?
2274 	 */
2275 	bwillinode(1);
2276 	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
2277 	if ((error = nlookup(nd)) != 0)
2278 		return (error);
2279 	vp = nd->nl_nch.ncp->nc_vp;
2280 	KKASSERT(vp != NULL);
2281 	if (vp->v_type == VDIR)
2282 		return (EPERM);		/* POSIX */
2283 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2284 		return (error);
2285 	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
2286 		return (error);
2287 
2288 	/*
2289 	 * Unlock the source so we can lookup the target without deadlocking
2290 	 * (XXX vp is locked already, possible other deadlock?).  The target
2291 	 * must not exist.
2292 	 */
2293 	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
2294 	nd->nl_flags &= ~NLC_NCPISLOCKED;
2295 	cache_unlock(&nd->nl_nch);
2296 	vn_unlock(vp);
2297 
2298 	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2299 	if ((error = nlookup(linknd)) != 0) {
2300 		vrele(vp);
2301 		return (error);
2302 	}
2303 	if (linknd->nl_nch.ncp->nc_vp) {
2304 		vrele(vp);
2305 		return (EEXIST);
2306 	}
2307 	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
2308 		vrele(vp);
2309 		return (error);
2310 	}
2311 
2312 	/*
2313 	 * Finally run the new API VOP.
2314 	 */
2315 	error = can_hardlink(vp, td, td->td_ucred);
2316 	if (error == 0) {
2317 		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
2318 				  vp, linknd->nl_cred);
2319 	}
2320 	vput(vp);
2321 	return (error);
2322 }
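
/*
 * Userland sketch (path names are placeholders): link("a", "b") goes through
 * sys_link() below, which resolves the source with NLC_FOLLOW, so a trailing
 * symlink in "a" is followed.  linkat() only follows the source symlink when
 * AT_SYMLINK_FOLLOW is passed:
 *
 *	linkat(AT_FDCWD, "a", AT_FDCWD, "b", AT_SYMLINK_FOLLOW);
 */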
2323 
2324 /*
2325  * link_args(char *path, char *link)
2326  *
2327  * Make a hard file link.
2328  */
2329 int
2330 sys_link(struct link_args *uap)
2331 {
2332 	struct nlookupdata nd, linknd;
2333 	int error;
2334 
2335 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2336 	if (error == 0) {
2337 		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
2338 		if (error == 0)
2339 			error = kern_link(&nd, &linknd);
2340 		nlookup_done(&linknd);
2341 	}
2342 	nlookup_done(&nd);
2343 	return (error);
2344 }
2345 
2346 /*
2347  * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
2348  *
2349  * Make a hard file link. The path1 argument is relative to the directory
2350  * associated with fd1, and similarly the path2 argument is relative to
2351  * the directory associated with fd2.
2352  */
2353 int
2354 sys_linkat(struct linkat_args *uap)
2355 {
2356 	struct nlookupdata nd, linknd;
2357 	struct file *fp1, *fp2;
2358 	int error;
2359 
2360 	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
2361 	    (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
2362 	if (error == 0) {
2363 		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
2364 		    uap->path2, UIO_USERSPACE, 0);
2365 		if (error == 0)
2366 			error = kern_link(&nd, &linknd);
2367 		nlookup_done_at(&linknd, fp2);
2368 	}
2369 	nlookup_done_at(&nd, fp1);
2370 	return (error);
2371 }
2372 
2373 int
2374 kern_symlink(struct nlookupdata *nd, char *path, int mode)
2375 {
2376 	struct vattr vattr;
2377 	struct vnode *vp;
2378 	struct vnode *dvp;
2379 	int error;
2380 
2381 	bwillinode(1);
2382 	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2383 	if ((error = nlookup(nd)) != 0)
2384 		return (error);
2385 	if (nd->nl_nch.ncp->nc_vp)
2386 		return (EEXIST);
2387 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2388 		return (error);
2389 	dvp = nd->nl_dvp;
2390 	VATTR_NULL(&vattr);
2391 	vattr.va_mode = mode;
2392 	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
2393 	if (error == 0)
2394 		vput(vp);
2395 	return (error);
2396 }
2397 
2398 /*
2399  * symlink(char *path, char *link)
2400  *
2401  * Make a symbolic link.
2402  */
2403 int
2404 sys_symlink(struct symlink_args *uap)
2405 {
2406 	struct thread *td = curthread;
2407 	struct nlookupdata nd;
2408 	char *path;
2409 	int error;
2410 	int mode;
2411 
2412 	path = objcache_get(namei_oc, M_WAITOK);
2413 	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
2414 	if (error == 0) {
2415 		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
2416 		if (error == 0) {
2417 			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2418 			error = kern_symlink(&nd, path, mode);
2419 		}
2420 		nlookup_done(&nd);
2421 	}
2422 	objcache_put(namei_oc, path);
2423 	return (error);
2424 }
2425 
2426 /*
2427  * symlinkat_args(char *path1, int fd, char *path2)
2428  *
2429  * Make a symbolic link.  The path2 argument is relative to the directory
2430  * associated with fd.
2431  */
2432 int
2433 sys_symlinkat(struct symlinkat_args *uap)
2434 {
2435 	struct thread *td = curthread;
2436 	struct nlookupdata nd;
2437 	struct file *fp;
2438 	char *path1;
2439 	int error;
2440 	int mode;
2441 
2442 	path1 = objcache_get(namei_oc, M_WAITOK);
2443 	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
2444 	if (error == 0) {
2445 		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
2446 		    UIO_USERSPACE, 0);
2447 		if (error == 0) {
2448 			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2449 			error = kern_symlink(&nd, path1, mode);
2450 		}
2451 		nlookup_done_at(&nd, fp);
2452 	}
2453 	objcache_put(namei_oc, path1);
2454 	return (error);
2455 }
2456 
2457 /*
2458  * undelete_args(char *path)
2459  *
2460  * Delete a whiteout from the filesystem.
2461  */
2462 int
2463 sys_undelete(struct undelete_args *uap)
2464 {
2465 	struct nlookupdata nd;
2466 	int error;
2467 
2468 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2469 	bwillinode(1);
2470 	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
2471 	if (error == 0)
2472 		error = nlookup(&nd);
2473 	if (error == 0)
2474 		error = ncp_writechk(&nd.nl_nch);
2475 	if (error == 0) {
2476 		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
2477 				      NAMEI_DELETE);
2478 	}
2479 	nlookup_done(&nd);
2480 	return (error);
2481 }
2482 
2483 int
2484 kern_unlink(struct nlookupdata *nd)
2485 {
2486 	int error;
2487 
2488 	bwillinode(1);
2489 	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
2490 	if ((error = nlookup(nd)) != 0)
2491 		return (error);
2492 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2493 		return (error);
2494 	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
2495 	return (error);
2496 }
2497 
2498 /*
2499  * unlink_args(char *path)
2500  *
2501  * Delete a name from the filesystem.
2502  */
2503 int
2504 sys_unlink(struct unlink_args *uap)
2505 {
2506 	struct nlookupdata nd;
2507 	int error;
2508 
2509 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2510 	if (error == 0)
2511 		error = kern_unlink(&nd);
2512 	nlookup_done(&nd);
2513 	return (error);
2514 }
2515 
2516 
2517 /*
2518  * unlinkat_args(int fd, char *path, int flags)
2519  *
2520  * Delete the file or directory entry pointed to by fd/path.
2521  */
2522 int
2523 sys_unlinkat(struct unlinkat_args *uap)
2524 {
2525 	struct nlookupdata nd;
2526 	struct file *fp;
2527 	int error;
2528 
2529 	if (uap->flags & ~AT_REMOVEDIR)
2530 		return (EINVAL);
2531 
2532 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2533 	if (error == 0) {
2534 		if (uap->flags & AT_REMOVEDIR)
2535 			error = kern_rmdir(&nd);
2536 		else
2537 			error = kern_unlink(&nd);
2538 	}
2539 	nlookup_done_at(&nd, fp);
2540 	return (error);
2541 }
2542 
2543 int
2544 kern_lseek(int fd, off_t offset, int whence, off_t *res)
2545 {
2546 	struct thread *td = curthread;
2547 	struct proc *p = td->td_proc;
2548 	struct file *fp;
2549 	struct vnode *vp;
2550 	struct vattr vattr;
2551 	off_t new_offset;
2552 	int error;
2553 
2554 	fp = holdfp(p->p_fd, fd, -1);
2555 	if (fp == NULL)
2556 		return (EBADF);
2557 	if (fp->f_type != DTYPE_VNODE) {
2558 		error = ESPIPE;
2559 		goto done;
2560 	}
2561 	vp = (struct vnode *)fp->f_data;
2562 
2563 	switch (whence) {
2564 	case L_INCR:
2565 		spin_lock(&fp->f_spin);
2566 		new_offset = fp->f_offset + offset;
2567 		error = 0;
2568 		break;
2569 	case L_XTND:
2570 		error = VOP_GETATTR(vp, &vattr);
2571 		spin_lock(&fp->f_spin);
2572 		new_offset = offset + vattr.va_size;
2573 		break;
2574 	case L_SET:
2575 		new_offset = offset;
2576 		error = 0;
2577 		spin_lock(&fp->f_spin);
2578 		break;
2579 	default:
2580 		new_offset = 0;
2581 		error = EINVAL;
2582 		spin_lock(&fp->f_spin);
2583 		break;
2584 	}
2585 
2586 	/*
2587 	 * Validate the seek position.  Negative offsets are not allowed
2588 	 * for regular files or directories.
2589 	 *
2590 	 * Normally we would also not want to allow negative offsets for
2591 	 * character and block-special devices.  However kvm addresses
2592 	 * on 64 bit architectures might appear to be negative and must
2593 	 * be allowed.
2594 	 */
2595 	if (error == 0) {
2596 		if (new_offset < 0 &&
2597 		    (vp->v_type == VREG || vp->v_type == VDIR)) {
2598 			error = EINVAL;
2599 		} else {
2600 			fp->f_offset = new_offset;
2601 		}
2602 	}
2603 	*res = fp->f_offset;
2604 	spin_unlock(&fp->f_spin);
2605 done:
2606 	fdrop(fp);
2607 	return (error);
2608 }
2609 
2610 /*
2611  * lseek_args(int fd, int pad, off_t offset, int whence)
2612  *
2613  * Reposition read/write file offset.
2614  */
2615 int
2616 sys_lseek(struct lseek_args *uap)
2617 {
2618 	int error;
2619 
2620 	error = kern_lseek(uap->fd, uap->offset, uap->whence,
2621 			   &uap->sysmsg_offset);
2622 
2623 	return (error);
2624 }
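
/*
 * For reference, the whence values handled by kern_lseek() correspond to the
 * usual userland constants: L_SET is SEEK_SET, L_INCR is SEEK_CUR and L_XTND
 * is SEEK_END.  A minimal userland sketch that exercises the L_XTND path
 * (and hence VOP_GETATTR() for va_size) would be:
 *
 *	off_t size = lseek(fd, 0, SEEK_END);
 */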
2625 
2626 /*
2627  * Check if current process can access given file.  amode is a bitmask of *_OK
2628  * access bits.  flags is a bitmask of AT_* flags.
2629  */
2630 int
2631 kern_access(struct nlookupdata *nd, int amode, int flags)
2632 {
2633 	struct vnode *vp;
2634 	int error, mode;
2635 
2636 	if (flags & ~AT_EACCESS)
2637 		return (EINVAL);
2638 	if ((error = nlookup(nd)) != 0)
2639 		return (error);
2640 retry:
2641 	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
2642 	if (error)
2643 		return (error);
2644 
2645 	/* amode == 0 means only check for existence. */
2646 	if (amode) {
2647 		mode = 0;
2648 		if (amode & R_OK)
2649 			mode |= VREAD;
2650 		if (amode & W_OK)
2651 			mode |= VWRITE;
2652 		if (amode & X_OK)
2653 			mode |= VEXEC;
2654 		if ((mode & VWRITE) == 0 ||
2655 		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
2656 			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
2657 
2658 		/*
2659 		 * If the file handle is stale we have to re-resolve the
2660 		 * entry.  This is a hack at the moment.
2661 		 */
2662 		if (error == ESTALE) {
2663 			vput(vp);
2664 			cache_setunresolved(&nd->nl_nch);
2665 			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
2666 			if (error == 0) {
2667 				vp = NULL;
2668 				goto retry;
2669 			}
2670 			return(error);
2671 		}
2672 	}
2673 	vput(vp);
2674 	return (error);
2675 }
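
/*
 * Rough userland view of the mapping implemented above (the path is chosen
 * only for illustration): a call such as
 *
 *	access("/etc/rc.conf", R_OK | W_OK);
 *
 * turns R_OK/W_OK/X_OK into VREAD/VWRITE/VEXEC and runs VOP_ACCESS_FLAGS()
 * with the caller's real ids, while eaccess() and faccessat() with
 * AT_EACCESS check against the effective ids instead.
 */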
2676 
2677 /*
2678  * access_args(char *path, int flags)
2679  *
2680  * Check access permissions.
2681  */
2682 int
2683 sys_access(struct access_args *uap)
2684 {
2685 	struct nlookupdata nd;
2686 	int error;
2687 
2688 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2689 	if (error == 0)
2690 		error = kern_access(&nd, uap->flags, 0);
2691 	nlookup_done(&nd);
2692 	return (error);
2693 }
2694 
2695 
2696 /*
2697  * eaccess_args(char *path, int flags)
2698  *
2699  * Check access permissions.
2700  */
2701 int
2702 sys_eaccess(struct eaccess_args *uap)
2703 {
2704 	struct nlookupdata nd;
2705 	int error;
2706 
2707 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2708 	if (error == 0)
2709 		error = kern_access(&nd, uap->flags, AT_EACCESS);
2710 	nlookup_done(&nd);
2711 	return (error);
2712 }
2713 
2714 
2715 /*
2716  * faccessat_args(int fd, char *path, int amode, int flags)
2717  *
2718  * Check access permissions.
2719  */
2720 int
2721 sys_faccessat(struct faccessat_args *uap)
2722 {
2723 	struct nlookupdata nd;
2724 	struct file *fp;
2725 	int error;
2726 
2727 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
2728 				NLC_FOLLOW);
2729 	if (error == 0)
2730 		error = kern_access(&nd, uap->amode, uap->flags);
2731 	nlookup_done_at(&nd, fp);
2732 	return (error);
2733 }
2734 
2735 
2736 int
2737 kern_stat(struct nlookupdata *nd, struct stat *st)
2738 {
2739 	int error;
2740 	struct vnode *vp;
2741 
2742 	if ((error = nlookup(nd)) != 0)
2743 		return (error);
2744 again:
2745 	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
2746 		return (ENOENT);
2747 
2748 	if ((error = vget(vp, LK_SHARED)) != 0)
2749 		return (error);
2750 	error = vn_stat(vp, st, nd->nl_cred);
2751 
2752 	/*
2753 	 * If the file handle is stale we have to re-resolve the entry.  This
2754 	 * is a hack at the moment.
2755 	 */
2756 	if (error == ESTALE) {
2757 		vput(vp);
2758 		cache_setunresolved(&nd->nl_nch);
2759 		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
2760 		if (error == 0)
2761 			goto again;
2762 	} else {
2763 		vput(vp);
2764 	}
2765 	return (error);
2766 }
2767 
2768 /*
2769  * stat_args(char *path, struct stat *ub)
2770  *
2771  * Get file status; this version follows links.
2772  */
2773 int
2774 sys_stat(struct stat_args *uap)
2775 {
2776 	struct nlookupdata nd;
2777 	struct stat st;
2778 	int error;
2779 
2780 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2781 	if (error == 0) {
2782 		error = kern_stat(&nd, &st);
2783 		if (error == 0)
2784 			error = copyout(&st, uap->ub, sizeof(*uap->ub));
2785 	}
2786 	nlookup_done(&nd);
2787 	return (error);
2788 }
2789 
2790 /*
2791  * lstat_args(char *path, struct stat *ub)
2792  *
2793  * Get file status; this version does not follow links.
2794  */
2795 int
2796 sys_lstat(struct lstat_args *uap)
2797 {
2798 	struct nlookupdata nd;
2799 	struct stat st;
2800 	int error;
2801 
2802 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2803 	if (error == 0) {
2804 		error = kern_stat(&nd, &st);
2805 		if (error == 0)
2806 			error = copyout(&st, uap->ub, sizeof(*uap->ub));
2807 	}
2808 	nlookup_done(&nd);
2809 	return (error);
2810 }
2811 
2812 /*
2813  * fstatat_args(int fd, char *path, struct stat *sb, int flags)
2814  *
2815  * Get status of file pointed to by fd/path.
2816  */
2817 int
2818 sys_fstatat(struct fstatat_args *uap)
2819 {
2820 	struct nlookupdata nd;
2821 	struct stat st;
2822 	int error;
2823 	int flags;
2824 	struct file *fp;
2825 
2826 	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
2827 		return (EINVAL);
2828 
2829 	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
2830 
2831 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
2832 				UIO_USERSPACE, flags);
2833 	if (error == 0) {
2834 		error = kern_stat(&nd, &st);
2835 		if (error == 0)
2836 			error = copyout(&st, uap->sb, sizeof(*uap->sb));
2837 	}
2838 	nlookup_done_at(&nd, fp);
2839 	return (error);
2840 }
2841 
2842 static int
2843 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
2844 {
2845 	struct nlookupdata nd;
2846 	struct vnode *vp;
2847 	int error;
2848 
2849 	vp = NULL;
2850 	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
2851 	if (error == 0)
2852 		error = nlookup(&nd);
2853 	if (error == 0)
2854 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
2855 	nlookup_done(&nd);
2856 	if (error == 0) {
2857 		error = VOP_PATHCONF(vp, name, sysmsg_regp);
2858 		vput(vp);
2859 	}
2860 	return (error);
2861 }
2862 
2863 /*
2864  * pathconf_args(char *path, int name)
2865  *
2866  * Get configurable pathname variables.
2867  */
2868 int
2869 sys_pathconf(struct pathconf_args *uap)
2870 {
2871 	return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
2872 		&uap->sysmsg_reg));
2873 }
2874 
2875 /*
2876  * lpathconf_args(char *path, int name)
2877  *
2878  * Get configurable pathname variables, but don't follow symlinks.
2879  */
2880 int
2881 sys_lpathconf(struct lpathconf_args *uap)
2882 {
2883 	return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg));
2884 }
2885 
2886 /*
2887  * XXX: daver
2888  * kern_readlink isn't properly split yet.  There is a copyin buried
2889  * in VOP_READLINK().
2890  */
2891 int
2892 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
2893 {
2894 	struct thread *td = curthread;
2895 	struct vnode *vp;
2896 	struct iovec aiov;
2897 	struct uio auio;
2898 	int error;
2899 
2900 	if ((error = nlookup(nd)) != 0)
2901 		return (error);
2902 	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
2903 	if (error)
2904 		return (error);
2905 	if (vp->v_type != VLNK) {
2906 		error = EINVAL;
2907 	} else {
2908 		aiov.iov_base = buf;
2909 		aiov.iov_len = count;
2910 		auio.uio_iov = &aiov;
2911 		auio.uio_iovcnt = 1;
2912 		auio.uio_offset = 0;
2913 		auio.uio_rw = UIO_READ;
2914 		auio.uio_segflg = UIO_USERSPACE;
2915 		auio.uio_td = td;
2916 		auio.uio_resid = count;
2917 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2918 	}
2919 	vput(vp);
2920 	*res = count - auio.uio_resid;
2921 	return (error);
2922 }
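
/*
 * Usage note (userland sketch): readlink() does not NUL-terminate the
 * result; the syscall simply returns the number of bytes placed in the
 * buffer (count - auio.uio_resid above), so callers normally do
 *
 *	char buf[MAXPATHLEN];
 *	ssize_t n = readlink(path, buf, sizeof(buf) - 1);
 *	if (n >= 0)
 *		buf[n] = '\0';
 */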
2923 
2924 /*
2925  * readlink_args(char *path, char *buf, int count)
2926  *
2927  * Return target name of a symbolic link.
2928  */
2929 int
2930 sys_readlink(struct readlink_args *uap)
2931 {
2932 	struct nlookupdata nd;
2933 	int error;
2934 
2935 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2936 	if (error == 0) {
2937 		error = kern_readlink(&nd, uap->buf, uap->count,
2938 					&uap->sysmsg_result);
2939 	}
2940 	nlookup_done(&nd);
2941 	return (error);
2942 }
2943 
2944 /*
2945  * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
2946  *
2947  * Return target name of a symbolic link.  The path is relative to the
2948  * directory associated with fd.
2949  */
2950 int
2951 sys_readlinkat(struct readlinkat_args *uap)
2952 {
2953 	struct nlookupdata nd;
2954 	struct file *fp;
2955 	int error;
2956 
2957 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2958 	if (error == 0) {
2959 		error = kern_readlink(&nd, uap->buf, uap->bufsize,
2960 					&uap->sysmsg_result);
2961 	}
2962 	nlookup_done_at(&nd, fp);
2963 	return (error);
2964 }
2965 
2966 static int
2967 setfflags(struct vnode *vp, int flags)
2968 {
2969 	struct thread *td = curthread;
2970 	int error;
2971 	struct vattr vattr;
2972 
2973 	/*
2974 	 * Prevent non-root users from setting flags on devices.  When
2975 	 * a device is reused, users can retain ownership of the device
2976 	 * if they are allowed to set flags and programs assume that
2977 	 * chown can't fail when done as root.
2978 	 */
2979 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2980 	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
2981 		return (error);
2982 
2983 	/*
2984 	 * note: vget is required for any operation that might mod the vnode
2985 	 * so VINACTIVE is properly cleared.
2986 	 */
2987 	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
2988 		VATTR_NULL(&vattr);
2989 		vattr.va_flags = flags;
2990 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2991 		vput(vp);
2992 	}
2993 	return (error);
2994 }
2995 
2996 /*
2997  * chflags(char *path, int flags)
2998  *
2999  * Change flags of a file given a path name.
3000  */
3001 int
3002 sys_chflags(struct chflags_args *uap)
3003 {
3004 	struct nlookupdata nd;
3005 	struct vnode *vp;
3006 	int error;
3007 
3008 	vp = NULL;
3009 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3010 	if (error == 0)
3011 		error = nlookup(&nd);
3012 	if (error == 0)
3013 		error = ncp_writechk(&nd.nl_nch);
3014 	if (error == 0)
3015 		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3016 	nlookup_done(&nd);
3017 	if (error == 0) {
3018 		error = setfflags(vp, uap->flags);
3019 		vrele(vp);
3020 	}
3021 	return (error);
3022 }
3023 
3024 /*
3025  * lchflags(char *path, int flags)
3026  *
3027  * Change flags of a file given a path name, but don't follow symlinks.
3028  */
3029 int
3030 sys_lchflags(struct lchflags_args *uap)
3031 {
3032 	struct nlookupdata nd;
3033 	struct vnode *vp;
3034 	int error;
3035 
3036 	vp = NULL;
3037 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3038 	if (error == 0)
3039 		error = nlookup(&nd);
3040 	if (error == 0)
3041 		error = ncp_writechk(&nd.nl_nch);
3042 	if (error == 0)
3043 		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3044 	nlookup_done(&nd);
3045 	if (error == 0) {
3046 		error = setfflags(vp, uap->flags);
3047 		vrele(vp);
3048 	}
3049 	return (error);
3050 }
3051 
3052 /*
3053  * fchflags_args(int fd, int flags)
3054  *
3055  * Change flags of a file given a file descriptor.
3056  */
3057 int
3058 sys_fchflags(struct fchflags_args *uap)
3059 {
3060 	struct thread *td = curthread;
3061 	struct proc *p = td->td_proc;
3062 	struct file *fp;
3063 	int error;
3064 
3065 	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
3066 		return (error);
3067 	if (fp->f_nchandle.ncp)
3068 		error = ncp_writechk(&fp->f_nchandle);
3069 	if (error == 0)
3070 		error = setfflags((struct vnode *) fp->f_data, uap->flags);
3071 	fdrop(fp);
3072 	return (error);
3073 }
3074 
3075 static int
3076 setfmode(struct vnode *vp, int mode)
3077 {
3078 	struct thread *td = curthread;
3079 	int error;
3080 	struct vattr vattr;
3081 
3082 	/*
3083 	 * note: vget is required for any operation that might mod the vnode
3084 	 * so VINACTIVE is properly cleared.
3085 	 */
3086 	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3087 		VATTR_NULL(&vattr);
3088 		vattr.va_mode = mode & ALLPERMS;
3089 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3090 		vput(vp);
3091 	}
3092 	return error;
3093 }
3094 
3095 int
3096 kern_chmod(struct nlookupdata *nd, int mode)
3097 {
3098 	struct vnode *vp;
3099 	int error;
3100 
3101 	if ((error = nlookup(nd)) != 0)
3102 		return (error);
3103 	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3104 		return (error);
3105 	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3106 		error = setfmode(vp, mode);
3107 	vrele(vp);
3108 	return (error);
3109 }
3110 
3111 /*
3112  * chmod_args(char *path, int mode)
3113  *
3114  * Change mode of a file given path name.
3115  */
3116 int
3117 sys_chmod(struct chmod_args *uap)
3118 {
3119 	struct nlookupdata nd;
3120 	int error;
3121 
3122 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3123 	if (error == 0)
3124 		error = kern_chmod(&nd, uap->mode);
3125 	nlookup_done(&nd);
3126 	return (error);
3127 }
3128 
3129 /*
3130  * lchmod_args(char *path, int mode)
3131  *
3132  * Change mode of a file given path name (don't follow links.)
3133  */
3134 int
3135 sys_lchmod(struct lchmod_args *uap)
3136 {
3137 	struct nlookupdata nd;
3138 	int error;
3139 
3140 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3141 	if (error == 0)
3142 		error = kern_chmod(&nd, uap->mode);
3143 	nlookup_done(&nd);
3144 	return (error);
3145 }
3146 
3147 /*
3148  * fchmod_args(int fd, int mode)
3149  *
3150  * Change mode of a file given a file descriptor.
3151  */
3152 int
3153 sys_fchmod(struct fchmod_args *uap)
3154 {
3155 	struct thread *td = curthread;
3156 	struct proc *p = td->td_proc;
3157 	struct file *fp;
3158 	int error;
3159 
3160 	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
3161 		return (error);
3162 	if (fp->f_nchandle.ncp)
3163 		error = ncp_writechk(&fp->f_nchandle);
3164 	if (error == 0)
3165 		error = setfmode((struct vnode *)fp->f_data, uap->mode);
3166 	fdrop(fp);
3167 	return (error);
3168 }
3169 
3170 /*
3171  * fchmodat_args(int fd, char *path, mode_t mode, int flags)
3172  *
3173  * Change mode of a file pointed to by fd/path.
3174  */
3175 int
3176 sys_fchmodat(struct fchmodat_args *uap)
3177 {
3178 	struct nlookupdata nd;
3179 	struct file *fp;
3180 	int error;
3181 	int flags;
3182 
3183 	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3184 		return (EINVAL);
3185 	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3186 
3187 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3188 				UIO_USERSPACE, flags);
3189 	if (error == 0)
3190 		error = kern_chmod(&nd, uap->mode);
3191 	nlookup_done_at(&nd, fp);
3192 	return (error);
3193 }
3194 
3195 static int
3196 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid)
3197 {
3198 	struct thread *td = curthread;
3199 	int error;
3200 	struct vattr vattr;
3201 	uid_t o_uid;
3202 	gid_t o_gid;
3203 	uint64_t size;
3204 
3205 	/*
3206 	 * note: vget is required for any operation that might mod the vnode
3207 	 * so VINACTIVE is properly cleared.
3208 	 */
3209 	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3210 		if ((error = VOP_GETATTR(vp, &vattr)) != 0) {
			vput(vp);
			return error;
		}
3212 		o_uid = vattr.va_uid;
3213 		o_gid = vattr.va_gid;
3214 		size = vattr.va_size;
3215 
3216 		VATTR_NULL(&vattr);
3217 		vattr.va_uid = uid;
3218 		vattr.va_gid = gid;
3219 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3220 		vput(vp);
3221 	}
3222 
3223 	if (error == 0) {
3224 		if (uid == -1)
3225 			uid = o_uid;
3226 		if (gid == -1)
3227 			gid = o_gid;
3228 		VFS_ACCOUNT(mp, o_uid, o_gid, -size);
3229 		VFS_ACCOUNT(mp,   uid,   gid,  size);
3230 	}
3231 
3232 	return error;
3233 }
3234 
3235 int
3236 kern_chown(struct nlookupdata *nd, int uid, int gid)
3237 {
3238 	struct vnode *vp;
3239 	int error;
3240 
3241 	if ((error = nlookup(nd)) != 0)
3242 		return (error);
3243 	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3244 		return (error);
3245 	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3246 		error = setfown(nd->nl_nch.mount, vp, uid, gid);
3247 	vrele(vp);
3248 	return (error);
3249 }
3250 
3251 /*
3252  * chown(char *path, int uid, int gid)
3253  *
3254  * Set ownership given a path name.
3255  */
3256 int
3257 sys_chown(struct chown_args *uap)
3258 {
3259 	struct nlookupdata nd;
3260 	int error;
3261 
3262 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3263 	if (error == 0)
3264 		error = kern_chown(&nd, uap->uid, uap->gid);
3265 	nlookup_done(&nd);
3266 	return (error);
3267 }
3268 
3269 /*
3270  * lchown_args(char *path, int uid, int gid)
3271  *
3272  * Set ownership given a path name, do not cross symlinks.
3273  */
3274 int
3275 sys_lchown(struct lchown_args *uap)
3276 {
3277 	struct nlookupdata nd;
3278 	int error;
3279 
3280 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3281 	if (error == 0)
3282 		error = kern_chown(&nd, uap->uid, uap->gid);
3283 	nlookup_done(&nd);
3284 	return (error);
3285 }
3286 
3287 /*
3288  * fchown_args(int fd, int uid, int gid)
3289  *
3290  * Set ownership given a file descriptor.
3291  */
3292 int
3293 sys_fchown(struct fchown_args *uap)
3294 {
3295 	struct thread *td = curthread;
3296 	struct proc *p = td->td_proc;
3297 	struct file *fp;
3298 	int error;
3299 
3300 	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
3301 		return (error);
3302 	if (fp->f_nchandle.ncp)
3303 		error = ncp_writechk(&fp->f_nchandle);
3304 	if (error == 0)
3305 		error = setfown(p->p_fd->fd_ncdir.mount,
3306 			(struct vnode *)fp->f_data, uap->uid, uap->gid);
3307 	fdrop(fp);
3308 	return (error);
3309 }
3310 
3311 /*
3312  * fchownat(int fd, char *path, int uid, int gid, int flags)
3313  *
3314  * Set ownership of file pointed to by fd/path.
3315  */
3316 int
3317 sys_fchownat(struct fchownat_args *uap)
3318 {
3319 	struct nlookupdata nd;
3320 	struct file *fp;
3321 	int error;
3322 	int flags;
3323 
3324 	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3325 		return (EINVAL);
3326 	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3327 
3328 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3329 				UIO_USERSPACE, flags);
3330 	if (error == 0)
3331 		error = kern_chown(&nd, uap->uid, uap->gid);
3332 	nlookup_done_at(&nd, fp);
3333 	return (error);
3334 }
3335 
3336 
3337 static int
3338 getutimes(const struct timeval *tvp, struct timespec *tsp)
3339 {
3340 	struct timeval tv[2];
3341 
3342 	if (tvp == NULL) {
3343 		microtime(&tv[0]);
3344 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3345 		tsp[1] = tsp[0];
3346 	} else {
3347 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3348 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3349 	}
3350 	return 0;
3351 }
3352 
3353 static int
3354 setutimes(struct vnode *vp, struct vattr *vattr,
3355 	  const struct timespec *ts, int nullflag)
3356 {
3357 	struct thread *td = curthread;
3358 	int error;
3359 
3360 	VATTR_NULL(vattr);
3361 	vattr->va_atime = ts[0];
3362 	vattr->va_mtime = ts[1];
3363 	if (nullflag)
3364 		vattr->va_vaflags |= VA_UTIMES_NULL;
3365 	error = VOP_SETATTR(vp, vattr, td->td_ucred);
3366 
3367 	return error;
3368 }
3369 
3370 int
3371 kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
3372 {
3373 	struct timespec ts[2];
3374 	struct vnode *vp;
3375 	struct vattr vattr;
3376 	int error;
3377 
3378 	if ((error = getutimes(tptr, ts)) != 0)
3379 		return (error);
3380 
3381 	/*
3382 	 * NOTE: utimes() succeeds for the owner even if the file
3383 	 * is not user-writable.
3384 	 */
3385 	nd->nl_flags |= NLC_OWN | NLC_WRITE;
3386 
3387 	if ((error = nlookup(nd)) != 0)
3388 		return (error);
3389 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
3390 		return (error);
3391 	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3392 		return (error);
3393 
3394 	/*
3395 	 * note: vget is required for any operation that might mod the vnode
3396 	 * so VINACTIVE is properly cleared.
3397 	 */
3398 	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
3399 		error = vget(vp, LK_EXCLUSIVE);
3400 		if (error == 0) {
3401 			error = setutimes(vp, &vattr, ts, (tptr == NULL));
3402 			vput(vp);
3403 		}
3404 	}
3405 	vrele(vp);
3406 	return (error);
3407 }
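
/*
 * Userland sketch of the two cases handled by getutimes()/kern_utimes()
 * (atime, mtime and path are placeholders): passing NULL sets both
 * timestamps to "now" and sets VA_UTIMES_NULL so the filesystem may relax
 * the permission check, while an explicit pair sets atime then mtime:
 *
 *	struct timeval tv[2];
 *	tv[0].tv_sec = atime; tv[0].tv_usec = 0;
 *	tv[1].tv_sec = mtime; tv[1].tv_usec = 0;
 *	utimes(path, tv);
 */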
3408 
3409 /*
3410  * utimes_args(char *path, struct timeval *tptr)
3411  *
3412  * Set the access and modification times of a file.
3413  */
3414 int
3415 sys_utimes(struct utimes_args *uap)
3416 {
3417 	struct timeval tv[2];
3418 	struct nlookupdata nd;
3419 	int error;
3420 
3421 	if (uap->tptr) {
3422  		error = copyin(uap->tptr, tv, sizeof(tv));
3423 		if (error)
3424 			return (error);
3425 	}
3426 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3427 	if (error == 0)
3428 		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3429 	nlookup_done(&nd);
3430 	return (error);
3431 }
3432 
3433 /*
3434  * lutimes_args(char *path, struct timeval *tptr)
3435  *
3436  * Set the access and modification times of a file.
3437  */
3438 int
3439 sys_lutimes(struct lutimes_args *uap)
3440 {
3441 	struct timeval tv[2];
3442 	struct nlookupdata nd;
3443 	int error;
3444 
3445 	if (uap->tptr) {
3446 		error = copyin(uap->tptr, tv, sizeof(tv));
3447 		if (error)
3448 			return (error);
3449 	}
3450 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3451 	if (error == 0)
3452 		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3453 	nlookup_done(&nd);
3454 	return (error);
3455 }
3456 
3457 /*
3458  * Set utimes on a file descriptor.  The creds used to open the
3459  * file are used to determine whether the operation is allowed
3460  * or not.
3461  */
3462 int
3463 kern_futimes(int fd, struct timeval *tptr)
3464 {
3465 	struct thread *td = curthread;
3466 	struct proc *p = td->td_proc;
3467 	struct timespec ts[2];
3468 	struct file *fp;
3469 	struct vnode *vp;
3470 	struct vattr vattr;
3471 	int error;
3472 
3473 	error = getutimes(tptr, ts);
3474 	if (error)
3475 		return (error);
3476 	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
3477 		return (error);
3478 	if (fp->f_nchandle.ncp)
3479 		error = ncp_writechk(&fp->f_nchandle);
3480 	if (error == 0) {
3481 		vp = fp->f_data;
3482 		error = vget(vp, LK_EXCLUSIVE);
3483 		if (error == 0) {
3484 			error = VOP_GETATTR(vp, &vattr);
3485 			if (error == 0) {
3486 				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
3487 						   fp->f_cred);
3488 			}
3489 			if (error == 0) {
3490 				error = setutimes(vp, &vattr, ts,
3491 						  (tptr == NULL));
3492 			}
3493 			vput(vp);
3494 		}
3495 	}
3496 	fdrop(fp);
3497 	return (error);
3498 }
3499 
3500 /*
3501  * futimes_args(int fd, struct timeval *tptr)
3502  *
3503  * Set the access and modification times of a file.
3504  */
3505 int
3506 sys_futimes(struct futimes_args *uap)
3507 {
3508 	struct timeval tv[2];
3509 	int error;
3510 
3511 	if (uap->tptr) {
3512 		error = copyin(uap->tptr, tv, sizeof(tv));
3513 		if (error)
3514 			return (error);
3515 	}
3516 	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
3517 
3518 	return (error);
3519 }
3520 
3521 int
3522 kern_truncate(struct nlookupdata *nd, off_t length)
3523 {
3524 	struct vnode *vp;
3525 	struct vattr vattr;
3526 	int error;
3527 	uid_t uid = 0;
3528 	gid_t gid = 0;
3529 	uint64_t old_size = 0;
3530 
3531 	if (length < 0)
3532 		return(EINVAL);
3533 	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
3534 	if ((error = nlookup(nd)) != 0)
3535 		return (error);
3536 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
3537 		return (error);
3538 	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3539 		return (error);
3540 	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
3541 		vrele(vp);
3542 		return (error);
3543 	}
3544 	if (vp->v_type == VDIR) {
3545 		error = EISDIR;
3546 		goto done;
3547 	}
3548 	if (vfs_quota_enabled) {
3549 		error = VOP_GETATTR(vp, &vattr);
3550 		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
3551 		uid = vattr.va_uid;
3552 		gid = vattr.va_gid;
3553 		old_size = vattr.va_size;
3554 	}
3555 
3556 	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
3557 		VATTR_NULL(&vattr);
3558 		vattr.va_size = length;
3559 		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
3560 		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
3561 	}
3562 done:
3563 	vput(vp);
3564 	return (error);
3565 }
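
/*
 * Note on the two entry points: truncate(path, len) revalidates write
 * permission through the namecache (NLC_WRITE | NLC_TRUNCATE above), while
 * ftruncate(fd, len) in kern_ftruncate() below checks the FWRITE flag on
 * the descriptor instead, e.g. open(path, O_RDWR) followed by
 * ftruncate(fd, 0) to empty a file.
 */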
3566 
3567 /*
3568  * truncate(char *path, int pad, off_t length)
3569  *
3570  * Truncate a file given its path name.
3571  */
3572 int
3573 sys_truncate(struct truncate_args *uap)
3574 {
3575 	struct nlookupdata nd;
3576 	int error;
3577 
3578 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3579 	if (error == 0)
3580 		error = kern_truncate(&nd, uap->length);
3581 	nlookup_done(&nd);
3582 	return error;
3583 }
3584 
3585 int
3586 kern_ftruncate(int fd, off_t length)
3587 {
3588 	struct thread *td = curthread;
3589 	struct proc *p = td->td_proc;
3590 	struct vattr vattr;
3591 	struct vnode *vp;
3592 	struct file *fp;
3593 	int error;
3594 	uid_t uid = 0;
3595 	gid_t gid = 0;
3596 	uint64_t old_size = 0;
3597 	struct mount *mp;
3598 
3599 	if (length < 0)
3600 		return(EINVAL);
3601 	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
3602 		return (error);
3603 	if (fp->f_nchandle.ncp) {
3604 		error = ncp_writechk(&fp->f_nchandle);
3605 		if (error)
3606 			goto done;
3607 	}
3608 	if ((fp->f_flag & FWRITE) == 0) {
3609 		error = EINVAL;
3610 		goto done;
3611 	}
3612 	if (fp->f_flag & FAPPENDONLY) {	/* inode was set s/uappnd */
3613 		error = EINVAL;
3614 		goto done;
3615 	}
3616 	vp = (struct vnode *)fp->f_data;
3617 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3618 	if (vp->v_type == VDIR) {
3619 		error = EISDIR;
3620 		goto done;
3621 	}
3622 
3623 	if (vfs_quota_enabled) {
3624 		error = VOP_GETATTR(vp, &vattr);
3625 		KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
3626 		uid = vattr.va_uid;
3627 		gid = vattr.va_gid;
3628 		old_size = vattr.va_size;
3629 	}
3630 
3631 	if ((error = vn_writechk(vp, NULL)) == 0) {
3632 		VATTR_NULL(&vattr);
3633 		vattr.va_size = length;
3634 		error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3635 		mp = vq_vptomp(vp);
3636 		VFS_ACCOUNT(mp, uid, gid, length - old_size);
3637 	}
3638 	vn_unlock(vp);
3639 done:
3640 	fdrop(fp);
3641 	return (error);
3642 }
3643 
3644 /*
3645  * ftruncate_args(int fd, int pad, off_t length)
3646  *
3647  * Truncate a file given a file descriptor.
3648  */
3649 int
3650 sys_ftruncate(struct ftruncate_args *uap)
3651 {
3652 	int error;
3653 
3654 	error = kern_ftruncate(uap->fd, uap->length);
3655 
3656 	return (error);
3657 }
3658 
3659 /*
3660  * fsync(int fd)
3661  *
3662  * Sync an open file.
3663  */
3664 int
3665 sys_fsync(struct fsync_args *uap)
3666 {
3667 	struct thread *td = curthread;
3668 	struct proc *p = td->td_proc;
3669 	struct vnode *vp;
3670 	struct file *fp;
3671 	vm_object_t obj;
3672 	int error;
3673 
3674 	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
3675 		return (error);
3676 	vp = (struct vnode *)fp->f_data;
3677 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3678 	if ((obj = vp->v_object) != NULL) {
3679 		if (vp->v_mount == NULL ||
3680 		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
3681 			vm_object_page_clean(obj, 0, 0, 0);
3682 		}
3683 	}
3684 	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
3685 	if (error == 0 && vp->v_mount)
3686 		error = buf_fsync(vp);
3687 	vn_unlock(vp);
3688 	fdrop(fp);
3689 
3690 	return (error);
3691 }
3692 
3693 int
3694 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
3695 {
3696 	struct nchandle fnchd;
3697 	struct nchandle tnchd;
3698 	struct namecache *ncp;
3699 	struct vnode *fdvp;
3700 	struct vnode *tdvp;
3701 	struct mount *mp;
3702 	int error;
3703 
3704 	bwillinode(1);
3705 	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
3706 	if ((error = nlookup(fromnd)) != 0)
3707 		return (error);
3708 	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
3709 		return (ENOENT);
3710 	fnchd.mount = fromnd->nl_nch.mount;
3711 	cache_hold(&fnchd);
3712 
3713 	/*
3714 	 * unlock the source nch so we can lookup the target nch without
3715 	 * deadlocking.  The target may or may not exist so we do not check
3716 	 * for a target vp like kern_mkdir() and other creation functions do.
3717 	 *
3718 	 * The source and target directories are ref'd and rechecked after
3719 	 * everything is relocked to determine if the source or target file
3720 	 * has been renamed.
3721 	 */
3722 	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
3723 	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
3724 	cache_unlock(&fromnd->nl_nch);
3725 
3726 	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
3727 	if ((error = nlookup(tond)) != 0) {
3728 		cache_drop(&fnchd);
3729 		return (error);
3730 	}
3731 	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
3732 		cache_drop(&fnchd);
3733 		return (ENOENT);
3734 	}
3735 	tnchd.mount = tond->nl_nch.mount;
3736 	cache_hold(&tnchd);
3737 
3738 	/*
3739 	 * If the source and target are the same there is nothing to do
3740 	 */
3741 	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
3742 		cache_drop(&fnchd);
3743 		cache_drop(&tnchd);
3744 		return (0);
3745 	}
3746 
3747 	/*
3748 	 * Mount points cannot be renamed or overwritten
3749 	 */
3750 	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
3751 	    NCF_ISMOUNTPT
3752 	) {
3753 		cache_drop(&fnchd);
3754 		cache_drop(&tnchd);
3755 		return (EINVAL);
3756 	}
3757 
3758 	/*
3759 	 * Relock the source ncp.  cache_relock() will deal with any
3760 	 * deadlocks against the already-locked tond and will also
3761 	 * make sure both are resolved.
3762 	 *
3763 	 * NOTE AFTER RELOCKING: The source or target ncp may have become
3764 	 * invalid while they were unlocked, nc_vp and nc_mount could
3765 	 * be NULL.
3766 	 */
3767 	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
3768 		     &tond->nl_nch, tond->nl_cred);
3769 	fromnd->nl_flags |= NLC_NCPISLOCKED;
3770 
3771 	/*
3772 	 * If either fromnd or tond is marked destroyed, a ripout occurred
3773 	 * out from under us and we must retry.
3774 	 */
3775 	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
3776 	    fromnd->nl_nch.ncp->nc_vp == NULL ||
3777 	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
3778 		kprintf("kern_rename: retry due to ripout on: "
3779 			"\"%s\" -> \"%s\"\n",
3780 			fromnd->nl_nch.ncp->nc_name,
3781 			tond->nl_nch.ncp->nc_name);
3782 		cache_drop(&fnchd);
3783 		cache_drop(&tnchd);
3784 		return (EAGAIN);
3785 	}
3786 
3787 	/*
3788 	 * make sure the parent directories' linkages are the same
3789 	 */
3790 	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
3791 	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
3792 		cache_drop(&fnchd);
3793 		cache_drop(&tnchd);
3794 		return (ENOENT);
3795 	}
3796 
3797 	/*
3798 	 * Both the source and target must be within the same filesystem and
3799 	 * in the same filesystem as their parent directories within the
3800 	 * namecache topology.
3801 	 *
3802 	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
3803 	 */
3804 	mp = fnchd.mount;
3805 	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
3806 	    mp != tond->nl_nch.mount) {
3807 		cache_drop(&fnchd);
3808 		cache_drop(&tnchd);
3809 		return (EXDEV);
3810 	}
3811 
3812 	/*
3813 	 * Make sure the mount point is writable
3814 	 */
3815 	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
3816 		cache_drop(&fnchd);
3817 		cache_drop(&tnchd);
3818 		return (error);
3819 	}
3820 
3821 	/*
3822 	 * If the target exists and either the source or target is a directory,
3823 	 * then both must be directories.
3824 	 *
3825 	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
3826 	 * have become NULL.
3827 	 */
3828 	if (tond->nl_nch.ncp->nc_vp) {
3829 		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
3830 			error = ENOENT;
3831 		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
3832 			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
3833 				error = ENOTDIR;
3834 		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
3835 			error = EISDIR;
3836 		}
3837 	}
3838 
3839 	/*
3840 	 * You cannot rename a source into itself or a subdirectory of itself.
3841 	 * We check this by traversing the target directory upwards looking
3842 	 * for a match against the source.
3843 	 *
3844 	 * XXX MPSAFE
3845 	 */
3846 	if (error == 0) {
3847 		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
3848 			if (fromnd->nl_nch.ncp == ncp) {
3849 				error = EINVAL;
3850 				break;
3851 			}
3852 		}
3853 	}
3854 
3855 	cache_drop(&fnchd);
3856 	cache_drop(&tnchd);
3857 
3858 	/*
3859 	 * Even though the namespaces are different, they may still represent
3860 	 * hardlinks to the same file.  The filesystem might have a hard time
3861 	 * with this so we issue a NREMOVE of the source instead of a NRENAME
3862 	 * when we detect the situation.
3863 	 */
3864 	if (error == 0) {
3865 		fdvp = fromnd->nl_dvp;
3866 		tdvp = tond->nl_dvp;
3867 		if (fdvp == NULL || tdvp == NULL) {
3868 			error = EPERM;
3869 		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
3870 			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
3871 					    fromnd->nl_cred);
3872 		} else {
3873 			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
3874 					    fdvp, tdvp, tond->nl_cred);
3875 		}
3876 	}
3877 	return (error);
3878 }
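
/*
 * Note for callers: kern_rename() returns EAGAIN when the namecache entries
 * were ripped out while unlocked, which is why sys_rename() and
 * sys_renameat() below re-run the whole lookup/rename sequence in a
 * do { ... } while (error == EAGAIN) loop rather than failing the syscall.
 */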
3879 
3880 /*
3881  * rename_args(char *from, char *to)
3882  *
3883  * Rename files.  Source and destination must either both be directories,
3884  * or both not be directories.  If target is a directory, it must be empty.
3885  */
3886 int
3887 sys_rename(struct rename_args *uap)
3888 {
3889 	struct nlookupdata fromnd, tond;
3890 	int error;
3891 
3892 	do {
3893 		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
3894 		if (error == 0) {
3895 			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
3896 			if (error == 0)
3897 				error = kern_rename(&fromnd, &tond);
3898 			nlookup_done(&tond);
3899 		}
3900 		nlookup_done(&fromnd);
3901 	} while (error == EAGAIN);
3902 	return (error);
3903 }
3904 
3905 /*
3906  * renameat_args(int oldfd, char *old, int newfd, char *new)
3907  *
3908  * Rename files using paths relative to the directories associated with
3909  * oldfd and newfd.  Source and destination must either both be directories,
3910  * or both not be directories.  If target is a directory, it must be empty.
3911  */
3912 int
3913 sys_renameat(struct renameat_args *uap)
3914 {
3915 	struct nlookupdata oldnd, newnd;
3916 	struct file *oldfp, *newfp;
3917 	int error;
3918 
3919 	do {
3920 		error = nlookup_init_at(&oldnd, &oldfp,
3921 					uap->oldfd, uap->old,
3922 					UIO_USERSPACE, 0);
3923 		if (error == 0) {
3924 			error = nlookup_init_at(&newnd, &newfp,
3925 						uap->newfd, uap->new,
3926 						UIO_USERSPACE, 0);
3927 			if (error == 0)
3928 				error = kern_rename(&oldnd, &newnd);
3929 			nlookup_done_at(&newnd, newfp);
3930 		}
3931 		nlookup_done_at(&oldnd, oldfp);
3932 	} while (error == EAGAIN);
3933 	return (error);
3934 }
3935 
3936 int
3937 kern_mkdir(struct nlookupdata *nd, int mode)
3938 {
3939 	struct thread *td = curthread;
3940 	struct proc *p = td->td_proc;
3941 	struct vnode *vp;
3942 	struct vattr vattr;
3943 	int error;
3944 
3945 	bwillinode(1);
3946 	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
3947 	if ((error = nlookup(nd)) != 0)
3948 		return (error);
3949 
3950 	if (nd->nl_nch.ncp->nc_vp)
3951 		return (EEXIST);
3952 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
3953 		return (error);
3954 	VATTR_NULL(&vattr);
3955 	vattr.va_type = VDIR;
3956 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3957 
3958 	vp = NULL;
3959 	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
3960 	if (error == 0)
3961 		vput(vp);
3962 	return (error);
3963 }
3964 
3965 /*
3966  * mkdir_args(char *path, int mode)
3967  *
3968  * Make a directory file.
3969  */
3970 int
3971 sys_mkdir(struct mkdir_args *uap)
3972 {
3973 	struct nlookupdata nd;
3974 	int error;
3975 
3976 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3977 	if (error == 0)
3978 		error = kern_mkdir(&nd, uap->mode);
3979 	nlookup_done(&nd);
3980 	return (error);
3981 }
3982 
3983 /*
3984  * mkdirat_args(int fd, char *path, mode_t mode)
3985  *
3986  * Make a directory file.  The path is relative to the directory associated
3987  * with fd.
3988  */
3989 int
3990 sys_mkdirat(struct mkdirat_args *uap)
3991 {
3992 	struct nlookupdata nd;
3993 	struct file *fp;
3994 	int error;
3995 
3996 	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
3997 	if (error == 0)
3998 		error = kern_mkdir(&nd, uap->mode);
3999 	nlookup_done_at(&nd, fp);
4000 	return (error);
4001 }
4002 
4003 int
4004 kern_rmdir(struct nlookupdata *nd)
4005 {
4006 	int error;
4007 
4008 	bwillinode(1);
4009 	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
4010 	if ((error = nlookup(nd)) != 0)
4011 		return (error);
4012 
4013 	/*
4014 	 * Do not allow directories representing mount points to be
4015 	 * deleted, even if empty.  Check write perms on mount point
4016 	 * in case the vnode is aliased (aka nullfs).
4017 	 */
4018 	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
4019 		return (EBUSY);
4020 	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4021 		return (error);
4022 	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
4023 	return (error);
4024 }
4025 
4026 /*
4027  * rmdir_args(char *path)
4028  *
4029  * Remove a directory file.
4030  */
4031 int
4032 sys_rmdir(struct rmdir_args *uap)
4033 {
4034 	struct nlookupdata nd;
4035 	int error;
4036 
4037 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
4038 	if (error == 0)
4039 		error = kern_rmdir(&nd);
4040 	nlookup_done(&nd);
4041 	return (error);
4042 }
4043 
4044 int
4045 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
4046 		   enum uio_seg direction)
4047 {
4048 	struct thread *td = curthread;
4049 	struct proc *p = td->td_proc;
4050 	struct vnode *vp;
4051 	struct file *fp;
4052 	struct uio auio;
4053 	struct iovec aiov;
4054 	off_t loff;
4055 	int error, eofflag;
4056 
4057 	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
4058 		return (error);
4059 	if ((fp->f_flag & FREAD) == 0) {
4060 		error = EBADF;
4061 		goto done;
4062 	}
4063 	vp = (struct vnode *)fp->f_data;
4064 unionread:
4065 	if (vp->v_type != VDIR) {
4066 		error = EINVAL;
4067 		goto done;
4068 	}
4069 	aiov.iov_base = buf;
4070 	aiov.iov_len = count;
4071 	auio.uio_iov = &aiov;
4072 	auio.uio_iovcnt = 1;
4073 	auio.uio_rw = UIO_READ;
4074 	auio.uio_segflg = direction;
4075 	auio.uio_td = td;
4076 	auio.uio_resid = count;
4077 	loff = auio.uio_offset = fp->f_offset;
4078 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
4079 	fp->f_offset = auio.uio_offset;
4080 	if (error)
4081 		goto done;
4082 	if (count == auio.uio_resid) {
4083 		if (union_dircheckp) {
4084 			error = union_dircheckp(td, &vp, fp);
4085 			if (error == -1)
4086 				goto unionread;
4087 			if (error)
4088 				goto done;
4089 		}
4090 #if 0
4091 		if ((vp->v_flag & VROOT) &&
4092 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
4093 			struct vnode *tvp = vp;
4094 			vp = vp->v_mount->mnt_vnodecovered;
4095 			vref(vp);
4096 			fp->f_data = vp;
4097 			fp->f_offset = 0;
4098 			vrele(tvp);
4099 			goto unionread;
4100 		}
4101 #endif
4102 	}
4103 
4104 	/*
4105 	 * WARNING!  *basep may not be wide enough to accommodate the
4106 	 * seek offset.  XXX should we hack this to return the upper 32 bits
4107 	 * for offsets greater than 4G?
4108 	 */
4109 	if (basep) {
4110 		*basep = (long)loff;
4111 	}
4112 	*res = count - auio.uio_resid;
4113 done:
4114 	fdrop(fp);
4115 	return (error);
4116 }
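
/*
 * Rough userland consumption pattern for the buffer filled in above (names
 * are illustrative only): the buffer is a packed run of struct dirent
 * records, each advanced by its d_reclen:
 *
 *	char buf[4096];
 *	int n = getdents(fd, buf, sizeof(buf));
 *	for (int off = 0; off < n; ) {
 *		struct dirent *dp = (struct dirent *)(buf + off);
 *		off += dp->d_reclen;
 *	}
 */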
4117 
4118 /*
4119  * getdirentries_args(int fd, char *buf, u_int count, long *basep)
4120  *
4121  * Read a block of directory entries in a file system independent format.
4122  */
4123 int
4124 sys_getdirentries(struct getdirentries_args *uap)
4125 {
4126 	long base;
4127 	int error;
4128 
4129 	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
4130 				   &uap->sysmsg_result, UIO_USERSPACE);
4131 
4132 	if (error == 0 && uap->basep)
4133 		error = copyout(&base, uap->basep, sizeof(*uap->basep));
4134 	return (error);
4135 }
4136 
4137 /*
4138  * getdents_args(int fd, char *buf, size_t count)
4139  */
4140 int
4141 sys_getdents(struct getdents_args *uap)
4142 {
4143 	int error;
4144 
4145 	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
4146 				   &uap->sysmsg_result, UIO_USERSPACE);
4147 
4148 	return (error);
4149 }
4150 
4151 /*
4152  * Set the mode mask for creation of filesystem nodes.
4153  *
4154  * umask(int newmask)
4155  */
4156 int
4157 sys_umask(struct umask_args *uap)
4158 {
4159 	struct thread *td = curthread;
4160 	struct proc *p = td->td_proc;
4161 	struct filedesc *fdp;
4162 
4163 	fdp = p->p_fd;
4164 	uap->sysmsg_result = fdp->fd_cmask;
4165 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4166 	return (0);
4167 }
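
/*
 * Worked example of the fd_cmask arithmetic used throughout this file
 * (kern_open, kern_mkdir and kern_mknod all apply "mode & ... & ~fd_cmask"):
 * with umask(022) in effect, a creat(path, 0666) request yields
 * 0666 & ~022 = 0644, i.e. group/other write permission is masked off.
 */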
4168 
4169 /*
4170  * revoke(char *path)
4171  *
4172  * Void all references to file by ripping underlying filesystem
4173  * away from vnode.
4174  */
4175 int
4176 sys_revoke(struct revoke_args *uap)
4177 {
4178 	struct nlookupdata nd;
4179 	struct vattr vattr;
4180 	struct vnode *vp;
4181 	struct ucred *cred;
4182 	int error;
4183 
4184 	vp = NULL;
4185 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4186 	if (error == 0)
4187 		error = nlookup(&nd);
4188 	if (error == 0)
4189 		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
4190 	cred = crhold(nd.nl_cred);
4191 	nlookup_done(&nd);
4192 	if (error == 0) {
4193 		error = VOP_GETATTR(vp, &vattr);
4195 		if (error == 0 && cred->cr_uid != vattr.va_uid)
4196 			error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0);
4197 		if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
4198 			if (vcount(vp) > 0)
4199 				error = vrevoke(vp, cred);
4200 		} else if (error == 0) {
4201 			error = vrevoke(vp, cred);
4202 		}
4203 		vrele(vp);
4204 	}
4205 	if (cred)
4206 		crfree(cred);
4207 	return (error);
4208 }
4209 
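/*
 * Illustrative userland sketch (not part of this file): revoke(2) is
 * typically used to detach any lingering opens of a terminal device
 * before handing it out again (e.g. by getty or a pty allocator).  The
 * device path below is only an example.
 *
 *	#include <unistd.h>
 *	#include <err.h>
 *
 *	if (revoke("/dev/ttyp0") < 0)
 *		warn("revoke");
 */
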
4210 /*
4211  * getfh_args(char *fname, fhandle_t *fhp)
4212  *
4213  * Get (NFS) file handle
4214  *
4215  * NOTE: We use the fsid of the covering mount, even if it is a nullfs
4216  * mount.  This allows nullfs mounts to be explicitly exported.
4217  *
4218  * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
4219  *
4220  * 	    nullfs mounts of subdirectories are not safe.  That is, it will
4221  *	    work, but you do not really have protection against access to
4222  *	    the related parent directories.
4223  */
4224 int
4225 sys_getfh(struct getfh_args *uap)
4226 {
4227 	struct thread *td = curthread;
4228 	struct nlookupdata nd;
4229 	fhandle_t fh;
4230 	struct vnode *vp;
4231 	struct mount *mp;
4232 	int error;
4233 
4234 	/*
4235 	 * Must be super user
4236 	 */
4237 	if ((error = priv_check(td, PRIV_ROOT)) != 0)
4238 		return (error);
4239 
4240 	vp = NULL;
4241 	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
4242 	if (error == 0)
4243 		error = nlookup(&nd);
4244 	if (error == 0)
4245 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4246 	mp = nd.nl_nch.mount;
4247 	nlookup_done(&nd);
4248 	if (error == 0) {
4249 		bzero(&fh, sizeof(fh));
4250 		fh.fh_fsid = mp->mnt_stat.f_fsid;
4251 		error = VFS_VPTOFH(vp, &fh.fh_fid);
4252 		vput(vp);
4253 		if (error == 0)
4254 			error = copyout(&fh, uap->fhp, sizeof(fh));
4255 	}
4256 	return (error);
4257 }
4258 
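/*
 * Illustrative userland sketch (not part of this file): obtaining a file
 * handle with getfh(2).  The caller must be root, matching the
 * priv_check() above; the path is made up.
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <err.h>
 *
 *	fhandle_t fh;
 *
 *	if (getfh("/export/data/file", &fh) < 0)
 *		err(1, "getfh");
 *	// fh.fh_fsid names the mount, fh.fh_fid the file within it.
 */
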
4259 /*
4260  * fhopen_args(const struct fhandle *u_fhp, int flags)
4261  *
4262  * syscall for rpc.lockd to use to translate an NFS file handle into
4263  * an open descriptor.
4264  *
4265  * warning: do not remove the priv_check() call or this becomes one giant
4266  * security hole.
4267  */
4268 int
4269 sys_fhopen(struct fhopen_args *uap)
4270 {
4271 	struct thread *td = curthread;
4272 	struct filedesc *fdp = td->td_proc->p_fd;
4273 	struct mount *mp;
4274 	struct vnode *vp;
4275 	struct fhandle fhp;
4276 	struct vattr vat;
4277 	struct vattr *vap = &vat;
4278 	struct flock lf;
4279 	int fmode, mode, error = 0, type;
4280 	struct file *nfp;
4281 	struct file *fp;
4282 	int indx;
4283 
4284 	/*
4285 	 * Must be super user
4286 	 */
4287 	error = priv_check(td, PRIV_ROOT);
4288 	if (error)
4289 		return (error);
4290 
4291 	fmode = FFLAGS(uap->flags);
4292 
4293 	/*
4294 	 * Why not allow a non-read/write open for our lockd?
4295 	 */
4296 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4297 		return (EINVAL);
4298 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4299 	if (error)
4300 		return(error);
4301 
4302 	/*
4303 	 * Find the mount point
4304 	 */
4305 	mp = vfs_getvfs(&fhp.fh_fsid);
4306 	if (mp == NULL) {
4307 		error = ESTALE;
4308 		goto done;
4309 	}
4310 	/* now give me my vnode, it gets returned to me locked */
4311 	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
4312 	if (error)
4313 		goto done;
4314 	/*
4315 	 * From now on we have to make sure not
4316 	 * to forget about the vnode.  Any error
4317 	 * that causes an abort must vput(vp);
4318 	 * just set error = err and 'goto bad;'.
4319 	 */
4320 
4321 	/*
4322 	 * from vn_open
4323 	 */
4324 	if (vp->v_type == VLNK) {
4325 		error = EMLINK;
4326 		goto bad;
4327 	}
4328 	if (vp->v_type == VSOCK) {
4329 		error = EOPNOTSUPP;
4330 		goto bad;
4331 	}
4332 	mode = 0;
4333 	if (fmode & (FWRITE | O_TRUNC)) {
4334 		if (vp->v_type == VDIR) {
4335 			error = EISDIR;
4336 			goto bad;
4337 		}
4338 		error = vn_writechk(vp, NULL);
4339 		if (error)
4340 			goto bad;
4341 		mode |= VWRITE;
4342 	}
4343 	if (fmode & FREAD)
4344 		mode |= VREAD;
4345 	if (mode) {
4346 		error = VOP_ACCESS(vp, mode, td->td_ucred);
4347 		if (error)
4348 			goto bad;
4349 	}
4350 	if (fmode & O_TRUNC) {
4351 		vn_unlock(vp);				/* XXX */
4352 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4353 		VATTR_NULL(vap);
4354 		vap->va_size = 0;
4355 		error = VOP_SETATTR(vp, vap, td->td_ucred);
4356 		if (error)
4357 			goto bad;
4358 	}
4359 
4360 	/*
4361 	 * VOP_OPEN needs the file pointer so it can potentially override
4362 	 * it.
4363 	 *
4364 	 * WARNING! no f_nchandle will be associated when fhopen()ing a
4365 	 * directory.  XXX
4366 	 */
4367 	if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
4368 		goto bad;
4369 	fp = nfp;
4370 
4371 	error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
4372 	if (error) {
4373 		/*
4374 		 * Setting f_ops this way prevents VOP_CLOSE from being
4375 		 * called and fdrop() from releasing the vp via v_data.
4376 		 * Since the VOP_OPEN failed we don't want to VOP_CLOSE.
4377 		 */
4378 		fp->f_ops = &badfileops;
4379 		fp->f_data = NULL;
4380 		goto bad_drop;
4381 	}
4382 
4383 	/*
4384 	 * The fp is given its own reference, we still have our ref and lock.
4385 	 * The fp is given its own reference; we still have our ref and lock.
4386 	 * Assert that all regular files must be created with a VM object.
4387 	 */
4388 	if (vp->v_type == VREG && vp->v_object == NULL) {
4389 		kprintf("fhopen: regular file did not have VM object: %p\n", vp);
4390 		goto bad_drop;
4391 	}
4392 
4393 	/*
4394 	 * The open was successful.  Handle any locking requirements.
4395 	 */
4396 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4397 		lf.l_whence = SEEK_SET;
4398 		lf.l_start = 0;
4399 		lf.l_len = 0;
4400 		if (fmode & O_EXLOCK)
4401 			lf.l_type = F_WRLCK;
4402 		else
4403 			lf.l_type = F_RDLCK;
4404 		if (fmode & FNONBLOCK)
4405 			type = 0;
4406 		else
4407 			type = F_WAIT;
4408 		vn_unlock(vp);
4409 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
4410 			/*
4411 			 * release our private reference.
4412 			 */
4413 			fsetfd(fdp, NULL, indx);
4414 			fdrop(fp);
4415 			vrele(vp);
4416 			goto done;
4417 		}
4418 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4419 		fp->f_flag |= FHASLOCK;
4420 	}
4421 
4422 	/*
4423 	 * Clean up.  Associate the file pointer with the previously
4424 	 * reserved descriptor and return it.
4425 	 */
4426 	vput(vp);
4427 	fsetfd(fdp, fp, indx);
4428 	fdrop(fp);
4429 	uap->sysmsg_result = indx;
4430 	if (uap->flags & O_CLOEXEC)
4431 		error = fsetfdflags(fdp, indx, UF_EXCLOSE);
4432 	return (error);
4433 
4434 bad_drop:
4435 	fsetfd(fdp, NULL, indx);
4436 	fdrop(fp);
4437 bad:
4438 	vput(vp);
4439 done:
4440 	return (error);
4441 }
4442 
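/*
 * Illustrative userland sketch (not part of this file): how a privileged
 * daemon such as rpc.lockd might turn a handle obtained via getfh(2) or
 * from an NFS client back into an open descriptor and stat it.  The
 * variable fh is assumed to hold such a handle.
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <sys/stat.h>
 *	#include <fcntl.h>
 *	#include <err.h>
 *
 *	struct stat st;
 *	int fd;
 *
 *	fd = fhopen(&fh, O_RDWR);	// must ask for FREAD and/or FWRITE
 *	if (fd < 0)
 *		err(1, "fhopen");
 *	if (fhstat(&fh, &st) < 0)
 *		err(1, "fhstat");
 */
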
4443 /*
4444  * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
4445  */
4446 int
4447 sys_fhstat(struct fhstat_args *uap)
4448 {
4449 	struct thread *td = curthread;
4450 	struct stat sb;
4451 	fhandle_t fh;
4452 	struct mount *mp;
4453 	struct vnode *vp;
4454 	int error;
4455 
4456 	/*
4457 	 * Must be super user
4458 	 */
4459 	error = priv_check(td, PRIV_ROOT);
4460 	if (error)
4461 		return (error);
4462 
4463 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4464 	if (error)
4465 		return (error);
4466 
4467 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4468 		error = ESTALE;
4469 	if (error == 0) {
4470 		if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
4471 			error = vn_stat(vp, &sb, td->td_ucred);
4472 			vput(vp);
4473 		}
4474 	}
4475 	if (error == 0)
4476 		error = copyout(&sb, uap->sb, sizeof(sb));
4477 	return (error);
4478 }
4479 
4480 /*
4481  * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
4482  */
4483 int
4484 sys_fhstatfs(struct fhstatfs_args *uap)
4485 {
4486 	struct thread *td = curthread;
4487 	struct proc *p = td->td_proc;
4488 	struct statfs *sp;
4489 	struct mount *mp;
4490 	struct vnode *vp;
4491 	struct statfs sb;
4492 	char *fullpath, *freepath;
4493 	fhandle_t fh;
4494 	int error;
4495 
4496 	/*
4497 	 * Must be super user
4498 	 */
4499 	if ((error = priv_check(td, PRIV_ROOT)))
4500 		return (error);
4501 
4502 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4503 		return (error);
4504 
4505 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4506 		error = ESTALE;
4507 		goto done;
4508 	}
4509 	if (p != NULL && !chroot_visible_mnt(mp, p)) {
4510 		error = ESTALE;
4511 		goto done;
4512 	}
4513 
4514 	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
4515 		goto done;
4516 	mp = vp->v_mount;
4517 	sp = &mp->mnt_stat;
4518 	vput(vp);
4519 	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
4520 		goto done;
4521 
4522 	error = mount_path(p, mp, &fullpath, &freepath);
4523 	if (error)
4524 		goto done;
4525 	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
4526 	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
4527 	kfree(freepath, M_TEMP);
4528 
4529 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4530 	if (priv_check(td, PRIV_ROOT)) {
4531 		bcopy(sp, &sb, sizeof(sb));
4532 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4533 		sp = &sb;
4534 	}
4535 	error = copyout(sp, uap->buf, sizeof(*sp));
4536 done:
4537 	return (error);
4538 }
4539 
4540 /*
4541  * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
4542  */
4543 int
4544 sys_fhstatvfs(struct fhstatvfs_args *uap)
4545 {
4546 	struct thread *td = curthread;
4547 	struct proc *p = td->td_proc;
4548 	struct statvfs *sp;
4549 	struct mount *mp;
4550 	struct vnode *vp;
4551 	fhandle_t fh;
4552 	int error;
4553 
4554 	/*
4555 	 * Must be super user
4556 	 */
4557 	if ((error = priv_check(td, PRIV_ROOT)))
4558 		return (error);
4559 
4560 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4561 		return (error);
4562 
4563 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4564 		error = ESTALE;
4565 		goto done;
4566 	}
4567 	if (p != NULL && !chroot_visible_mnt(mp, p)) {
4568 		error = ESTALE;
4569 		goto done;
4570 	}
4571 
4572 	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
4573 		goto done;
4574 	mp = vp->v_mount;
4575 	sp = &mp->mnt_vstat;
4576 	vput(vp);
4577 	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
4578 		goto done;
4579 
4580 	sp->f_flag = 0;
4581 	if (mp->mnt_flag & MNT_RDONLY)
4582 		sp->f_flag |= ST_RDONLY;
4583 	if (mp->mnt_flag & MNT_NOSUID)
4584 		sp->f_flag |= ST_NOSUID;
4585 	error = copyout(sp, uap->buf, sizeof(*sp));
4586 done:
4587 	return (error);
4588 }
4589 
4590 
4591 /*
4592  * Syscall to push extended attribute configuration information into the
4593  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4594  * a command (int cmd), and attribute name and misc data.  For now, the
4595  * attribute name is left in userspace for consumption by the VFS_op.
4596  * It will probably be changed to be copied into kernel space by the
4597  * syscall in the future, once the various consumers of the
4598  * attribute code have weighed in.
4599  *
4600  * Currently this is used only by UFS Extended Attributes.
4601  */
4602 int
4603 sys_extattrctl(struct extattrctl_args *uap)
4604 {
4605 	struct nlookupdata nd;
4606 	struct vnode *vp;
4607 	char attrname[EXTATTR_MAXNAMELEN];
4608 	int error;
4609 	size_t size;
4610 
4611 	attrname[0] = 0;
4612 	vp = NULL;
4613 	error = 0;
4614 
4615 	if (error == 0 && uap->filename) {
4616 		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
4617 				     NLC_FOLLOW);
4618 		if (error == 0)
4619 			error = nlookup(&nd);
4620 		if (error == 0)
4621 			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
4622 		nlookup_done(&nd);
4623 	}
4624 
4625 	if (error == 0 && uap->attrname) {
4626 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4627 				  &size);
4628 	}
4629 
4630 	if (error == 0) {
4631 		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4632 		if (error == 0)
4633 			error = nlookup(&nd);
4634 		if (error == 0)
4635 			error = ncp_writechk(&nd.nl_nch);
4636 		if (error == 0) {
4637 			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
4638 					       uap->attrnamespace,
4639 					       uap->attrname, nd.nl_cred);
4640 		}
4641 		nlookup_done(&nd);
4642 	}
4643 
4644 	return (error);
4645 }
4646 
4647 /*
4648  * Syscall to set a named extended attribute on a file or directory.
4649  */
4650 int
4651 sys_extattr_set_file(struct extattr_set_file_args *uap)
4652 {
4653 	char attrname[EXTATTR_MAXNAMELEN];
4654 	struct nlookupdata nd;
4655 	struct vnode *vp;
4656 	struct uio auio;
4657 	struct iovec aiov;
4658 	int error;
4659 
4660 	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
4661 	if (error)
4662 		return (error);
4663 
4664 	vp = NULL;
4665 
4666 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4667 	if (error == 0)
4668 		error = nlookup(&nd);
4669 	if (error == 0)
4670 		error = ncp_writechk(&nd.nl_nch);
4671 	if (error == 0)
4672 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4673 	if (error) {
4674 		nlookup_done(&nd);
4675 		return (error);
4676 	}
4677 
4678 	bzero(&auio, sizeof(auio));
4679 	aiov.iov_base = uap->data;
4680 	aiov.iov_len = uap->nbytes;
4681 	auio.uio_iov = &aiov;
4682 	auio.uio_iovcnt = 1;
4683 	auio.uio_offset = 0;
4684 	auio.uio_resid = uap->nbytes;
4685 	auio.uio_rw = UIO_WRITE;
4686 	auio.uio_td = curthread;
4687 
4688 	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
4689 			       &auio, nd.nl_cred);
4690 
4691 	vput(vp);
4692 	nlookup_done(&nd);
4693 	return (error);
4694 }
4695 
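/*
 * Illustrative userland sketch (not part of this file): setting a named
 * extended attribute in the user namespace.  The file name, attribute
 * name and payload are arbitrary examples; EXTATTR_NAMESPACE_USER comes
 * from <sys/extattr.h>.
 *
 *	#include <sys/types.h>
 *	#include <sys/extattr.h>
 *	#include <string.h>
 *	#include <err.h>
 *
 *	const char *val = "blue";
 *
 *	if (extattr_set_file("notes.txt", EXTATTR_NAMESPACE_USER,
 *	    "color", val, strlen(val)) < 0)
 *		err(1, "extattr_set_file");
 */
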
4696 /*
4697  * Syscall to get a named extended attribute on a file or directory.
4698  */
4699 int
4700 sys_extattr_get_file(struct extattr_get_file_args *uap)
4701 {
4702 	char attrname[EXTATTR_MAXNAMELEN];
4703 	struct nlookupdata nd;
4704 	struct uio auio;
4705 	struct iovec aiov;
4706 	struct vnode *vp;
4707 	int error;
4708 
4709 	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
4710 	if (error)
4711 		return (error);
4712 
4713 	vp = NULL;
4714 
4715 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4716 	if (error == 0)
4717 		error = nlookup(&nd);
4718 	if (error == 0)
4719 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4720 	if (error) {
4721 		nlookup_done(&nd);
4722 		return (error);
4723 	}
4724 
4725 	bzero(&auio, sizeof(auio));
4726 	aiov.iov_base = uap->data;
4727 	aiov.iov_len = uap->nbytes;
4728 	auio.uio_iov = &aiov;
4729 	auio.uio_iovcnt = 1;
4730 	auio.uio_offset = 0;
4731 	auio.uio_resid = uap->nbytes;
4732 	auio.uio_rw = UIO_READ;
4733 	auio.uio_td = curthread;
4734 
4735 	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
4736 				&auio, nd.nl_cred);
4737 	uap->sysmsg_result = uap->nbytes - auio.uio_resid;
4738 
4739 	vput(vp);
4740 	nlookup_done(&nd);
4741 	return(error);
4742 }
4743 
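/*
 * Illustrative userland sketch (not part of this file): reading the
 * attribute back.  The return value is the number of bytes transferred,
 * mirroring sysmsg_result above.
 *
 *	#include <sys/types.h>
 *	#include <sys/extattr.h>
 *	#include <stdio.h>
 *	#include <err.h>
 *
 *	char buf[64];
 *	ssize_t n;
 *
 *	n = extattr_get_file("notes.txt", EXTATTR_NAMESPACE_USER,
 *	    "color", buf, sizeof(buf));
 *	if (n < 0)
 *		err(1, "extattr_get_file");
 *	printf("%.*s\n", (int)n, buf);
 */
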
4744 /*
4745  * Syscall to delete a named extended attribute from a file or directory.
4746  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
4747  */
4748 int
4749 sys_extattr_delete_file(struct extattr_delete_file_args *uap)
4750 {
4751 	char attrname[EXTATTR_MAXNAMELEN];
4752 	struct nlookupdata nd;
4753 	struct vnode *vp;
4754 	int error;
4755 
4756 	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
4757 	if (error)
4758 		return(error);
4759 
4760 	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4761 	if (error == 0)
4762 		error = nlookup(&nd);
4763 	if (error == 0)
4764 		error = ncp_writechk(&nd.nl_nch);
4765 	if (error == 0) {
4766 		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4767 		if (error == 0) {
4768 			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
4769 					       attrname, NULL, nd.nl_cred);
4770 			vput(vp);
4771 		}
4772 	}
4773 	nlookup_done(&nd);
4774 	return(error);
4775 }
4776 
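/*
 * Illustrative userland sketch (not part of this file): removing the
 * attribute again; as noted above, the kernel implements this as a
 * VOP_SETEXTATTR() call with a NULL uio.
 *
 *	#include <sys/types.h>
 *	#include <sys/extattr.h>
 *	#include <err.h>
 *
 *	if (extattr_delete_file("notes.txt", EXTATTR_NAMESPACE_USER,
 *	    "color") < 0)
 *		err(1, "extattr_delete_file");
 */
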
4777 /*
4778  * Determine if the mount is visible to the process.
4779  */
4780 static int
4781 chroot_visible_mnt(struct mount *mp, struct proc *p)
4782 {
4783 	struct nchandle nch;
4784 
4785 	/*
4786 	 * Traverse from the mount point upwards.  If we hit the process
4787 	 * root then the mount point is visible to the process.
4788 	 */
4789 	nch = mp->mnt_ncmountpt;
4790 	while (nch.ncp) {
4791 		if (nch.mount == p->p_fd->fd_nrdir.mount &&
4792 		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
4793 			return(1);
4794 		}
4795 		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
4796 			nch = nch.mount->mnt_ncmounton;
4797 		} else {
4798 			nch.ncp = nch.ncp->nc_parent;
4799 		}
4800 	}
4801 
4802 	/*
4803 	 * If the mount point is not visible to the process, but the
4804 	 * process root is in a subdirectory of the mount, return
4805 	 * TRUE anyway.
4806 	 */
4807 	if (p->p_fd->fd_nrdir.mount == mp)
4808 		return(1);
4809 
4810 	return(0);
4811 }
4812 
4813