xref: /netbsd-src/sys/kern/vfs_syscalls.c (revision aaf4ece63a859a04e37cf3a7229b5fab0157cc06)
1 /*	$NetBSD: vfs_syscalls.c,v 1.235 2005/12/12 16:26:33 elad Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)vfs_syscalls.c	8.42 (Berkeley) 7/31/95
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.235 2005/12/12 16:26:33 elad Exp $");
41 
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_ktrace.h"
45 #include "opt_verified_exec.h"
46 #include "fss.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/namei.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/stat.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/uio.h>
59 #include <sys/malloc.h>
60 #include <sys/dirent.h>
61 #include <sys/sysctl.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64 #ifdef KTRACE
65 #include <sys/ktrace.h>
66 #endif
67 #ifdef VERIFIED_EXEC
68 #include <sys/verified_exec.h>
69 #endif /* VERIFIED_EXEC */
70 
71 #include <miscfs/genfs/genfs.h>
72 #include <miscfs/syncfs/syncfs.h>
73 
74 #ifdef COMPAT_30
75 #include "opt_nfsserver.h"
76 #include <nfs/rpcv2.h>
77 #include <nfs/nfsproto.h>
78 #include <nfs/nfs.h>
79 #include <nfs/nfs_var.h>
80 #endif
81 
82 #if NFSS > 0
83 #include <dev/fssvar.h>
84 #endif
85 
86 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
87 
88 static int change_dir(struct nameidata *, struct lwp *);
89 static int change_flags(struct vnode *, u_long, struct lwp *);
90 static int change_mode(struct vnode *, int, struct lwp *l);
91 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
92 static int change_utimes(struct vnode *vp, const struct timeval *,
93 	       struct lwp *l);
94 static int rename_files(const char *, const char *, struct lwp *, int);
95 
96 void checkdirs(struct vnode *);
97 
98 int dovfsusermount = 0;
99 
100 /*
101  * Virtual File System System Calls
102  */
103 
104 /*
105  * Mount a file system.
106  */
107 
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Translation table from the numeric filesystem types accepted by the
 * 4.3BSD and NetBSD 0.9 mount syscalls to filesystem names.  The array
 * index IS the historic type number, so the order of the entries must
 * never change.
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.  A NULL slot marks a type number
 * that has no name to map to.
 */
const char * const mountcompatnames[] = {
	/* 0 = MOUNT_NONE */	NULL,
	/* 1 = MOUNT_UFS */	MOUNT_FFS,
	/* 2 */			MOUNT_NFS,
	/* 3 */			MOUNT_MFS,
	/* 4 */			MOUNT_MSDOS,
	/* 5 = MOUNT_ISOFS */	MOUNT_CD9660,
	/* 6 */			MOUNT_FDESC,
	/* 7 */			MOUNT_KERNFS,
	/* 8 = MOUNT_DEVFS */	NULL,
	/* 9 */			MOUNT_AFS,
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
131 
132 /* ARGSUSED */
133 int
134 sys_mount(struct lwp *l, void *v, register_t *retval)
135 {
136 	struct sys_mount_args /* {
137 		syscallarg(const char *) type;
138 		syscallarg(const char *) path;
139 		syscallarg(int) flags;
140 		syscallarg(void *) data;
141 	} */ *uap = v;
142 	struct proc *p = l->l_proc;
143 	struct vnode *vp;
144 	struct mount *mp;
145 	int error, flag = 0;
146 	char fstypename[MFSNAMELEN];
147 	struct vattr va;
148 	struct nameidata nd;
149 	struct vfsops *vfs;
150 
151 	/*
152 	 * if MNT_GETARGS is specified, it should be only flag.
153 	 */
154 
155 	if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
156 	    (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
157 		return EINVAL;
158 	}
159 
160 	if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
161 	    (error = suser(p->p_ucred, &p->p_acflag)))
162 		return (error);
163 	/*
164 	 * Get vnode to be covered
165 	 */
166 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
167 	    SCARG(uap, path), l);
168 	if ((error = namei(&nd)) != 0)
169 		return (error);
170 	vp = nd.ni_vp;
171 	/*
172 	 * A lookup in VFS_MOUNT might result in an attempt to
173 	 * lock this vnode again, so make the lock recursive.
174 	 */
175 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
176 	if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
177 		if ((vp->v_flag & VROOT) == 0) {
178 			vput(vp);
179 			return (EINVAL);
180 		}
181 		mp = vp->v_mount;
182 		flag = mp->mnt_flag;
183 		vfs = mp->mnt_op;
184 		/*
185 		 * We only allow the filesystem to be reloaded if it
186 		 * is currently mounted read-only.
187 		 */
188 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
189 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
190 			vput(vp);
191 			return (EOPNOTSUPP);	/* Needs translation */
192 		}
193 		/*
194 		 * In "highly secure" mode, don't let the caller do anything
195 		 * but downgrade a filesystem from read-write to read-only.
196 		 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
197 		 */
198 		if (securelevel >= 2 &&
199 		    SCARG(uap, flags) != MNT_GETARGS &&
200 		    SCARG(uap, flags) !=
201 		    (mp->mnt_flag | MNT_RDONLY |
202 		     MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
203 			vput(vp);
204 			return (EPERM);
205 		}
206 		mp->mnt_flag |= SCARG(uap, flags) &
207 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
208 		/*
209 		 * Only root, or the user that did the original mount is
210 		 * permitted to update it.
211 		 */
212 		if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
213 		    mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
214 		    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
215 			vput(vp);
216 			return (error);
217 		}
218 		/*
219 		 * Do not allow NFS export by non-root users. For non-root
220 		 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
221 		 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
222 		 */
223 		if (p->p_ucred->cr_uid != 0) {
224 			if (SCARG(uap, flags) & MNT_EXPORTED) {
225 				vput(vp);
226 				return (EPERM);
227 			}
228 			SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
229 			if (flag & MNT_NOEXEC)
230 				SCARG(uap, flags) |= MNT_NOEXEC;
231 		}
232 		if (vfs_busy(mp, LK_NOWAIT, 0)) {
233 			vput(vp);
234 			return (EPERM);
235 		}
236 		goto update;
237 	} else {
238 		if (securelevel >= 2) {
239 			vput(vp);
240 			return (EPERM);
241 		}
242 	}
243 	/*
244 	 * If the user is not root, ensure that they own the directory
245 	 * onto which we are attempting to mount.
246 	 */
247 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0 ||
248 	    (va.va_uid != p->p_ucred->cr_uid &&
249 		(error = suser(p->p_ucred, &p->p_acflag)) != 0)) {
250 		vput(vp);
251 		return (error);
252 	}
253 	/*
254 	 * Do not allow NFS export by non-root users. For non-root users,
255 	 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
256 	 * mount point is already MNT_NOEXEC.
257 	 */
258 	if (p->p_ucred->cr_uid != 0) {
259 		if (SCARG(uap, flags) & MNT_EXPORTED) {
260 			vput(vp);
261 			return (EPERM);
262 		}
263 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
264 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
265 			SCARG(uap, flags) |= MNT_NOEXEC;
266 	}
267 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, l, 0, 0)) != 0) {
268 		vput(vp);
269 		return (error);
270 	}
271 	if (vp->v_type != VDIR) {
272 		vput(vp);
273 		return (ENOTDIR);
274 	}
275 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
276 	if (error) {
277 #if defined(COMPAT_09) || defined(COMPAT_43)
278 		/*
279 		 * Historically, filesystem types were identified by numbers.
280 		 * If we get an integer for the filesystem type instead of a
281 		 * string, we check to see if it matches one of the historic
282 		 * filesystem types.
283 		 */
284 		u_long fsindex = (u_long)SCARG(uap, type);
285 		if (fsindex >= nmountcompatnames ||
286 		    mountcompatnames[fsindex] == NULL) {
287 			vput(vp);
288 			return (ENODEV);
289 		}
290 		strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
291 #else
292 		vput(vp);
293 		return (error);
294 #endif
295 	}
296 #ifdef	COMPAT_10
297 	/* Accept `ufs' as an alias for `ffs'. */
298 	if (!strncmp(fstypename, "ufs", MFSNAMELEN))
299 		strncpy(fstypename, "ffs", MFSNAMELEN);
300 #endif
301 	if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
302 		vput(vp);
303 		return (ENODEV);
304 	}
305 	if (vp->v_mountedhere != NULL) {
306 		vput(vp);
307 		return (EBUSY);
308 	}
309 
310 	/*
311 	 * Allocate and initialize the file system.
312 	 */
313 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
314 		M_MOUNT, M_WAITOK);
315 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
316 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
317 	simple_lock_init(&mp->mnt_slock);
318 	(void)vfs_busy(mp, LK_NOWAIT, 0);
319 	mp->mnt_op = vfs;
320 	vfs->vfs_refcount++;
321 	mp->mnt_vnodecovered = vp;
322 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
323 	mp->mnt_unmounter = NULL;
324 	mp->mnt_leaf = mp;
325 
326 	/*
327 	 * The underlying file system may refuse the mount for
328 	 * various reasons.  Allow the user to force it to happen.
329 	 */
330 	mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
331  update:
332 	if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
333 		/*
334 		 * Set the mount level flags.
335 		 */
336 		if (SCARG(uap, flags) & MNT_RDONLY)
337 			mp->mnt_flag |= MNT_RDONLY;
338 		else if (mp->mnt_flag & MNT_RDONLY)
339 			mp->mnt_iflag |= IMNT_WANTRDWR;
340 		mp->mnt_flag &=
341 		  ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
342 		    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
343 		    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
344 		    MNT_MAGICLINKS);
345 		mp->mnt_flag |= SCARG(uap, flags) &
346 		   (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
347 		    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
348 		    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
349 		    MNT_IGNORE | MNT_MAGICLINKS);
350 	}
351 	/*
352 	 * Mount the filesystem.
353 	 */
354 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
355 	if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
356 #if defined(COMPAT_30) && defined(NFSSERVER)
357 		if (mp->mnt_flag & MNT_UPDATE && error != 0) {
358 			int error2;
359 
360 			/* Update failed; let's try and see if it was an
361 			 * export request. */
362 			error2 = nfs_update_exports_30(mp, SCARG(uap, path),
363 			    SCARG(uap, data), l);
364 
365 			/* Only update error code if the export request was
366 			 * understood but some problem occurred while
367 			 * processing it. */
368 			if (error2 != EJUSTRETURN)
369 				error = error2;
370 		}
371 #endif
372 		if (mp->mnt_iflag & IMNT_WANTRDWR)
373 			mp->mnt_flag &= ~MNT_RDONLY;
374 		if (error)
375 			mp->mnt_flag = flag;
376 		mp->mnt_flag &=~
377 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
378 		mp->mnt_iflag &=~ IMNT_WANTRDWR;
379 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
380 			if (mp->mnt_syncer == NULL)
381 				error = vfs_allocate_syncvnode(mp);
382 		} else {
383 			if (mp->mnt_syncer != NULL)
384 				vfs_deallocate_syncvnode(mp);
385 		}
386 		vfs_unbusy(mp);
387 		VOP_UNLOCK(vp, 0);
388 		vrele(vp);
389 		return (error);
390 	}
391 	/*
392 	 * Put the new filesystem on the mount list after root.
393 	 */
394 	cache_purge(vp);
395 	if (!error) {
396 		mp->mnt_flag &=~
397 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
398 		mp->mnt_iflag &=~ IMNT_WANTRDWR;
399 		vp->v_mountedhere = mp;
400 		simple_lock(&mountlist_slock);
401 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
402 		simple_unlock(&mountlist_slock);
403 		checkdirs(vp);
404 		VOP_UNLOCK(vp, 0);
405 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
406 			error = vfs_allocate_syncvnode(mp);
407 		vfs_unbusy(mp);
408 		(void) VFS_STATVFS(mp, &mp->mnt_stat, l);
409 		if ((error = VFS_START(mp, 0, l)))
410 			vrele(vp);
411 	} else {
412 		vp->v_mountedhere = (struct mount *)0;
413 		vfs->vfs_refcount--;
414 		vfs_unbusy(mp);
415 		free(mp, M_MOUNT);
416 		vput(vp);
417 	}
418 	return (error);
419 }
420 
421 /*
422  * Scan all active processes to see if any of them have a current
423  * or root directory onto which the new filesystem has just been
424  * mounted. If so, replace them with the new mount point.
425  */
426 void
427 checkdirs(struct vnode *olddp)
428 {
429 	struct cwdinfo *cwdi;
430 	struct vnode *newdp;
431 	struct proc *p;
432 
433 	if (olddp->v_usecount == 1)
434 		return;
435 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
436 		panic("mount: lost mount");
437 	proclist_lock_read();
438 	PROCLIST_FOREACH(p, &allproc) {
439 		cwdi = p->p_cwdi;
440 		if (!cwdi)
441 			continue;
442 		if (cwdi->cwdi_cdir == olddp) {
443 			vrele(cwdi->cwdi_cdir);
444 			VREF(newdp);
445 			cwdi->cwdi_cdir = newdp;
446 		}
447 		if (cwdi->cwdi_rdir == olddp) {
448 			vrele(cwdi->cwdi_rdir);
449 			VREF(newdp);
450 			cwdi->cwdi_rdir = newdp;
451 		}
452 	}
453 	proclist_unlock_read();
454 	if (rootvnode == olddp) {
455 		vrele(rootvnode);
456 		VREF(newdp);
457 		rootvnode = newdp;
458 	}
459 	vput(newdp);
460 }
461 
462 /*
463  * Unmount a file system.
464  *
465  * Note: unmount takes a path to the vnode mounted on as argument,
466  * not special file (as before).
467  */
468 /* ARGSUSED */
469 int
470 sys_unmount(struct lwp *l, void *v, register_t *retval)
471 {
472 	struct sys_unmount_args /* {
473 		syscallarg(const char *) path;
474 		syscallarg(int) flags;
475 	} */ *uap = v;
476 	struct proc *p = l->l_proc;
477 	struct vnode *vp;
478 	struct mount *mp;
479 	int error;
480 	struct nameidata nd;
481 
482 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
483 	    SCARG(uap, path), l);
484 	if ((error = namei(&nd)) != 0)
485 		return (error);
486 	vp = nd.ni_vp;
487 	mp = vp->v_mount;
488 
489 	/*
490 	 * Only root, or the user that did the original mount is
491 	 * permitted to unmount this filesystem.
492 	 */
493 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
494 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
495 		vput(vp);
496 		return (error);
497 	}
498 
499 	/*
500 	 * Don't allow unmounting the root file system.
501 	 */
502 	if (mp->mnt_flag & MNT_ROOTFS) {
503 		vput(vp);
504 		return (EINVAL);
505 	}
506 
507 	/*
508 	 * Must be the root of the filesystem
509 	 */
510 	if ((vp->v_flag & VROOT) == 0) {
511 		vput(vp);
512 		return (EINVAL);
513 	}
514 	vput(vp);
515 
516 	/*
517 	 * XXX Freeze syncer.  Must do this before locking the
518 	 * mount point.  See dounmount() for details.
519 	 */
520 	lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
521 
522 	if (vfs_busy(mp, 0, 0)) {
523 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
524 		return (EBUSY);
525 	}
526 
527 	return (dounmount(mp, SCARG(uap, flags), l));
528 }
529 
530 /*
531  * Do the actual file system unmount. File system is assumed to have been
532  * marked busy by the caller.
533  */
534 int
535 dounmount(struct mount *mp, int flags, struct lwp *l)
536 {
537 	struct vnode *coveredvp;
538 	int error;
539 	int async;
540 	int used_syncer;
541 
542 	simple_lock(&mountlist_slock);
543 	vfs_unbusy(mp);
544 	used_syncer = (mp->mnt_syncer != NULL);
545 
546 	/*
547 	 * XXX Syncer must be frozen when we get here.  This should really
548 	 * be done on a per-mountpoint basis, but especially the softdep
549 	 * code possibly called from the syncer doesn't exactly work on a
550 	 * per-mountpoint basis, so the softdep code would become a maze
551 	 * of vfs_busy() calls.
552 	 *
553 	 * The caller of dounmount() must acquire syncer_lock because
554 	 * the syncer itself acquires locks in syncer_lock -> vfs_busy
555 	 * order, and we must preserve that order to avoid deadlock.
556 	 *
557 	 * So, if the file system did not use the syncer, now is
558 	 * the time to release the syncer_lock.
559 	 */
560 	if (used_syncer == 0)
561 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
562 
563 	mp->mnt_iflag |= IMNT_UNMOUNT;
564 	mp->mnt_unmounter = l;
565 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
566 	vn_start_write(NULL, &mp, V_WAIT);
567 
568 	async = mp->mnt_flag & MNT_ASYNC;
569 	mp->mnt_flag &= ~MNT_ASYNC;
570 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
571 	if (mp->mnt_syncer != NULL)
572 		vfs_deallocate_syncvnode(mp);
573 	error = 0;
574 	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
575 #if NFSS > 0
576 		error = fss_umount_hook(mp, (flags & MNT_FORCE));
577 #endif
578 		if (error == 0)
579 			error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_ucred, l);
580 	}
581 	if (error == 0 || (flags & MNT_FORCE))
582 		error = VFS_UNMOUNT(mp, flags, l);
583 	vn_finished_write(mp, 0);
584 	simple_lock(&mountlist_slock);
585 	if (error) {
586 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
587 			(void) vfs_allocate_syncvnode(mp);
588 		mp->mnt_iflag &= ~IMNT_UNMOUNT;
589 		mp->mnt_unmounter = NULL;
590 		mp->mnt_flag |= async;
591 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
592 		    &mountlist_slock);
593 		if (used_syncer)
594 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
595 		simple_lock(&mp->mnt_slock);
596 		while (mp->mnt_wcnt > 0) {
597 			wakeup(mp);
598 			ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
599 				0, &mp->mnt_slock);
600 		}
601 		simple_unlock(&mp->mnt_slock);
602 		return (error);
603 	}
604 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
605 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
606 		coveredvp->v_mountedhere = NULL;
607 		vrele(coveredvp);
608 	}
609 	mp->mnt_op->vfs_refcount--;
610 	if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
611 		panic("unmount: dangling vnode");
612 	mp->mnt_iflag |= IMNT_GONE;
613 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
614 	if (used_syncer)
615 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
616 	simple_lock(&mp->mnt_slock);
617 	while (mp->mnt_wcnt > 0) {
618 		wakeup(mp);
619 		ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
620 	}
621 	simple_unlock(&mp->mnt_slock);
622 	vfs_hooks_unmount(mp);
623 	free(mp, M_MOUNT);
624 	return (0);
625 }
626 
627 /*
628  * Sync each mounted filesystem.
629  */
630 #ifdef DEBUG
631 int syncprt = 0;
632 struct ctldebug debug0 = { "syncprt", &syncprt };
633 #endif
634 
635 /* ARGSUSED */
636 int
637 sys_sync(struct lwp *l, void *v, register_t *retval)
638 {
639 	struct mount *mp, *nmp;
640 	int asyncflag;
641 	struct proc *p = l == NULL ? &proc0 : l->l_proc;
642 
643 	simple_lock(&mountlist_slock);
644 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
645 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
646 			nmp = mp->mnt_list.cqe_prev;
647 			continue;
648 		}
649 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
650 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
651 			asyncflag = mp->mnt_flag & MNT_ASYNC;
652 			mp->mnt_flag &= ~MNT_ASYNC;
653 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, l);
654 			if (asyncflag)
655 				 mp->mnt_flag |= MNT_ASYNC;
656 			vn_finished_write(mp, 0);
657 		}
658 		simple_lock(&mountlist_slock);
659 		nmp = mp->mnt_list.cqe_prev;
660 		vfs_unbusy(mp);
661 
662 	}
663 	simple_unlock(&mountlist_slock);
664 #ifdef DEBUG
665 	if (syncprt)
666 		vfs_bufstats();
667 #endif /* DEBUG */
668 	return (0);
669 }
670 
671 /*
672  * Change filesystem quotas.
673  */
674 /* ARGSUSED */
675 int
676 sys_quotactl(struct lwp *l, void *v, register_t *retval)
677 {
678 	struct sys_quotactl_args /* {
679 		syscallarg(const char *) path;
680 		syscallarg(int) cmd;
681 		syscallarg(int) uid;
682 		syscallarg(caddr_t) arg;
683 	} */ *uap = v;
684 	struct mount *mp;
685 	int error;
686 	struct nameidata nd;
687 
688 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
689 	if ((error = namei(&nd)) != 0)
690 		return (error);
691 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
692 	vrele(nd.ni_vp);
693 	if (error)
694 		return (error);
695 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
696 	    SCARG(uap, arg), l);
697 	vn_finished_write(mp, 0);
698 	return (error);
699 }
700 
701 int
702 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
703     int root)
704 {
705 	struct cwdinfo *cwdi = l->l_proc->p_cwdi;
706 	int error = 0;
707 
708 	/*
709 	 * If MNT_NOWAIT or MNT_LAZY is specified, do not
710 	 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
711 	 * overrides MNT_NOWAIT.
712 	 */
713 	if (flags == MNT_NOWAIT	|| flags == MNT_LAZY ||
714 	    (flags != MNT_WAIT && flags != 0)) {
715 		memcpy(sp, &mp->mnt_stat, sizeof(*sp));
716 		goto done;
717 	}
718 
719 	/* Get the filesystem stats now */
720 	memset(sp, 0, sizeof(*sp));
721 	if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
722 		return error;
723 	}
724 
725 	if (cwdi->cwdi_rdir == NULL)
726 		(void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
727 done:
728 	if (cwdi->cwdi_rdir != NULL) {
729 		size_t len;
730 		char *bp;
731 		char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
732 		if (!path)
733 			return ENOMEM;
734 
735 		bp = path + MAXPATHLEN;
736 		*--bp = '\0';
737 		error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
738 		    MAXPATHLEN / 2, 0, l);
739 		if (error) {
740 			free(path, M_TEMP);
741 			return error;
742 		}
743 		len = strlen(bp);
744 		/*
745 		 * for mount points that are below our root, we can see
746 		 * them, so we fix up the pathname and return them. The
747 		 * rest we cannot see, so we don't allow viewing the
748 		 * data.
749 		 */
750 		if (strncmp(bp, sp->f_mntonname, len) == 0) {
751 			strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
752 			    sizeof(sp->f_mntonname));
753 			if (sp->f_mntonname[0] == '\0')
754 				(void)strlcpy(sp->f_mntonname, "/",
755 				    sizeof(sp->f_mntonname));
756 		} else {
757 			if (root)
758 				(void)strlcpy(sp->f_mntonname, "/",
759 				    sizeof(sp->f_mntonname));
760 			else
761 				error = EPERM;
762 		}
763 		free(path, M_TEMP);
764 	}
765 	sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
766 	return error;
767 }
768 
769 /*
770  * Get filesystem statistics.
771  */
772 /* ARGSUSED */
773 int
774 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
775 {
776 	struct sys_statvfs1_args /* {
777 		syscallarg(const char *) path;
778 		syscallarg(struct statvfs *) buf;
779 		syscallarg(int) flags;
780 	} */ *uap = v;
781 	struct mount *mp;
782 	struct statvfs sbuf;
783 	int error;
784 	struct nameidata nd;
785 
786 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
787 	if ((error = namei(&nd)) != 0)
788 		return error;
789 	mp = nd.ni_vp->v_mount;
790 	vrele(nd.ni_vp);
791 	if ((error = dostatvfs(mp, &sbuf, l, SCARG(uap, flags), 1)) != 0)
792 		return error;
793 	return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
794 }
795 
796 /*
797  * Get filesystem statistics.
798  */
799 /* ARGSUSED */
800 int
801 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
802 {
803 	struct sys_fstatvfs1_args /* {
804 		syscallarg(int) fd;
805 		syscallarg(struct statvfs *) buf;
806 		syscallarg(int) flags;
807 	} */ *uap = v;
808 	struct proc *p = l->l_proc;
809 	struct file *fp;
810 	struct mount *mp;
811 	struct statvfs sbuf;
812 	int error;
813 
814 	/* getvnode() will use the descriptor for us */
815 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
816 		return (error);
817 	mp = ((struct vnode *)fp->f_data)->v_mount;
818 	if ((error = dostatvfs(mp, &sbuf, l, SCARG(uap, flags), 1)) != 0)
819 		goto out;
820 	error = copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
821  out:
822 	FILE_UNUSE(fp, l);
823 	return error;
824 }
825 
826 
827 /*
828  * Get statistics on all filesystems.
829  */
830 int
831 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
832 {
833 	struct sys_getvfsstat_args /* {
834 		syscallarg(struct statvfs *) buf;
835 		syscallarg(size_t) bufsize;
836 		syscallarg(int) flags;
837 	} */ *uap = v;
838 	int root = 0;
839 	struct proc *p = l->l_proc;
840 	struct mount *mp, *nmp;
841 	struct statvfs sbuf;
842 	struct statvfs *sfsp;
843 	size_t count, maxcount;
844 	int error = 0;
845 
846 	maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
847 	sfsp = SCARG(uap, buf);
848 	simple_lock(&mountlist_slock);
849 	count = 0;
850 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
851 	     mp = nmp) {
852 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
853 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
854 			continue;
855 		}
856 		if (sfsp && count < maxcount) {
857 			error = dostatvfs(mp, &sbuf, l, SCARG(uap, flags), 0);
858 			if (error) {
859 				simple_lock(&mountlist_slock);
860 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
861 				vfs_unbusy(mp);
862 				continue;
863 			}
864 			error = copyout(&sbuf, sfsp, sizeof(*sfsp));
865 			if (error) {
866 				vfs_unbusy(mp);
867 				return (error);
868 			}
869 			sfsp++;
870 			root |= strcmp(sbuf.f_mntonname, "/") == 0;
871 		}
872 		count++;
873 		simple_lock(&mountlist_slock);
874 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
875 		vfs_unbusy(mp);
876 	}
877 	simple_unlock(&mountlist_slock);
878 	if (root == 0 && p->p_cwdi->cwdi_rdir) {
879 		/*
880 		 * fake a root entry
881 		 */
882 		if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, &sbuf, l,
883 		    SCARG(uap, flags), 1)) != 0)
884 			return error;
885 		if (sfsp)
886 			error = copyout(&sbuf, sfsp, sizeof(*sfsp));
887 		count++;
888 	}
889 	if (sfsp && count > maxcount)
890 		*retval = maxcount;
891 	else
892 		*retval = count;
893 	return error;
894 }
895 
896 /*
897  * Change current working directory to a given file descriptor.
898  */
899 /* ARGSUSED */
900 int
901 sys_fchdir(struct lwp *l, void *v, register_t *retval)
902 {
903 	struct sys_fchdir_args /* {
904 		syscallarg(int) fd;
905 	} */ *uap = v;
906 	struct proc *p = l->l_proc;
907 	struct filedesc *fdp = p->p_fd;
908 	struct cwdinfo *cwdi = p->p_cwdi;
909 	struct vnode *vp, *tdp;
910 	struct mount *mp;
911 	struct file *fp;
912 	int error;
913 
914 	/* getvnode() will use the descriptor for us */
915 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
916 		return (error);
917 	vp = (struct vnode *)fp->f_data;
918 
919 	VREF(vp);
920 	vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
921 	if (vp->v_type != VDIR)
922 		error = ENOTDIR;
923 	else
924 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, l);
925 	while (!error && (mp = vp->v_mountedhere) != NULL) {
926 		if (vfs_busy(mp, 0, 0))
927 			continue;
928 		error = VFS_ROOT(mp, &tdp);
929 		vfs_unbusy(mp);
930 		if (error)
931 			break;
932 		vput(vp);
933 		vp = tdp;
934 	}
935 	if (error) {
936 		vput(vp);
937 		goto out;
938 	}
939 	VOP_UNLOCK(vp, 0);
940 
941 	/*
942 	 * Disallow changing to a directory not under the process's
943 	 * current root directory (if there is one).
944 	 */
945 	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
946 		vrele(vp);
947 		error = EPERM;	/* operation not permitted */
948 		goto out;
949 	}
950 
951 	vrele(cwdi->cwdi_cdir);
952 	cwdi->cwdi_cdir = vp;
953  out:
954 	FILE_UNUSE(fp, l);
955 	return (error);
956 }
957 
958 /*
959  * Change this process's notion of the root directory to a given file
960  * descriptor.
961  */
962 int
963 sys_fchroot(struct lwp *l, void *v, register_t *retval)
964 {
965 	struct sys_fchroot_args *uap = v;
966 	struct proc *p = l->l_proc;
967 	struct filedesc *fdp = p->p_fd;
968 	struct cwdinfo *cwdi = p->p_cwdi;
969 	struct vnode	*vp;
970 	struct file	*fp;
971 	int		 error;
972 
973 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
974 		return error;
975 	/* getvnode() will use the descriptor for us */
976 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
977 		return error;
978 	vp = (struct vnode *) fp->f_data;
979 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
980 	if (vp->v_type != VDIR)
981 		error = ENOTDIR;
982 	else
983 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, l);
984 	VOP_UNLOCK(vp, 0);
985 	if (error)
986 		goto out;
987 	VREF(vp);
988 
989 	/*
990 	 * Prevent escaping from chroot by putting the root under
991 	 * the working directory.  Silently chdir to / if we aren't
992 	 * already there.
993 	 */
994 	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
995 		/*
996 		 * XXX would be more failsafe to change directory to a
997 		 * deadfs node here instead
998 		 */
999 		vrele(cwdi->cwdi_cdir);
1000 		VREF(vp);
1001 		cwdi->cwdi_cdir = vp;
1002 	}
1003 
1004 	if (cwdi->cwdi_rdir != NULL)
1005 		vrele(cwdi->cwdi_rdir);
1006 	cwdi->cwdi_rdir = vp;
1007  out:
1008 	FILE_UNUSE(fp, l);
1009 	return (error);
1010 }
1011 
1012 /*
1013  * Change current working directory (``.'').
1014  */
1015 /* ARGSUSED */
1016 int
1017 sys_chdir(struct lwp *l, void *v, register_t *retval)
1018 {
1019 	struct sys_chdir_args /* {
1020 		syscallarg(const char *) path;
1021 	} */ *uap = v;
1022 	struct proc *p = l->l_proc;
1023 	struct cwdinfo *cwdi = p->p_cwdi;
1024 	int error;
1025 	struct nameidata nd;
1026 
1027 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1028 	    SCARG(uap, path), l);
1029 	if ((error = change_dir(&nd, l)) != 0)
1030 		return (error);
1031 	vrele(cwdi->cwdi_cdir);
1032 	cwdi->cwdi_cdir = nd.ni_vp;
1033 	return (0);
1034 }
1035 
1036 /*
1037  * Change notion of root (``/'') directory.
1038  */
1039 /* ARGSUSED */
1040 int
1041 sys_chroot(struct lwp *l, void *v, register_t *retval)
1042 {
1043 	struct sys_chroot_args /* {
1044 		syscallarg(const char *) path;
1045 	} */ *uap = v;
1046 	struct proc *p = l->l_proc;
1047 	struct cwdinfo *cwdi = p->p_cwdi;
1048 	struct vnode *vp;
1049 	int error;
1050 	struct nameidata nd;
1051 
1052 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1053 		return (error);
1054 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1055 	    SCARG(uap, path), l);
1056 	if ((error = change_dir(&nd, l)) != 0)
1057 		return (error);
1058 	if (cwdi->cwdi_rdir != NULL)
1059 		vrele(cwdi->cwdi_rdir);
1060 	vp = nd.ni_vp;
1061 	cwdi->cwdi_rdir = vp;
1062 
1063 	/*
1064 	 * Prevent escaping from chroot by putting the root under
1065 	 * the working directory.  Silently chdir to / if we aren't
1066 	 * already there.
1067 	 */
1068 	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1069 		/*
1070 		 * XXX would be more failsafe to change directory to a
1071 		 * deadfs node here instead
1072 		 */
1073 		vrele(cwdi->cwdi_cdir);
1074 		VREF(vp);
1075 		cwdi->cwdi_cdir = vp;
1076 	}
1077 
1078 	return (0);
1079 }
1080 
1081 /*
1082  * Common routine for chroot and chdir.
1083  */
1084 static int
1085 change_dir(struct nameidata *ndp, struct lwp *l)
1086 {
1087 	struct vnode *vp;
1088 	int error;
1089 
1090 	if ((error = namei(ndp)) != 0)
1091 		return (error);
1092 	vp = ndp->ni_vp;
1093 	if (vp->v_type != VDIR)
1094 		error = ENOTDIR;
1095 	else
1096 		error = VOP_ACCESS(vp, VEXEC, l->l_proc->p_ucred, l);
1097 
1098 	if (error)
1099 		vput(vp);
1100 	else
1101 		VOP_UNLOCK(vp, 0);
1102 	return (error);
1103 }
1104 
1105 /*
1106  * Check permissions, allocate an open file structure,
1107  * and call the device open routine if any.
1108  */
1109 int
1110 sys_open(struct lwp *l, void *v, register_t *retval)
1111 {
1112 	struct sys_open_args /* {
1113 		syscallarg(const char *) path;
1114 		syscallarg(int) flags;
1115 		syscallarg(int) mode;
1116 	} */ *uap = v;
1117 	struct proc *p = l->l_proc;
1118 	struct cwdinfo *cwdi = p->p_cwdi;
1119 	struct filedesc *fdp = p->p_fd;
1120 	struct file *fp;
1121 	struct vnode *vp;
1122 	int flags, cmode;
1123 	int type, indx, error;
1124 	struct flock lf;
1125 	struct nameidata nd;
1126 
1127 	flags = FFLAGS(SCARG(uap, flags));
1128 	if ((flags & (FREAD | FWRITE)) == 0)
1129 		return (EINVAL);
1130 	/* falloc() will use the file descriptor for us */
1131 	if ((error = falloc(p, &fp, &indx)) != 0)
1132 		return (error);
1133 	cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1134 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1135 	l->l_dupfd = -indx - 1;			/* XXX check for fdopen */
1136 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1137 		FILE_UNUSE(fp, l);
1138 		fdp->fd_ofiles[indx] = NULL;
1139 		ffree(fp);
1140 		if ((error == EDUPFD || error == EMOVEFD) &&
1141 		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
1142 		    (error =
1143 			dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1144 			*retval = indx;
1145 			return (0);
1146 		}
1147 		if (error == ERESTART)
1148 			error = EINTR;
1149 		fdremove(fdp, indx);
1150 		return (error);
1151 	}
1152 	l->l_dupfd = 0;
1153 	vp = nd.ni_vp;
1154 	fp->f_flag = flags & FMASK;
1155 	fp->f_type = DTYPE_VNODE;
1156 	fp->f_ops = &vnops;
1157 	fp->f_data = vp;
1158 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1159 		lf.l_whence = SEEK_SET;
1160 		lf.l_start = 0;
1161 		lf.l_len = 0;
1162 		if (flags & O_EXLOCK)
1163 			lf.l_type = F_WRLCK;
1164 		else
1165 			lf.l_type = F_RDLCK;
1166 		type = F_FLOCK;
1167 		if ((flags & FNONBLOCK) == 0)
1168 			type |= F_WAIT;
1169 		VOP_UNLOCK(vp, 0);
1170 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1171 		if (error) {
1172 			(void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1173 			FILE_UNUSE(fp, l);
1174 			ffree(fp);
1175 			fdremove(fdp, indx);
1176 			return (error);
1177 		}
1178 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1179 		fp->f_flag |= FHASLOCK;
1180 	}
1181 	VOP_UNLOCK(vp, 0);
1182 	*retval = indx;
1183 	FILE_SET_MATURE(fp);
1184 	FILE_UNUSE(fp, l);
1185 	return (0);
1186 }
1187 
1188 /*
1189  * Get file handle system call
1190  */
1191 int
1192 sys_getfh(struct lwp *l, void *v, register_t *retval)
1193 {
1194 	struct sys_getfh_args /* {
1195 		syscallarg(char *) fname;
1196 		syscallarg(fhandle_t *) fhp;
1197 	} */ *uap = v;
1198 	struct proc *p = l->l_proc;
1199 	struct vnode *vp;
1200 	fhandle_t fh;
1201 	int error;
1202 	struct nameidata nd;
1203 
1204 	/*
1205 	 * Must be super user
1206 	 */
1207 	error = suser(p->p_ucred, &p->p_acflag);
1208 	if (error)
1209 		return (error);
1210 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1211 	    SCARG(uap, fname), l);
1212 	error = namei(&nd);
1213 	if (error)
1214 		return (error);
1215 	vp = nd.ni_vp;
1216 	if (vp->v_mount->mnt_op->vfs_vptofh == NULL)
1217 		return EOPNOTSUPP;
1218 	memset(&fh, 0, sizeof(fh));
1219 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsidx;
1220 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1221 	vput(vp);
1222 	if (error)
1223 		return (error);
1224 	error = copyout(&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh));
1225 	return (error);
1226 }
1227 
1228 /*
1229  * Open a file given a file handle.
1230  *
1231  * Check permissions, allocate an open file structure,
1232  * and call the device open routine if any.
1233  */
1234 int
1235 sys_fhopen(struct lwp *l, void *v, register_t *retval)
1236 {
1237 	struct sys_fhopen_args /* {
1238 		syscallarg(const fhandle_t *) fhp;
1239 		syscallarg(int) flags;
1240 	} */ *uap = v;
1241 	struct proc *p = l->l_proc;
1242 	struct filedesc *fdp = p->p_fd;
1243 	struct file *fp;
1244 	struct vnode *vp = NULL;
1245 	struct mount *mp;
1246 	struct ucred *cred = p->p_ucred;
1247 	int flags;
1248 	struct file *nfp;
1249 	int type, indx, error=0;
1250 	struct flock lf;
1251 	struct vattr va;
1252 	fhandle_t fh;
1253 
1254 	/*
1255 	 * Must be super user
1256 	 */
1257 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1258 		return (error);
1259 
1260 	flags = FFLAGS(SCARG(uap, flags));
1261 	if ((flags & (FREAD | FWRITE)) == 0)
1262 		return (EINVAL);
1263 	if ((flags & O_CREAT))
1264 		return (EINVAL);
1265 	/* falloc() will use the file descriptor for us */
1266 	if ((error = falloc(p, &nfp, &indx)) != 0)
1267 		return (error);
1268 	fp = nfp;
1269 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1270 		goto bad;
1271 
1272 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1273 		error = ESTALE;
1274 		goto bad;
1275 	}
1276 
1277 	if (mp->mnt_op->vfs_fhtovp == NULL) {
1278 		error = EOPNOTSUPP;
1279 		goto bad;
1280 	}
1281 
1282 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1283 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1284 		goto bad;
1285 	}
1286 
1287 	/* Now do an effective vn_open */
1288 
1289 	if (vp->v_type == VSOCK) {
1290 		error = EOPNOTSUPP;
1291 		goto bad;
1292 	}
1293 	if (flags & FREAD) {
1294 		if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1295 			goto bad;
1296 	}
1297 	if (flags & (FWRITE | O_TRUNC)) {
1298 		if (vp->v_type == VDIR) {
1299 			error = EISDIR;
1300 			goto bad;
1301 		}
1302 		if ((error = vn_writechk(vp)) != 0 ||
1303 		    (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1304 			goto bad;
1305 	}
1306 	if (flags & O_TRUNC) {
1307 		if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1308 			goto bad;
1309 		VOP_UNLOCK(vp, 0);			/* XXX */
1310 		VOP_LEASE(vp, l, cred, LEASE_WRITE);
1311 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
1312 		VATTR_NULL(&va);
1313 		va.va_size = 0;
1314 		error = VOP_SETATTR(vp, &va, cred, l);
1315 		vn_finished_write(mp, 0);
1316 		if (error)
1317 			goto bad;
1318 	}
1319 	if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1320 		goto bad;
1321 	if (vp->v_type == VREG &&
1322 	    uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1323 		error = EIO;
1324 		goto bad;
1325 	}
1326 	if (flags & FWRITE)
1327 		vp->v_writecount++;
1328 
1329 	/* done with modified vn_open, now finish what sys_open does. */
1330 
1331 	fp->f_flag = flags & FMASK;
1332 	fp->f_type = DTYPE_VNODE;
1333 	fp->f_ops = &vnops;
1334 	fp->f_data = vp;
1335 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1336 		lf.l_whence = SEEK_SET;
1337 		lf.l_start = 0;
1338 		lf.l_len = 0;
1339 		if (flags & O_EXLOCK)
1340 			lf.l_type = F_WRLCK;
1341 		else
1342 			lf.l_type = F_RDLCK;
1343 		type = F_FLOCK;
1344 		if ((flags & FNONBLOCK) == 0)
1345 			type |= F_WAIT;
1346 		VOP_UNLOCK(vp, 0);
1347 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1348 		if (error) {
1349 			(void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1350 			FILE_UNUSE(fp, l);
1351 			ffree(fp);
1352 			fdremove(fdp, indx);
1353 			return (error);
1354 		}
1355 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1356 		fp->f_flag |= FHASLOCK;
1357 	}
1358 	VOP_UNLOCK(vp, 0);
1359 	*retval = indx;
1360 	FILE_SET_MATURE(fp);
1361 	FILE_UNUSE(fp, l);
1362 	return (0);
1363 
1364 bad:
1365 	FILE_UNUSE(fp, l);
1366 	ffree(fp);
1367 	fdremove(fdp, indx);
1368 	if (vp != NULL)
1369 		vput(vp);
1370 	return (error);
1371 }
1372 
1373 /* ARGSUSED */
1374 int
1375 sys_fhstat(struct lwp *l, void *v, register_t *retval)
1376 {
1377 	struct sys_fhstat_args /* {
1378 		syscallarg(const fhandle_t *) fhp;
1379 		syscallarg(struct stat *) sb;
1380 	} */ *uap = v;
1381 	struct proc *p = l->l_proc;
1382 	struct stat sb;
1383 	int error;
1384 	fhandle_t fh;
1385 	struct mount *mp;
1386 	struct vnode *vp;
1387 
1388 	/*
1389 	 * Must be super user
1390 	 */
1391 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1392 		return (error);
1393 
1394 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1395 		return (error);
1396 
1397 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1398 		return (ESTALE);
1399 	if (mp->mnt_op->vfs_fhtovp == NULL)
1400 		return EOPNOTSUPP;
1401 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1402 		return (error);
1403 	error = vn_stat(vp, &sb, l);
1404 	vput(vp);
1405 	if (error)
1406 		return (error);
1407 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1408 	return (error);
1409 }
1410 
1411 /* ARGSUSED */
1412 int
1413 sys_fhstatvfs1(struct lwp *l, void *v, register_t *retval)
1414 {
1415 	struct sys_fhstatvfs1_args /* {
1416 		syscallarg(const fhandle_t *) fhp;
1417 		syscallarg(struct statvfs *) buf;
1418 		syscallarg(int)	flags;
1419 	} */ *uap = v;
1420 	struct proc *p = l->l_proc;
1421 	struct statvfs sbuf;
1422 	fhandle_t fh;
1423 	struct mount *mp;
1424 	struct vnode *vp;
1425 	int error;
1426 
1427 	/*
1428 	 * Must be super user
1429 	 */
1430 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1431 		return error;
1432 
1433 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1434 		return error;
1435 
1436 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1437 		return ESTALE;
1438 	if (mp->mnt_op->vfs_fhtovp == NULL)
1439 		return EOPNOTSUPP;
1440 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1441 		return error;
1442 
1443 	mp = vp->v_mount;
1444 	if ((error = dostatvfs(mp, &sbuf, l, SCARG(uap, flags), 1)) != 0) {
1445 		vput(vp);
1446 		return error;
1447 	}
1448 	vput(vp);
1449 	return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
1450 }
1451 
1452 /*
1453  * Create a special file.
1454  */
1455 /* ARGSUSED */
1456 int
1457 sys_mknod(struct lwp *l, void *v, register_t *retval)
1458 {
1459 	struct sys_mknod_args /* {
1460 		syscallarg(const char *) path;
1461 		syscallarg(int) mode;
1462 		syscallarg(int) dev;
1463 	} */ *uap = v;
1464 	struct proc *p = l->l_proc;
1465 	struct vnode *vp;
1466 	struct mount *mp;
1467 	struct vattr vattr;
1468 	int error;
1469 	int whiteout = 0;
1470 	struct nameidata nd;
1471 
1472 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1473 		return (error);
1474 restart:
1475 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1476 	if ((error = namei(&nd)) != 0)
1477 		return (error);
1478 	vp = nd.ni_vp;
1479 	if (vp != NULL)
1480 		error = EEXIST;
1481 	else {
1482 		VATTR_NULL(&vattr);
1483 		vattr.va_mode =
1484 		    (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1485 		vattr.va_rdev = SCARG(uap, dev);
1486 		whiteout = 0;
1487 
1488 		switch (SCARG(uap, mode) & S_IFMT) {
1489 		case S_IFMT:	/* used by badsect to flag bad sectors */
1490 			vattr.va_type = VBAD;
1491 			break;
1492 		case S_IFCHR:
1493 			vattr.va_type = VCHR;
1494 			break;
1495 		case S_IFBLK:
1496 			vattr.va_type = VBLK;
1497 			break;
1498 		case S_IFWHT:
1499 			whiteout = 1;
1500 			break;
1501 		default:
1502 			error = EINVAL;
1503 			break;
1504 		}
1505 	}
1506 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1507 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1508 		if (nd.ni_dvp == vp)
1509 			vrele(nd.ni_dvp);
1510 		else
1511 			vput(nd.ni_dvp);
1512 		if (vp)
1513 			vrele(vp);
1514 		if ((error = vn_start_write(NULL, &mp,
1515 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1516 			return (error);
1517 		goto restart;
1518 	}
1519 	if (!error) {
1520 		VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1521 		if (whiteout) {
1522 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1523 			if (error)
1524 				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1525 			vput(nd.ni_dvp);
1526 		} else {
1527 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1528 						&nd.ni_cnd, &vattr);
1529 			if (error == 0)
1530 				vput(nd.ni_vp);
1531 		}
1532 	} else {
1533 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1534 		if (nd.ni_dvp == vp)
1535 			vrele(nd.ni_dvp);
1536 		else
1537 			vput(nd.ni_dvp);
1538 		if (vp)
1539 			vrele(vp);
1540 	}
1541 	vn_finished_write(mp, 0);
1542 	return (error);
1543 }
1544 
1545 /*
1546  * Create a named pipe.
1547  */
1548 /* ARGSUSED */
1549 int
1550 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1551 {
1552 	struct sys_mkfifo_args /* {
1553 		syscallarg(const char *) path;
1554 		syscallarg(int) mode;
1555 	} */ *uap = v;
1556 	struct proc *p = l->l_proc;
1557 	struct mount *mp;
1558 	struct vattr vattr;
1559 	int error;
1560 	struct nameidata nd;
1561 
1562 restart:
1563 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1564 	if ((error = namei(&nd)) != 0)
1565 		return (error);
1566 	if (nd.ni_vp != NULL) {
1567 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1568 		if (nd.ni_dvp == nd.ni_vp)
1569 			vrele(nd.ni_dvp);
1570 		else
1571 			vput(nd.ni_dvp);
1572 		vrele(nd.ni_vp);
1573 		return (EEXIST);
1574 	}
1575 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1576 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1577 		if (nd.ni_dvp == nd.ni_vp)
1578 			vrele(nd.ni_dvp);
1579 		else
1580 			vput(nd.ni_dvp);
1581 		if (nd.ni_vp)
1582 			vrele(nd.ni_vp);
1583 		if ((error = vn_start_write(NULL, &mp,
1584 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1585 			return (error);
1586 		goto restart;
1587 	}
1588 	VATTR_NULL(&vattr);
1589 	vattr.va_type = VFIFO;
1590 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1591 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1592 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1593 	if (error == 0)
1594 		vput(nd.ni_vp);
1595 	vn_finished_write(mp, 0);
1596 	return (error);
1597 }
1598 
1599 /*
1600  * Make a hard file link.
1601  */
1602 /* ARGSUSED */
1603 int
1604 sys_link(struct lwp *l, void *v, register_t *retval)
1605 {
1606 	struct sys_link_args /* {
1607 		syscallarg(const char *) path;
1608 		syscallarg(const char *) link;
1609 	} */ *uap = v;
1610 	struct proc *p = l->l_proc;
1611 	struct vnode *vp;
1612 	struct mount *mp;
1613 	struct nameidata nd;
1614 	int error;
1615 
1616 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1617 	if ((error = namei(&nd)) != 0)
1618 		return (error);
1619 	vp = nd.ni_vp;
1620 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1621 		vrele(vp);
1622 		return (error);
1623 	}
1624 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1625 	if ((error = namei(&nd)) != 0)
1626 		goto out;
1627 	if (nd.ni_vp) {
1628 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1629 		if (nd.ni_dvp == nd.ni_vp)
1630 			vrele(nd.ni_dvp);
1631 		else
1632 			vput(nd.ni_dvp);
1633 		vrele(nd.ni_vp);
1634 		error = EEXIST;
1635 		goto out;
1636 	}
1637 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1638 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
1639 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1640 out:
1641 	vrele(vp);
1642 	vn_finished_write(mp, 0);
1643 	return (error);
1644 }
1645 
1646 /*
1647  * Make a symbolic link.
1648  */
1649 /* ARGSUSED */
1650 int
1651 sys_symlink(struct lwp *l, void *v, register_t *retval)
1652 {
1653 	struct sys_symlink_args /* {
1654 		syscallarg(const char *) path;
1655 		syscallarg(const char *) link;
1656 	} */ *uap = v;
1657 	struct proc *p = l->l_proc;
1658 	struct mount *mp;
1659 	struct vattr vattr;
1660 	char *path;
1661 	int error;
1662 	struct nameidata nd;
1663 
1664 	path = PNBUF_GET();
1665 	error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1666 	if (error)
1667 		goto out;
1668 restart:
1669 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1670 	if ((error = namei(&nd)) != 0)
1671 		goto out;
1672 	if (nd.ni_vp) {
1673 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1674 		if (nd.ni_dvp == nd.ni_vp)
1675 			vrele(nd.ni_dvp);
1676 		else
1677 			vput(nd.ni_dvp);
1678 		vrele(nd.ni_vp);
1679 		error = EEXIST;
1680 		goto out;
1681 	}
1682 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1683 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1684 		if (nd.ni_dvp == nd.ni_vp)
1685 			vrele(nd.ni_dvp);
1686 		else
1687 			vput(nd.ni_dvp);
1688 		if ((error = vn_start_write(NULL, &mp,
1689 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1690 			return (error);
1691 		goto restart;
1692 	}
1693 	VATTR_NULL(&vattr);
1694 	vattr.va_type = VLNK;
1695 	vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1696 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1697 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1698 	if (error == 0)
1699 		vput(nd.ni_vp);
1700 	vn_finished_write(mp, 0);
1701 out:
1702 	PNBUF_PUT(path);
1703 	return (error);
1704 }
1705 
1706 /*
1707  * Delete a whiteout from the filesystem.
1708  */
1709 /* ARGSUSED */
1710 int
1711 sys_undelete(struct lwp *l, void *v, register_t *retval)
1712 {
1713 	struct sys_undelete_args /* {
1714 		syscallarg(const char *) path;
1715 	} */ *uap = v;
1716 	struct proc *p = l->l_proc;
1717 	int error;
1718 	struct mount *mp;
1719 	struct nameidata nd;
1720 
1721 restart:
1722 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1723 	    SCARG(uap, path), l);
1724 	error = namei(&nd);
1725 	if (error)
1726 		return (error);
1727 
1728 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1729 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1730 		if (nd.ni_dvp == nd.ni_vp)
1731 			vrele(nd.ni_dvp);
1732 		else
1733 			vput(nd.ni_dvp);
1734 		if (nd.ni_vp)
1735 			vrele(nd.ni_vp);
1736 		return (EEXIST);
1737 	}
1738 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1739 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1740 		if (nd.ni_dvp == nd.ni_vp)
1741 			vrele(nd.ni_dvp);
1742 		else
1743 			vput(nd.ni_dvp);
1744 		if ((error = vn_start_write(NULL, &mp,
1745 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1746 			return (error);
1747 		goto restart;
1748 	}
1749 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1750 	if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
1751 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1752 	vput(nd.ni_dvp);
1753 	vn_finished_write(mp, 0);
1754 	return (error);
1755 }
1756 
1757 /*
1758  * Delete a name from the filesystem.
1759  */
1760 /* ARGSUSED */
1761 int
1762 sys_unlink(struct lwp *l, void *v, register_t *retval)
1763 {
1764 	struct sys_unlink_args /* {
1765 		syscallarg(const char *) path;
1766 	} */ *uap = v;
1767 	struct proc *p = l->l_proc;
1768 	struct mount *mp;
1769 	struct vnode *vp;
1770 	int error;
1771 	struct nameidata nd;
1772 
1773 restart:
1774 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1775 	    SCARG(uap, path), l);
1776 	if ((error = namei(&nd)) != 0)
1777 		return (error);
1778 	vp = nd.ni_vp;
1779 
1780 	/*
1781 	 * The root of a mounted filesystem cannot be deleted.
1782 	 */
1783 	if (vp->v_flag & VROOT) {
1784 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1785 		if (nd.ni_dvp == vp)
1786 			vrele(nd.ni_dvp);
1787 		else
1788 			vput(nd.ni_dvp);
1789 		vput(vp);
1790 		error = EBUSY;
1791 		goto out;
1792 	}
1793 
1794 #ifdef VERIFIED_EXEC
1795 	/* Handle remove requests for veriexec entries. */
1796 	if ((error = veriexec_removechk(l, vp, nd.ni_dirp)) != 0) {
1797 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1798 		if (nd.ni_dvp == vp)
1799 			vrele(nd.ni_dvp);
1800 		else
1801 			vput(nd.ni_dvp);
1802 		vput(vp);
1803 		goto out;
1804 	}
1805 #endif /* VERIFIED_EXEC */
1806 
1807 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1808 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1809 		if (nd.ni_dvp == vp)
1810 			vrele(nd.ni_dvp);
1811 		else
1812 			vput(nd.ni_dvp);
1813 		vput(vp);
1814 		if ((error = vn_start_write(NULL, &mp,
1815 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1816 			return (error);
1817 		goto restart;
1818 	}
1819 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
1820 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
1821 	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1822 	vn_finished_write(mp, 0);
1823 out:
1824 	return (error);
1825 }
1826 
1827 /*
1828  * Reposition read/write file offset.
1829  */
1830 int
1831 sys_lseek(struct lwp *l, void *v, register_t *retval)
1832 {
1833 	struct sys_lseek_args /* {
1834 		syscallarg(int) fd;
1835 		syscallarg(int) pad;
1836 		syscallarg(off_t) offset;
1837 		syscallarg(int) whence;
1838 	} */ *uap = v;
1839 	struct proc *p = l->l_proc;
1840 	struct ucred *cred = p->p_ucred;
1841 	struct filedesc *fdp = p->p_fd;
1842 	struct file *fp;
1843 	struct vnode *vp;
1844 	struct vattr vattr;
1845 	off_t newoff;
1846 	int error;
1847 
1848 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1849 		return (EBADF);
1850 
1851 	FILE_USE(fp);
1852 
1853 	vp = (struct vnode *)fp->f_data;
1854 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1855 		error = ESPIPE;
1856 		goto out;
1857 	}
1858 
1859 	switch (SCARG(uap, whence)) {
1860 	case SEEK_CUR:
1861 		newoff = fp->f_offset + SCARG(uap, offset);
1862 		break;
1863 	case SEEK_END:
1864 		error = VOP_GETATTR(vp, &vattr, cred, l);
1865 		if (error)
1866 			goto out;
1867 		newoff = SCARG(uap, offset) + vattr.va_size;
1868 		break;
1869 	case SEEK_SET:
1870 		newoff = SCARG(uap, offset);
1871 		break;
1872 	default:
1873 		error = EINVAL;
1874 		goto out;
1875 	}
1876 	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
1877 		goto out;
1878 
1879 	*(off_t *)retval = fp->f_offset = newoff;
1880  out:
1881 	FILE_UNUSE(fp, l);
1882 	return (error);
1883 }
1884 
1885 /*
1886  * Positional read system call.
1887  */
1888 int
1889 sys_pread(struct lwp *l, void *v, register_t *retval)
1890 {
1891 	struct sys_pread_args /* {
1892 		syscallarg(int) fd;
1893 		syscallarg(void *) buf;
1894 		syscallarg(size_t) nbyte;
1895 		syscallarg(off_t) offset;
1896 	} */ *uap = v;
1897 	struct proc *p = l->l_proc;
1898 	struct filedesc *fdp = p->p_fd;
1899 	struct file *fp;
1900 	struct vnode *vp;
1901 	off_t offset;
1902 	int error, fd = SCARG(uap, fd);
1903 
1904 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1905 		return (EBADF);
1906 
1907 	if ((fp->f_flag & FREAD) == 0) {
1908 		simple_unlock(&fp->f_slock);
1909 		return (EBADF);
1910 	}
1911 
1912 	FILE_USE(fp);
1913 
1914 	vp = (struct vnode *)fp->f_data;
1915 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1916 		error = ESPIPE;
1917 		goto out;
1918 	}
1919 
1920 	offset = SCARG(uap, offset);
1921 
1922 	/*
1923 	 * XXX This works because no file systems actually
1924 	 * XXX take any action on the seek operation.
1925 	 */
1926 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1927 		goto out;
1928 
1929 	/* dofileread() will unuse the descriptor for us */
1930 	return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
1931 	    &offset, 0, retval));
1932 
1933  out:
1934 	FILE_UNUSE(fp, l);
1935 	return (error);
1936 }
1937 
1938 /*
1939  * Positional scatter read system call.
1940  */
1941 int
1942 sys_preadv(struct lwp *l, void *v, register_t *retval)
1943 {
1944 	struct sys_preadv_args /* {
1945 		syscallarg(int) fd;
1946 		syscallarg(const struct iovec *) iovp;
1947 		syscallarg(int) iovcnt;
1948 		syscallarg(off_t) offset;
1949 	} */ *uap = v;
1950 	struct proc *p = l->l_proc;
1951 	struct filedesc *fdp = p->p_fd;
1952 	struct file *fp;
1953 	struct vnode *vp;
1954 	off_t offset;
1955 	int error, fd = SCARG(uap, fd);
1956 
1957 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1958 		return (EBADF);
1959 
1960 	if ((fp->f_flag & FREAD) == 0) {
1961 		simple_unlock(&fp->f_slock);
1962 		return (EBADF);
1963 	}
1964 
1965 	FILE_USE(fp);
1966 
1967 	vp = (struct vnode *)fp->f_data;
1968 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1969 		error = ESPIPE;
1970 		goto out;
1971 	}
1972 
1973 	offset = SCARG(uap, offset);
1974 
1975 	/*
1976 	 * XXX This works because no file systems actually
1977 	 * XXX take any action on the seek operation.
1978 	 */
1979 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1980 		goto out;
1981 
1982 	/* dofilereadv() will unuse the descriptor for us */
1983 	return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
1984 	    &offset, 0, retval));
1985 
1986  out:
1987 	FILE_UNUSE(fp, l);
1988 	return (error);
1989 }
1990 
1991 /*
1992  * Positional write system call.
1993  */
1994 int
1995 sys_pwrite(struct lwp *l, void *v, register_t *retval)
1996 {
1997 	struct sys_pwrite_args /* {
1998 		syscallarg(int) fd;
1999 		syscallarg(const void *) buf;
2000 		syscallarg(size_t) nbyte;
2001 		syscallarg(off_t) offset;
2002 	} */ *uap = v;
2003 	struct proc *p = l->l_proc;
2004 	struct filedesc *fdp = p->p_fd;
2005 	struct file *fp;
2006 	struct vnode *vp;
2007 	off_t offset;
2008 	int error, fd = SCARG(uap, fd);
2009 
2010 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2011 		return (EBADF);
2012 
2013 	if ((fp->f_flag & FWRITE) == 0) {
2014 		simple_unlock(&fp->f_slock);
2015 		return (EBADF);
2016 	}
2017 
2018 	FILE_USE(fp);
2019 
2020 	vp = (struct vnode *)fp->f_data;
2021 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2022 		error = ESPIPE;
2023 		goto out;
2024 	}
2025 
2026 	offset = SCARG(uap, offset);
2027 
2028 	/*
2029 	 * XXX This works because no file systems actually
2030 	 * XXX take any action on the seek operation.
2031 	 */
2032 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2033 		goto out;
2034 
2035 	/* dofilewrite() will unuse the descriptor for us */
2036 	return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2037 	    &offset, 0, retval));
2038 
2039  out:
2040 	FILE_UNUSE(fp, l);
2041 	return (error);
2042 }
2043 
2044 /*
2045  * Positional gather write system call.
2046  */
2047 int
2048 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2049 {
2050 	struct sys_pwritev_args /* {
2051 		syscallarg(int) fd;
2052 		syscallarg(const struct iovec *) iovp;
2053 		syscallarg(int) iovcnt;
2054 		syscallarg(off_t) offset;
2055 	} */ *uap = v;
2056 	struct proc *p = l->l_proc;
2057 	struct filedesc *fdp = p->p_fd;
2058 	struct file *fp;
2059 	struct vnode *vp;
2060 	off_t offset;
2061 	int error, fd = SCARG(uap, fd);
2062 
2063 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2064 		return (EBADF);
2065 
2066 	if ((fp->f_flag & FWRITE) == 0) {
2067 		simple_unlock(&fp->f_slock);
2068 		return (EBADF);
2069 	}
2070 
2071 	FILE_USE(fp);
2072 
2073 	vp = (struct vnode *)fp->f_data;
2074 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2075 		error = ESPIPE;
2076 		goto out;
2077 	}
2078 
2079 	offset = SCARG(uap, offset);
2080 
2081 	/*
2082 	 * XXX This works because no file systems actually
2083 	 * XXX take any action on the seek operation.
2084 	 */
2085 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2086 		goto out;
2087 
2088 	/* dofilewritev() will unuse the descriptor for us */
2089 	return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2090 	    &offset, 0, retval));
2091 
2092  out:
2093 	FILE_UNUSE(fp, l);
2094 	return (error);
2095 }
2096 
2097 /*
2098  * Check access permissions.
2099  */
2100 int
2101 sys_access(struct lwp *l, void *v, register_t *retval)
2102 {
2103 	struct sys_access_args /* {
2104 		syscallarg(const char *) path;
2105 		syscallarg(int) flags;
2106 	} */ *uap = v;
2107 	struct proc *p = l->l_proc;
2108 	struct ucred *cred;
2109 	struct vnode *vp;
2110 	int error, flags;
2111 	struct nameidata nd;
2112 
2113 	cred = crdup(p->p_ucred);
2114 	cred->cr_uid = p->p_cred->p_ruid;
2115 	cred->cr_gid = p->p_cred->p_rgid;
2116 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2117 	    SCARG(uap, path), l);
2118 	/* Override default credentials */
2119 	nd.ni_cnd.cn_cred = cred;
2120 	if ((error = namei(&nd)) != 0)
2121 		goto out;
2122 	vp = nd.ni_vp;
2123 
2124 	/* Flags == 0 means only check for existence. */
2125 	if (SCARG(uap, flags)) {
2126 		flags = 0;
2127 		if (SCARG(uap, flags) & R_OK)
2128 			flags |= VREAD;
2129 		if (SCARG(uap, flags) & W_OK)
2130 			flags |= VWRITE;
2131 		if (SCARG(uap, flags) & X_OK)
2132 			flags |= VEXEC;
2133 
2134 		error = VOP_ACCESS(vp, flags, cred, l);
2135 		if (!error && (flags & VWRITE))
2136 			error = vn_writechk(vp);
2137 	}
2138 	vput(vp);
2139 out:
2140 	crfree(cred);
2141 	return (error);
2142 }
2143 
2144 /*
2145  * Get file status; this version follows links.
2146  */
2147 /* ARGSUSED */
2148 int
2149 sys___stat30(struct lwp *l, void *v, register_t *retval)
2150 {
2151 	struct sys___stat30_args /* {
2152 		syscallarg(const char *) path;
2153 		syscallarg(struct stat *) ub;
2154 	} */ *uap = v;
2155 	struct stat sb;
2156 	int error;
2157 	struct nameidata nd;
2158 
2159 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2160 	    SCARG(uap, path), l);
2161 	if ((error = namei(&nd)) != 0)
2162 		return (error);
2163 	error = vn_stat(nd.ni_vp, &sb, l);
2164 	vput(nd.ni_vp);
2165 	if (error)
2166 		return (error);
2167 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2168 	return (error);
2169 }
2170 
2171 /*
2172  * Get file status; this version does not follow links.
2173  */
2174 /* ARGSUSED */
2175 int
2176 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2177 {
2178 	struct sys___lstat30_args /* {
2179 		syscallarg(const char *) path;
2180 		syscallarg(struct stat *) ub;
2181 	} */ *uap = v;
2182 	struct stat sb;
2183 	int error;
2184 	struct nameidata nd;
2185 
2186 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2187 	    SCARG(uap, path), l);
2188 	if ((error = namei(&nd)) != 0)
2189 		return (error);
2190 	error = vn_stat(nd.ni_vp, &sb, l);
2191 	vput(nd.ni_vp);
2192 	if (error)
2193 		return (error);
2194 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2195 	return (error);
2196 }
2197 
2198 /*
2199  * Get configurable pathname variables.
2200  */
2201 /* ARGSUSED */
2202 int
2203 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2204 {
2205 	struct sys_pathconf_args /* {
2206 		syscallarg(const char *) path;
2207 		syscallarg(int) name;
2208 	} */ *uap = v;
2209 	int error;
2210 	struct nameidata nd;
2211 
2212 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2213 	    SCARG(uap, path), l);
2214 	if ((error = namei(&nd)) != 0)
2215 		return (error);
2216 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2217 	vput(nd.ni_vp);
2218 	return (error);
2219 }
2220 
2221 /*
2222  * Return target name of a symbolic link.
2223  */
2224 /* ARGSUSED */
2225 int
2226 sys_readlink(struct lwp *l, void *v, register_t *retval)
2227 {
2228 	struct sys_readlink_args /* {
2229 		syscallarg(const char *) path;
2230 		syscallarg(char *) buf;
2231 		syscallarg(size_t) count;
2232 	} */ *uap = v;
2233 	struct proc *p = l->l_proc;
2234 	struct vnode *vp;
2235 	struct iovec aiov;
2236 	struct uio auio;
2237 	int error;
2238 	struct nameidata nd;
2239 
2240 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2241 	    SCARG(uap, path), l);
2242 	if ((error = namei(&nd)) != 0)
2243 		return (error);
2244 	vp = nd.ni_vp;
2245 	if (vp->v_type != VLNK)
2246 		error = EINVAL;
2247 	else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2248 	    (error = VOP_ACCESS(vp, VREAD, p->p_ucred, l)) == 0) {
2249 		aiov.iov_base = SCARG(uap, buf);
2250 		aiov.iov_len = SCARG(uap, count);
2251 		auio.uio_iov = &aiov;
2252 		auio.uio_iovcnt = 1;
2253 		auio.uio_offset = 0;
2254 		auio.uio_rw = UIO_READ;
2255 		auio.uio_segflg = UIO_USERSPACE;
2256 		auio.uio_lwp = l;
2257 		auio.uio_resid = SCARG(uap, count);
2258 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2259 	}
2260 	vput(vp);
2261 	*retval = SCARG(uap, count) - auio.uio_resid;
2262 	return (error);
2263 }
2264 
2265 /*
2266  * Change flags of a file given a path name.
2267  */
2268 /* ARGSUSED */
2269 int
2270 sys_chflags(struct lwp *l, void *v, register_t *retval)
2271 {
2272 	struct sys_chflags_args /* {
2273 		syscallarg(const char *) path;
2274 		syscallarg(u_long) flags;
2275 	} */ *uap = v;
2276 	struct vnode *vp;
2277 	int error;
2278 	struct nameidata nd;
2279 
2280 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2281 	if ((error = namei(&nd)) != 0)
2282 		return (error);
2283 	vp = nd.ni_vp;
2284 	error = change_flags(vp, SCARG(uap, flags), l);
2285 	vput(vp);
2286 	return (error);
2287 }
2288 
2289 /*
2290  * Change flags of a file given a file descriptor.
2291  */
2292 /* ARGSUSED */
2293 int
2294 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2295 {
2296 	struct sys_fchflags_args /* {
2297 		syscallarg(int) fd;
2298 		syscallarg(u_long) flags;
2299 	} */ *uap = v;
2300 	struct proc *p = l->l_proc;
2301 	struct vnode *vp;
2302 	struct file *fp;
2303 	int error;
2304 
2305 	/* getvnode() will use the descriptor for us */
2306 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2307 		return (error);
2308 	vp = (struct vnode *)fp->f_data;
2309 	error = change_flags(vp, SCARG(uap, flags), l);
2310 	VOP_UNLOCK(vp, 0);
2311 	FILE_UNUSE(fp, l);
2312 	return (error);
2313 }
2314 
2315 /*
2316  * Change flags of a file given a path name; this version does
2317  * not follow links.
2318  */
2319 int
2320 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2321 {
2322 	struct sys_lchflags_args /* {
2323 		syscallarg(const char *) path;
2324 		syscallarg(u_long) flags;
2325 	} */ *uap = v;
2326 	struct vnode *vp;
2327 	int error;
2328 	struct nameidata nd;
2329 
2330 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2331 	if ((error = namei(&nd)) != 0)
2332 		return (error);
2333 	vp = nd.ni_vp;
2334 	error = change_flags(vp, SCARG(uap, flags), l);
2335 	vput(vp);
2336 	return (error);
2337 }
2338 
2339 /*
2340  * Common routine to change flags of a file.
2341  */
2342 int
2343 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2344 {
2345 	struct proc *p = l->l_proc;
2346 	struct mount *mp;
2347 	struct vattr vattr;
2348 	int error;
2349 
2350 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2351 		return (error);
2352 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2353 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2354 	/*
2355 	 * Non-superusers cannot change the flags on devices, even if they
2356 	 * own them.
2357 	 */
2358 	if (suser(p->p_ucred, &p->p_acflag) != 0) {
2359 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, l)) != 0)
2360 			goto out;
2361 		if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2362 			error = EINVAL;
2363 			goto out;
2364 		}
2365 	}
2366 	VATTR_NULL(&vattr);
2367 	vattr.va_flags = flags;
2368 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, l);
2369 out:
2370 	vn_finished_write(mp, 0);
2371 	return (error);
2372 }
2373 
2374 /*
2375  * Change mode of a file given path name; this version follows links.
2376  */
2377 /* ARGSUSED */
2378 int
2379 sys_chmod(struct lwp *l, void *v, register_t *retval)
2380 {
2381 	struct sys_chmod_args /* {
2382 		syscallarg(const char *) path;
2383 		syscallarg(int) mode;
2384 	} */ *uap = v;
2385 	int error;
2386 	struct nameidata nd;
2387 
2388 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2389 	if ((error = namei(&nd)) != 0)
2390 		return (error);
2391 
2392 	error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2393 
2394 	vrele(nd.ni_vp);
2395 	return (error);
2396 }
2397 
2398 /*
2399  * Change mode of a file given a file descriptor.
2400  */
2401 /* ARGSUSED */
2402 int
2403 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2404 {
2405 	struct sys_fchmod_args /* {
2406 		syscallarg(int) fd;
2407 		syscallarg(int) mode;
2408 	} */ *uap = v;
2409 	struct proc *p = l->l_proc;
2410 	struct file *fp;
2411 	int error;
2412 
2413 	/* getvnode() will use the descriptor for us */
2414 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2415 		return (error);
2416 
2417 	error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2418 	FILE_UNUSE(fp, l);
2419 	return (error);
2420 }
2421 
2422 /*
2423  * Change mode of a file given path name; this version does not follow links.
2424  */
2425 /* ARGSUSED */
2426 int
2427 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2428 {
2429 	struct sys_lchmod_args /* {
2430 		syscallarg(const char *) path;
2431 		syscallarg(int) mode;
2432 	} */ *uap = v;
2433 	int error;
2434 	struct nameidata nd;
2435 
2436 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2437 	if ((error = namei(&nd)) != 0)
2438 		return (error);
2439 
2440 	error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2441 
2442 	vrele(nd.ni_vp);
2443 	return (error);
2444 }
2445 
2446 /*
2447  * Common routine to set mode given a vnode.
2448  */
2449 static int
2450 change_mode(struct vnode *vp, int mode, struct lwp *l)
2451 {
2452 	struct proc *p = l->l_proc;
2453 	struct mount *mp;
2454 	struct vattr vattr;
2455 	int error;
2456 
2457 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2458 		return (error);
2459 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2460 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2461 	VATTR_NULL(&vattr);
2462 	vattr.va_mode = mode & ALLPERMS;
2463 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, l);
2464 	VOP_UNLOCK(vp, 0);
2465 	vn_finished_write(mp, 0);
2466 	return (error);
2467 }
2468 
2469 /*
2470  * Set ownership given a path name; this version follows links.
2471  */
2472 /* ARGSUSED */
2473 int
2474 sys_chown(struct lwp *l, void *v, register_t *retval)
2475 {
2476 	struct sys_chown_args /* {
2477 		syscallarg(const char *) path;
2478 		syscallarg(uid_t) uid;
2479 		syscallarg(gid_t) gid;
2480 	} */ *uap = v;
2481 	int error;
2482 	struct nameidata nd;
2483 
2484 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2485 	if ((error = namei(&nd)) != 0)
2486 		return (error);
2487 
2488 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2489 
2490 	vrele(nd.ni_vp);
2491 	return (error);
2492 }
2493 
2494 /*
2495  * Set ownership given a path name; this version follows links.
2496  * Provides POSIX semantics.
2497  */
2498 /* ARGSUSED */
2499 int
2500 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2501 {
2502 	struct sys_chown_args /* {
2503 		syscallarg(const char *) path;
2504 		syscallarg(uid_t) uid;
2505 		syscallarg(gid_t) gid;
2506 	} */ *uap = v;
2507 	int error;
2508 	struct nameidata nd;
2509 
2510 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2511 	if ((error = namei(&nd)) != 0)
2512 		return (error);
2513 
2514 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2515 
2516 	vrele(nd.ni_vp);
2517 	return (error);
2518 }
2519 
2520 /*
2521  * Set ownership given a file descriptor.
2522  */
2523 /* ARGSUSED */
2524 int
2525 sys_fchown(struct lwp *l, void *v, register_t *retval)
2526 {
2527 	struct sys_fchown_args /* {
2528 		syscallarg(int) fd;
2529 		syscallarg(uid_t) uid;
2530 		syscallarg(gid_t) gid;
2531 	} */ *uap = v;
2532 	struct proc *p = l->l_proc;
2533 	int error;
2534 	struct file *fp;
2535 
2536 	/* getvnode() will use the descriptor for us */
2537 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2538 		return (error);
2539 
2540 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2541 	    SCARG(uap, gid), l, 0);
2542 	FILE_UNUSE(fp, l);
2543 	return (error);
2544 }
2545 
2546 /*
2547  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2548  */
2549 /* ARGSUSED */
2550 int
2551 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2552 {
2553 	struct sys_fchown_args /* {
2554 		syscallarg(int) fd;
2555 		syscallarg(uid_t) uid;
2556 		syscallarg(gid_t) gid;
2557 	} */ *uap = v;
2558 	struct proc *p = l->l_proc;
2559 	int error;
2560 	struct file *fp;
2561 
2562 	/* getvnode() will use the descriptor for us */
2563 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2564 		return (error);
2565 
2566 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2567 	    SCARG(uap, gid), l, 1);
2568 	FILE_UNUSE(fp, l);
2569 	return (error);
2570 }
2571 
2572 /*
2573  * Set ownership given a path name; this version does not follow links.
2574  */
2575 /* ARGSUSED */
2576 int
2577 sys_lchown(struct lwp *l, void *v, register_t *retval)
2578 {
2579 	struct sys_lchown_args /* {
2580 		syscallarg(const char *) path;
2581 		syscallarg(uid_t) uid;
2582 		syscallarg(gid_t) gid;
2583 	} */ *uap = v;
2584 	int error;
2585 	struct nameidata nd;
2586 
2587 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2588 	if ((error = namei(&nd)) != 0)
2589 		return (error);
2590 
2591 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2592 
2593 	vrele(nd.ni_vp);
2594 	return (error);
2595 }
2596 
2597 /*
2598  * Set ownership given a path name; this version does not follow links.
2599  * Provides POSIX/XPG semantics.
2600  */
2601 /* ARGSUSED */
2602 int
2603 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2604 {
2605 	struct sys_lchown_args /* {
2606 		syscallarg(const char *) path;
2607 		syscallarg(uid_t) uid;
2608 		syscallarg(gid_t) gid;
2609 	} */ *uap = v;
2610 	int error;
2611 	struct nameidata nd;
2612 
2613 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2614 	if ((error = namei(&nd)) != 0)
2615 		return (error);
2616 
2617 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2618 
2619 	vrele(nd.ni_vp);
2620 	return (error);
2621 }
2622 
2623 /*
2624  * Common routine to set ownership given a vnode.
2625  */
2626 static int
2627 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2628     int posix_semantics)
2629 {
2630 	struct proc *p = l->l_proc;
2631 	struct mount *mp;
2632 	struct vattr vattr;
2633 	mode_t newmode;
2634 	int error;
2635 
2636 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2637 		return (error);
2638 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2639 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2640 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, l)) != 0)
2641 		goto out;
2642 
2643 #define CHANGED(x) ((int)(x) != -1)
2644 	newmode = vattr.va_mode;
2645 	if (posix_semantics) {
2646 		/*
2647 		 * POSIX/XPG semantics: if the caller is not the super-user,
2648 		 * clear set-user-id and set-group-id bits.  Both POSIX and
2649 		 * the XPG consider the behaviour for calls by the super-user
2650 		 * implementation-defined; we leave the set-user-id and set-
2651 		 * group-id settings intact in that case.
2652 		 */
2653 		if (suser(p->p_ucred, NULL) != 0)
2654 			newmode &= ~(S_ISUID | S_ISGID);
2655 	} else {
2656 		/*
2657 		 * NetBSD semantics: when changing owner and/or group,
2658 		 * clear the respective bit(s).
2659 		 */
2660 		if (CHANGED(uid))
2661 			newmode &= ~S_ISUID;
2662 		if (CHANGED(gid))
2663 			newmode &= ~S_ISGID;
2664 	}
2665 	/* Update va_mode iff altered. */
2666 	if (vattr.va_mode == newmode)
2667 		newmode = VNOVAL;
2668 
2669 	VATTR_NULL(&vattr);
2670 	vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2671 	vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2672 	vattr.va_mode = newmode;
2673 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, l);
2674 #undef CHANGED
2675 
2676 out:
2677 	VOP_UNLOCK(vp, 0);
2678 	vn_finished_write(mp, 0);
2679 	return (error);
2680 }
2681 
2682 /*
2683  * Set the access and modification times given a path name; this
2684  * version follows links.
2685  */
2686 /* ARGSUSED */
2687 int
2688 sys_utimes(struct lwp *l, void *v, register_t *retval)
2689 {
2690 	struct sys_utimes_args /* {
2691 		syscallarg(const char *) path;
2692 		syscallarg(const struct timeval *) tptr;
2693 	} */ *uap = v;
2694 	int error;
2695 	struct nameidata nd;
2696 
2697 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2698 	if ((error = namei(&nd)) != 0)
2699 		return (error);
2700 
2701 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2702 
2703 	vrele(nd.ni_vp);
2704 	return (error);
2705 }
2706 
2707 /*
2708  * Set the access and modification times given a file descriptor.
2709  */
2710 /* ARGSUSED */
2711 int
2712 sys_futimes(struct lwp *l, void *v, register_t *retval)
2713 {
2714 	struct sys_futimes_args /* {
2715 		syscallarg(int) fd;
2716 		syscallarg(const struct timeval *) tptr;
2717 	} */ *uap = v;
2718 	struct proc *p = l->l_proc;
2719 	int error;
2720 	struct file *fp;
2721 
2722 	/* getvnode() will use the descriptor for us */
2723 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2724 		return (error);
2725 
2726 	error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
2727 	FILE_UNUSE(fp, l);
2728 	return (error);
2729 }
2730 
2731 /*
2732  * Set the access and modification times given a path name; this
2733  * version does not follow links.
2734  */
2735 /* ARGSUSED */
2736 int
2737 sys_lutimes(struct lwp *l, void *v, register_t *retval)
2738 {
2739 	struct sys_lutimes_args /* {
2740 		syscallarg(const char *) path;
2741 		syscallarg(const struct timeval *) tptr;
2742 	} */ *uap = v;
2743 	int error;
2744 	struct nameidata nd;
2745 
2746 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2747 	if ((error = namei(&nd)) != 0)
2748 		return (error);
2749 
2750 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2751 
2752 	vrele(nd.ni_vp);
2753 	return (error);
2754 }
2755 
2756 /*
2757  * Common routine to set access and modification times given a vnode.
2758  */
2759 static int
2760 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
2761 {
2762 	struct proc *p = l->l_proc;
2763 	struct mount *mp;
2764 	struct vattr vattr;
2765 	int error;
2766 
2767 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2768 		return (error);
2769 	VATTR_NULL(&vattr);
2770 	if (tptr == NULL) {
2771 		nanotime(&vattr.va_atime);
2772 		vattr.va_mtime = vattr.va_atime;
2773 		vattr.va_vaflags |= VA_UTIMES_NULL;
2774 	} else {
2775 		struct timeval tv[2];
2776 
2777 		error = copyin(tptr, tv, sizeof(tv));
2778 		if (error)
2779 			goto out;
2780 		TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
2781 		TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
2782 	}
2783 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2784 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2785 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, l);
2786 	VOP_UNLOCK(vp, 0);
2787 out:
2788 	vn_finished_write(mp, 0);
2789 	return (error);
2790 }
2791 
2792 /*
2793  * Truncate a file given its path name.
2794  */
2795 /* ARGSUSED */
2796 int
2797 sys_truncate(struct lwp *l, void *v, register_t *retval)
2798 {
2799 	struct sys_truncate_args /* {
2800 		syscallarg(const char *) path;
2801 		syscallarg(int) pad;
2802 		syscallarg(off_t) length;
2803 	} */ *uap = v;
2804 	struct proc *p = l->l_proc;
2805 	struct vnode *vp;
2806 	struct mount *mp;
2807 	struct vattr vattr;
2808 	int error;
2809 	struct nameidata nd;
2810 
2811 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2812 	if ((error = namei(&nd)) != 0)
2813 		return (error);
2814 	vp = nd.ni_vp;
2815 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
2816 		vrele(vp);
2817 		return (error);
2818 	}
2819 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2820 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2821 	if (vp->v_type == VDIR)
2822 		error = EISDIR;
2823 	else if ((error = vn_writechk(vp)) == 0 &&
2824 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, l)) == 0) {
2825 		VATTR_NULL(&vattr);
2826 		vattr.va_size = SCARG(uap, length);
2827 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, l);
2828 	}
2829 	vput(vp);
2830 	vn_finished_write(mp, 0);
2831 	return (error);
2832 }
2833 
2834 /*
2835  * Truncate a file given a file descriptor.
2836  */
2837 /* ARGSUSED */
2838 int
2839 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
2840 {
2841 	struct sys_ftruncate_args /* {
2842 		syscallarg(int) fd;
2843 		syscallarg(int) pad;
2844 		syscallarg(off_t) length;
2845 	} */ *uap = v;
2846 	struct proc *p = l->l_proc;
2847 	struct mount *mp;
2848 	struct vattr vattr;
2849 	struct vnode *vp;
2850 	struct file *fp;
2851 	int error;
2852 
2853 	/* getvnode() will use the descriptor for us */
2854 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2855 		return (error);
2856 	if ((fp->f_flag & FWRITE) == 0) {
2857 		error = EINVAL;
2858 		goto out;
2859 	}
2860 	vp = (struct vnode *)fp->f_data;
2861 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
2862 		FILE_UNUSE(fp, l);
2863 		return (error);
2864 	}
2865 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
2866 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2867 	if (vp->v_type == VDIR)
2868 		error = EISDIR;
2869 	else if ((error = vn_writechk(vp)) == 0) {
2870 		VATTR_NULL(&vattr);
2871 		vattr.va_size = SCARG(uap, length);
2872 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
2873 	}
2874 	VOP_UNLOCK(vp, 0);
2875 	vn_finished_write(mp, 0);
2876  out:
2877 	FILE_UNUSE(fp, l);
2878 	return (error);
2879 }
2880 
2881 /*
2882  * Sync an open file.
2883  */
2884 /* ARGSUSED */
2885 int
2886 sys_fsync(struct lwp *l, void *v, register_t *retval)
2887 {
2888 	struct sys_fsync_args /* {
2889 		syscallarg(int) fd;
2890 	} */ *uap = v;
2891 	struct proc *p = l->l_proc;
2892 	struct vnode *vp;
2893 	struct mount *mp;
2894 	struct file *fp;
2895 	int error;
2896 
2897 	/* getvnode() will use the descriptor for us */
2898 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2899 		return (error);
2900 	vp = (struct vnode *)fp->f_data;
2901 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
2902 		FILE_UNUSE(fp, l);
2903 		return (error);
2904 	}
2905 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2906 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
2907 	if (error == 0 && bioops.io_fsync != NULL &&
2908 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2909 		(*bioops.io_fsync)(vp, 0);
2910 	VOP_UNLOCK(vp, 0);
2911 	vn_finished_write(mp, 0);
2912 	FILE_UNUSE(fp, l);
2913 	return (error);
2914 }
2915 
2916 /*
2917  * Sync a range of file data.  API modeled after that found in AIX.
2918  *
2919  * FDATASYNC indicates that we need only save enough metadata to be able
2920  * to re-read the written data.  Note we duplicate AIX's requirement that
2921  * the file be open for writing.
2922  */
2923 /* ARGSUSED */
2924 int
2925 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
2926 {
2927 	struct sys_fsync_range_args /* {
2928 		syscallarg(int) fd;
2929 		syscallarg(int) flags;
2930 		syscallarg(off_t) start;
2931 		syscallarg(off_t) length;
2932 	} */ *uap = v;
2933 	struct proc *p = l->l_proc;
2934 	struct vnode *vp;
2935 	struct file *fp;
2936 	int flags, nflags;
2937 	off_t s, e, len;
2938 	int error;
2939 
2940 	/* getvnode() will use the descriptor for us */
2941 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2942 		return (error);
2943 
2944 	if ((fp->f_flag & FWRITE) == 0) {
2945 		FILE_UNUSE(fp, l);
2946 		return (EBADF);
2947 	}
2948 
2949 	flags = SCARG(uap, flags);
2950 	if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
2951 	    ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
2952 		return (EINVAL);
2953 	}
2954 	/* Now set up the flags for value(s) to pass to VOP_FSYNC() */
2955 	if (flags & FDATASYNC)
2956 		nflags = FSYNC_DATAONLY | FSYNC_WAIT;
2957 	else
2958 		nflags = FSYNC_WAIT;
2959 	if (flags & FDISKSYNC)
2960 		nflags |= FSYNC_CACHE;
2961 
2962 	len = SCARG(uap, length);
2963 	/* If length == 0, we do the whole file, and s = l = 0 will do that */
2964 	if (len) {
2965 		s = SCARG(uap, start);
2966 		e = s + len;
2967 		if (e < s) {
2968 			FILE_UNUSE(fp, l);
2969 			return (EINVAL);
2970 		}
2971 	} else {
2972 		e = 0;
2973 		s = 0;
2974 	}
2975 
2976 	vp = (struct vnode *)fp->f_data;
2977 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2978 	error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
2979 
2980 	if (error == 0 && bioops.io_fsync != NULL &&
2981 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2982 		(*bioops.io_fsync)(vp, nflags);
2983 
2984 	VOP_UNLOCK(vp, 0);
2985 	FILE_UNUSE(fp, l);
2986 	return (error);
2987 }
2988 
2989 /*
2990  * Sync the data of an open file.
2991  */
2992 /* ARGSUSED */
2993 int
2994 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
2995 {
2996 	struct sys_fdatasync_args /* {
2997 		syscallarg(int) fd;
2998 	} */ *uap = v;
2999 	struct proc *p = l->l_proc;
3000 	struct vnode *vp;
3001 	struct file *fp;
3002 	int error;
3003 
3004 	/* getvnode() will use the descriptor for us */
3005 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3006 		return (error);
3007 	if ((fp->f_flag & FWRITE) == 0) {
3008 		FILE_UNUSE(fp, l);
3009 		return (EBADF);
3010 	}
3011 	vp = (struct vnode *)fp->f_data;
3012 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3013 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3014 	VOP_UNLOCK(vp, 0);
3015 	FILE_UNUSE(fp, l);
3016 	return (error);
3017 }
3018 
3019 /*
3020  * Rename files, (standard) BSD semantics frontend.
3021  */
3022 /* ARGSUSED */
3023 int
3024 sys_rename(struct lwp *l, void *v, register_t *retval)
3025 {
3026 	struct sys_rename_args /* {
3027 		syscallarg(const char *) from;
3028 		syscallarg(const char *) to;
3029 	} */ *uap = v;
3030 
3031 	return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3032 }
3033 
3034 /*
3035  * Rename files, POSIX semantics frontend.
3036  */
3037 /* ARGSUSED */
3038 int
3039 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3040 {
3041 	struct sys___posix_rename_args /* {
3042 		syscallarg(const char *) from;
3043 		syscallarg(const char *) to;
3044 	} */ *uap = v;
3045 
3046 	return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3047 }
3048 
3049 /*
3050  * Rename files.  Source and destination must either both be directories,
3051  * or both not be directories.  If target is a directory, it must be empty.
3052  * If `from' and `to' refer to the same object, the value of the `retain'
3053  * argument is used to determine whether `from' will be
3054  *
3055  * (retain == 0)	deleted unless `from' and `to' refer to the same
3056  *			object in the file system's name space (BSD).
3057  * (retain == 1)	always retained (POSIX).
3058  */
3059 static int
3060 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3061 {
3062 	struct mount *mp = NULL;
3063 	struct vnode *tvp, *fvp, *tdvp;
3064 	struct nameidata fromnd, tond;
3065 	struct proc *p;
3066 	int error;
3067 
3068 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3069 	    from, l);
3070 	if ((error = namei(&fromnd)) != 0)
3071 		return (error);
3072 	fvp = fromnd.ni_vp;
3073 	error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3074 	if (error != 0) {
3075 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3076 		vrele(fromnd.ni_dvp);
3077 		vrele(fvp);
3078 		if (fromnd.ni_startdir)
3079 			vrele(fromnd.ni_startdir);
3080 		PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3081 		return (error);
3082 	}
3083 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3084 	    (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3085 	if ((error = namei(&tond)) != 0) {
3086 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3087 		vrele(fromnd.ni_dvp);
3088 		vrele(fvp);
3089 		goto out1;
3090 	}
3091 	tdvp = tond.ni_dvp;
3092 	tvp = tond.ni_vp;
3093 
3094 	if (tvp != NULL) {
3095 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3096 			error = ENOTDIR;
3097 			goto out;
3098 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3099 			error = EISDIR;
3100 			goto out;
3101 		}
3102 	}
3103 
3104 	if (fvp == tdvp)
3105 		error = EINVAL;
3106 
3107 	/*
3108 	 * Source and destination refer to the same object.
3109 	 */
3110 	if (fvp == tvp) {
3111 		if (retain)
3112 			error = -1;
3113 		else if (fromnd.ni_dvp == tdvp &&
3114 		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3115 		    !memcmp(fromnd.ni_cnd.cn_nameptr,
3116 		          tond.ni_cnd.cn_nameptr,
3117 		          fromnd.ni_cnd.cn_namelen))
3118 		error = -1;
3119 	}
3120 
3121 #ifdef VERIFIED_EXEC
3122 	if (!error)
3123 		error = veriexec_renamechk(fvp, fromnd.ni_dirp, tond.ni_dirp, l);
3124 #endif /* VERIFIED_EXEC */
3125 
3126 out:
3127 	p = l->l_proc;
3128 	if (!error) {
3129 		VOP_LEASE(tdvp, l, p->p_ucred, LEASE_WRITE);
3130 		if (fromnd.ni_dvp != tdvp)
3131 			VOP_LEASE(fromnd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
3132 		if (tvp) {
3133 			VOP_LEASE(tvp, l, p->p_ucred, LEASE_WRITE);
3134 		}
3135 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3136 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3137 	} else {
3138 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3139 		if (tdvp == tvp)
3140 			vrele(tdvp);
3141 		else
3142 			vput(tdvp);
3143 		if (tvp)
3144 			vput(tvp);
3145 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3146 		vrele(fromnd.ni_dvp);
3147 		vrele(fvp);
3148 	}
3149 	vrele(tond.ni_startdir);
3150 	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3151 out1:
3152 	vn_finished_write(mp, 0);
3153 	if (fromnd.ni_startdir)
3154 		vrele(fromnd.ni_startdir);
3155 	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3156 	return (error == -1 ? 0 : error);
3157 }
3158 
3159 /*
3160  * Make a directory file.
3161  */
3162 /* ARGSUSED */
3163 int
3164 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3165 {
3166 	struct sys_mkdir_args /* {
3167 		syscallarg(const char *) path;
3168 		syscallarg(int) mode;
3169 	} */ *uap = v;
3170 	struct proc *p = l->l_proc;
3171 	struct mount *mp;
3172 	struct vnode *vp;
3173 	struct vattr vattr;
3174 	int error;
3175 	struct nameidata nd;
3176 
3177 restart:
3178 	NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3179 	    SCARG(uap, path), l);
3180 	if ((error = namei(&nd)) != 0)
3181 		return (error);
3182 	vp = nd.ni_vp;
3183 	if (vp != NULL) {
3184 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3185 		if (nd.ni_dvp == vp)
3186 			vrele(nd.ni_dvp);
3187 		else
3188 			vput(nd.ni_dvp);
3189 		vrele(vp);
3190 		return (EEXIST);
3191 	}
3192 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3193 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3194 		if (nd.ni_dvp == vp)
3195 			vrele(nd.ni_dvp);
3196 		else
3197 			vput(nd.ni_dvp);
3198 		if ((error = vn_start_write(NULL, &mp,
3199 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3200 			return (error);
3201 		goto restart;
3202 	}
3203 	VATTR_NULL(&vattr);
3204 	vattr.va_type = VDIR;
3205 	vattr.va_mode =
3206 	    (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3207 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
3208 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3209 	if (!error)
3210 		vput(nd.ni_vp);
3211 	vn_finished_write(mp, 0);
3212 	return (error);
3213 }
3214 
3215 /*
3216  * Remove a directory file.
3217  */
3218 /* ARGSUSED */
3219 int
3220 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3221 {
3222 	struct sys_rmdir_args /* {
3223 		syscallarg(const char *) path;
3224 	} */ *uap = v;
3225 	struct proc *p = l->l_proc;
3226 	struct mount *mp;
3227 	struct vnode *vp;
3228 	int error;
3229 	struct nameidata nd;
3230 
3231 restart:
3232 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3233 	    SCARG(uap, path), l);
3234 	if ((error = namei(&nd)) != 0)
3235 		return (error);
3236 	vp = nd.ni_vp;
3237 	if (vp->v_type != VDIR) {
3238 		error = ENOTDIR;
3239 		goto out;
3240 	}
3241 	/*
3242 	 * No rmdir "." please.
3243 	 */
3244 	if (nd.ni_dvp == vp) {
3245 		error = EINVAL;
3246 		goto out;
3247 	}
3248 	/*
3249 	 * The root of a mounted filesystem cannot be deleted.
3250 	 */
3251 	if (vp->v_flag & VROOT) {
3252 		error = EBUSY;
3253 		goto out;
3254 	}
3255 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3256 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3257 		if (nd.ni_dvp == vp)
3258 			vrele(nd.ni_dvp);
3259 		else
3260 			vput(nd.ni_dvp);
3261 		vput(vp);
3262 		if ((error = vn_start_write(NULL, &mp,
3263 		    V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3264 			return (error);
3265 		goto restart;
3266 	}
3267 	VOP_LEASE(nd.ni_dvp, l, p->p_ucred, LEASE_WRITE);
3268 	VOP_LEASE(vp, l, p->p_ucred, LEASE_WRITE);
3269 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3270 	vn_finished_write(mp, 0);
3271 	return (error);
3272 
3273 out:
3274 	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3275 	if (nd.ni_dvp == vp)
3276 		vrele(nd.ni_dvp);
3277 	else
3278 		vput(nd.ni_dvp);
3279 	vput(vp);
3280 	return (error);
3281 }
3282 
3283 /*
3284  * Read a block of directory entries in a file system independent format.
3285  */
3286 int
3287 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3288 {
3289 	struct sys___getdents30_args /* {
3290 		syscallarg(int) fd;
3291 		syscallarg(char *) buf;
3292 		syscallarg(size_t) count;
3293 	} */ *uap = v;
3294 	struct proc *p = l->l_proc;
3295 	struct file *fp;
3296 	int error, done;
3297 
3298 	/* getvnode() will use the descriptor for us */
3299 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3300 		return (error);
3301 	if ((fp->f_flag & FREAD) == 0) {
3302 		error = EBADF;
3303 		goto out;
3304 	}
3305 	error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3306 			SCARG(uap, count), &done, l, 0, 0);
3307 #ifdef KTRACE
3308 	if (!error && KTRPOINT(p, KTR_GENIO)) {
3309 		struct iovec iov;
3310 		iov.iov_base = SCARG(uap, buf);
3311 		iov.iov_len = done;
3312 		ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3313 	}
3314 #endif
3315 	*retval = done;
3316  out:
3317 	FILE_UNUSE(fp, l);
3318 	return (error);
3319 }
3320 
3321 /*
3322  * Set the mode mask for creation of filesystem nodes.
3323  */
3324 int
3325 sys_umask(struct lwp *l, void *v, register_t *retval)
3326 {
3327 	struct sys_umask_args /* {
3328 		syscallarg(mode_t) newmask;
3329 	} */ *uap = v;
3330 	struct proc *p = l->l_proc;
3331 	struct cwdinfo *cwdi;
3332 
3333 	cwdi = p->p_cwdi;
3334 	*retval = cwdi->cwdi_cmask;
3335 	cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3336 	return (0);
3337 }
3338 
3339 /*
3340  * Void all references to file by ripping underlying filesystem
3341  * away from vnode.
3342  */
3343 /* ARGSUSED */
3344 int
3345 sys_revoke(struct lwp *l, void *v, register_t *retval)
3346 {
3347 	struct sys_revoke_args /* {
3348 		syscallarg(const char *) path;
3349 	} */ *uap = v;
3350 	struct proc *p = l->l_proc;
3351 	struct mount *mp;
3352 	struct vnode *vp;
3353 	struct vattr vattr;
3354 	int error;
3355 	struct nameidata nd;
3356 
3357 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3358 	if ((error = namei(&nd)) != 0)
3359 		return (error);
3360 	vp = nd.ni_vp;
3361 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, l)) != 0)
3362 		goto out;
3363 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3364 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0)
3365 		goto out;
3366 	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3367 		goto out;
3368 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3369 		VOP_REVOKE(vp, REVOKEALL);
3370 	vn_finished_write(mp, 0);
3371 out:
3372 	vrele(vp);
3373 	return (error);
3374 }
3375 
3376 /*
3377  * Convert a user file descriptor to a kernel file entry.
3378  */
3379 int
3380 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3381 {
3382 	struct vnode *vp;
3383 	struct file *fp;
3384 
3385 	if ((fp = fd_getfile(fdp, fd)) == NULL)
3386 		return (EBADF);
3387 
3388 	FILE_USE(fp);
3389 
3390 	if (fp->f_type != DTYPE_VNODE) {
3391 		FILE_UNUSE(fp, NULL);
3392 		return (EINVAL);
3393 	}
3394 
3395 	vp = (struct vnode *)fp->f_data;
3396 	if (vp->v_type == VBAD) {
3397 		FILE_UNUSE(fp, NULL);
3398 		return (EBADF);
3399 	}
3400 
3401 	*fpp = fp;
3402 	return (0);
3403 }
3404