xref: /netbsd-src/sys/kern/vfs_syscalls.c (revision 1ffa7b76c40339c17a0fb2a09fac93f287cfc046)
1 /*	$NetBSD: vfs_syscalls.c,v 1.186 2003/04/20 07:06:33 yamt Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)vfs_syscalls.c	8.42 (Berkeley) 7/31/95
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.186 2003/04/20 07:06:33 yamt Exp $");
45 
46 #include "opt_compat_netbsd.h"
47 #include "opt_compat_43.h"
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/dirent.h>
62 #include <sys/sysctl.h>
63 #include <sys/sa.h>
64 #include <sys/syscallargs.h>
65 
66 #include <miscfs/genfs/genfs.h>
67 #include <miscfs/syncfs/syncfs.h>
68 
/* Malloc type under which this file allocates and frees struct mount. */
69 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
70 
/* Prototypes for helpers that are private to this file. */
71 static int change_dir __P((struct nameidata *, struct proc *));
72 static int change_flags __P((struct vnode *, u_long, struct proc *));
73 static int change_mode __P((struct vnode *, int, struct proc *p));
74 static int change_owner __P((struct vnode *, uid_t, gid_t, struct proc *,
75     int));
76 static int change_utimes __P((struct vnode *vp, const struct timeval *,
77 	       struct proc *p));
78 static int rename_files __P((const char *, const char *, struct proc *, int));
79 static int dostatfs __P((struct mount *, struct statfs *, struct proc *, int,
80     int));
81 
/* Not static; defined below and called from sys_mount() after a new mount. */
82 void checkdirs __P((struct vnode *));
83 
/*
 * When zero, sys_mount() requires superuser credentials for every request
 * that does not carry MNT_GETARGS.
 */
84 int dovfsusermount = 0;
85 
86 /*
87  * Virtual File System System Calls
88  */
89 
90 /*
91  * Mount a file system.
92  */
93 
94 #if defined(COMPAT_09) || defined(COMPAT_43)
95 /*
96  * This table is used to maintain compatibility with 4.3BSD
97  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
98  *
99  * Do not modify this table. It should only contain filesystems
100  * supported by NetBSD 0.9 and 4.3BSD.
 *
 * The array index is the historic numeric file system type; a NULL slot
 * is a type with no name in this table.  sys_mount() consults the table
 * only when copying in the type string fails, treating the `type'
 * argument as an index instead.
101  */
102 const char * const mountcompatnames[] = {
103 	NULL,		/* 0 = MOUNT_NONE */
104 	MOUNT_FFS,	/* 1 = MOUNT_UFS */
105 	MOUNT_NFS,	/* 2 */
106 	MOUNT_MFS,	/* 3 */
107 	MOUNT_MSDOS,	/* 4 */
108 	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
109 	MOUNT_FDESC,	/* 6 */
110 	MOUNT_KERNFS,	/* 7 */
111 	NULL,		/* 8 = MOUNT_DEVFS */
112 	MOUNT_AFS,	/* 9 */
113 };
/* Number of slots in mountcompatnames[]; sys_mount() bounds-checks with it. */
114 const int nmountcompatnames = sizeof(mountcompatnames) /
115     sizeof(mountcompatnames[0]);
116 #endif /* COMPAT_09 || COMPAT_43 */
117 
118 /* ARGSUSED */
119 int
120 sys_mount(l, v, retval)
121 	struct lwp *l;
122 	void *v;
123 	register_t *retval;
124 {
125 	struct sys_mount_args /* {
126 		syscallarg(const char *) type;
127 		syscallarg(const char *) path;
128 		syscallarg(int) flags;
129 		syscallarg(void *) data;
130 	} */ *uap = v;
131 	struct proc *p = l->l_proc;
132 	struct vnode *vp;
133 	struct mount *mp;
134 	int error, flag = 0;
135 	char fstypename[MFSNAMELEN];
136 	struct vattr va;
137 	struct nameidata nd;
138 	struct vfsops *vfs;
139 
140 	if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
141 	    (error = suser(p->p_ucred, &p->p_acflag)))
142 		return (error);
143 	/*
144 	 * Get vnode to be covered
145 	 */
146 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
147 	    SCARG(uap, path), p);
148 	if ((error = namei(&nd)) != 0)
149 		return (error);
150 	vp = nd.ni_vp;
151 	/*
152 	 * A lookup in VFS_MOUNT might result in an attempt to
153 	 * lock this vnode again, so make the lock recursive.
154 	 */
155 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
156 	if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
157 		if ((vp->v_flag & VROOT) == 0) {
158 			vput(vp);
159 			return (EINVAL);
160 		}
161 		mp = vp->v_mount;
162 		flag = mp->mnt_flag;
163 		vfs = mp->mnt_op;
164 		/*
165 		 * We only allow the filesystem to be reloaded if it
166 		 * is currently mounted read-only.
167 		 */
168 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
169 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
170 			vput(vp);
171 			return (EOPNOTSUPP);	/* Needs translation */
172 		}
173 		/*
174 		 * In "highly secure" mode, don't let the caller do anything
175 		 * but downgrade a filesystem from read-write to read-only.
176 		 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
177 		 */
178 		if (securelevel >= 2 &&
179 		    SCARG(uap, flags) != MNT_GETARGS &&
180 		    SCARG(uap, flags) !=
181 		    (mp->mnt_flag | MNT_RDONLY |
182 		    MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
183 			vput(vp);
184 			return (EPERM);
185 		}
186 		mp->mnt_flag |= SCARG(uap, flags) &
187 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
188 		/*
189 		 * Only root, or the user that did the original mount is
190 		 * permitted to update it.
191 		 */
192 		if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
193 		    mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
194 		    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
195 			vput(vp);
196 			return (error);
197 		}
198 		/*
199 		 * Do not allow NFS export by non-root users. For non-root
200 		 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
201 		 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
202 		 */
203 		if (p->p_ucred->cr_uid != 0) {
204 			if (SCARG(uap, flags) & MNT_EXPORTED) {
205 				vput(vp);
206 				return (EPERM);
207 			}
208 			SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
209 			if (flag & MNT_NOEXEC)
210 				SCARG(uap, flags) |= MNT_NOEXEC;
211 		}
212 		if (vfs_busy(mp, LK_NOWAIT, 0)) {
213 			vput(vp);
214 			return (EPERM);
215 		}
216 		goto update;
217 	} else {
218 		if (securelevel >= 2) {
219 			vput(vp);
220 			return (EPERM);
221 		}
222 	}
223 	/*
224 	 * If the user is not root, ensure that they own the directory
225 	 * onto which we are attempting to mount.
226 	 */
227 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0 ||
228 	    (va.va_uid != p->p_ucred->cr_uid &&
229 		(error = suser(p->p_ucred, &p->p_acflag)) != 0)) {
230 		vput(vp);
231 		return (error);
232 	}
233 	/*
234 	 * Do not allow NFS export by non-root users. For non-root users,
235 	 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
236 	 * mount point is already MNT_NOEXEC.
237 	 */
238 	if (p->p_ucred->cr_uid != 0) {
239 		if (SCARG(uap, flags) & MNT_EXPORTED) {
240 			vput(vp);
241 			return (EPERM);
242 		}
243 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
244 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
245 			SCARG(uap, flags) |= MNT_NOEXEC;
246 	}
247 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
248 		return (error);
249 	if (vp->v_type != VDIR) {
250 		vput(vp);
251 		return (ENOTDIR);
252 	}
253 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
254 	if (error) {
255 #if defined(COMPAT_09) || defined(COMPAT_43)
256 		/*
257 		 * Historically filesystem types were identified by number.
258 		 * If we get an integer for the filesystem type instead of a
259 		 * string, we check to see if it matches one of the historic
260 		 * filesystem types.
261 		 */
262 		u_long fsindex = (u_long)SCARG(uap, type);
263 		if (fsindex >= nmountcompatnames ||
264 		    mountcompatnames[fsindex] == NULL) {
265 			vput(vp);
266 			return (ENODEV);
267 		}
268 		strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
269 #else
270 		vput(vp);
271 		return (error);
272 #endif
273 	}
274 #ifdef	COMPAT_10
275 	/* Accept `ufs' as an alias for `ffs'. */
276 	if (!strncmp(fstypename, "ufs", MFSNAMELEN))
277 		strncpy(fstypename, "ffs", MFSNAMELEN);
278 #endif
279 	if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
280 		vput(vp);
281 		return (ENODEV);
282 	}
283 	if (vp->v_mountedhere != NULL) {
284 		vput(vp);
285 		return (EBUSY);
286 	}
287 
288 	/*
289 	 * Allocate and initialize the file system.
290 	 */
291 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
292 		M_MOUNT, M_WAITOK);
293 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
294 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
295 	(void)vfs_busy(mp, LK_NOWAIT, 0);
296 	mp->mnt_op = vfs;
297 	vfs->vfs_refcount++;
298 	mp->mnt_vnodecovered = vp;
299 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
300 	mp->mnt_unmounter = NULL;
301 update:
302 	/*
303 	 * Set the mount level flags.
304 	 */
305 	if (SCARG(uap, flags) & MNT_RDONLY)
306 		mp->mnt_flag |= MNT_RDONLY;
307 	else if (mp->mnt_flag & MNT_RDONLY)
308 		mp->mnt_flag |= MNT_WANTRDWR;
309 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
310 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
311 	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
312 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
313 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
314 	    MNT_NOCOREDUMP | MNT_IGNORE | MNT_NOATIME | MNT_NODEVMTIME |
315 	    MNT_SYMPERM | MNT_SOFTDEP);
316 	/*
317 	 * Mount the filesystem.
318 	 */
319 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
320 	if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
321 		if (mp->mnt_flag & MNT_WANTRDWR)
322 			mp->mnt_flag &= ~MNT_RDONLY;
323 		if (error || (mp->mnt_flag & MNT_GETARGS))
324 			mp->mnt_flag = flag;
325 		mp->mnt_flag &=~
326 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR |
327 		     MNT_GETARGS);
328 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
329 			if (mp->mnt_syncer == NULL)
330 				error = vfs_allocate_syncvnode(mp);
331 		} else {
332 			if (mp->mnt_syncer != NULL)
333 				vfs_deallocate_syncvnode(mp);
334 		}
335 		vfs_unbusy(mp);
336 		VOP_UNLOCK(vp, 0);
337 		vrele(vp);
338 		return (error);
339 	}
340 	/*
341 	 * Put the new filesystem on the mount list after root.
342 	 */
343 	cache_purge(vp);
344 	if (!error) {
345 		vp->v_mountedhere = mp;
346 		simple_lock(&mountlist_slock);
347 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
348 		simple_unlock(&mountlist_slock);
349 		checkdirs(vp);
350 		VOP_UNLOCK(vp, 0);
351 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
352 			error = vfs_allocate_syncvnode(mp);
353 		vfs_unbusy(mp);
354 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
355 		if ((error = VFS_START(mp, 0, p)))
356 			vrele(vp);
357 	} else {
358 		vp->v_mountedhere = (struct mount *)0;
359 		vfs->vfs_refcount--;
360 		vfs_unbusy(mp);
361 		free(mp, M_MOUNT);
362 		vput(vp);
363 	}
364 	return (error);
365 }
366 
367 /*
368  * Scan all active processes to see if any of them have a current
369  * or root directory onto which the new filesystem has just been
370  * mounted. If so, replace them with the new mount point.
371  */
372 void
373 checkdirs(olddp)
374 	struct vnode *olddp;
375 {
376 	struct cwdinfo *cwdi;
377 	struct vnode *newdp;
378 	struct proc *p;
379 
380 	if (olddp->v_usecount == 1)
381 		return;
382 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
383 		panic("mount: lost mount");
384 	proclist_lock_read();
385 	LIST_FOREACH(p, &allproc, p_list) {
386 		cwdi = p->p_cwdi;
387 		if (cwdi->cwdi_cdir == olddp) {
388 			vrele(cwdi->cwdi_cdir);
389 			VREF(newdp);
390 			cwdi->cwdi_cdir = newdp;
391 		}
392 		if (cwdi->cwdi_rdir == olddp) {
393 			vrele(cwdi->cwdi_rdir);
394 			VREF(newdp);
395 			cwdi->cwdi_rdir = newdp;
396 		}
397 	}
398 	proclist_unlock_read();
399 	if (rootvnode == olddp) {
400 		vrele(rootvnode);
401 		VREF(newdp);
402 		rootvnode = newdp;
403 	}
404 	vput(newdp);
405 }
406 
407 /*
408  * Unmount a file system.
409  *
410  * Note: unmount takes a path to the vnode mounted on as argument,
411  * not special file (as before).
412  */
413 /* ARGSUSED */
414 int
415 sys_unmount(l, v, retval)
416 	struct lwp *l;
417 	void *v;
418 	register_t *retval;
419 {
420 	struct sys_unmount_args /* {
421 		syscallarg(const char *) path;
422 		syscallarg(int) flags;
423 	} */ *uap = v;
424 	struct proc *p = l->l_proc;
425 	struct vnode *vp;
426 	struct mount *mp;
427 	int error;
428 	struct nameidata nd;
429 
430 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
431 	    SCARG(uap, path), p);
432 	if ((error = namei(&nd)) != 0)
433 		return (error);
434 	vp = nd.ni_vp;
435 	mp = vp->v_mount;
436 
437 	/*
438 	 * Only root, or the user that did the original mount is
439 	 * permitted to unmount this filesystem.
440 	 */
441 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
442 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
443 		vput(vp);
444 		return (error);
445 	}
446 
447 	/*
448 	 * Don't allow unmounting the root file system.
449 	 */
450 	if (mp->mnt_flag & MNT_ROOTFS) {
451 		vput(vp);
452 		return (EINVAL);
453 	}
454 
455 	/*
456 	 * Must be the root of the filesystem
457 	 */
458 	if ((vp->v_flag & VROOT) == 0) {
459 		vput(vp);
460 		return (EINVAL);
461 	}
462 	vput(vp);
463 
464 	/*
465 	 * XXX Freeze syncer.  Must do this before locking the
466 	 * mount point.  See dounmount() for details.
467 	 */
468 	lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
469 
470 	if (vfs_busy(mp, 0, 0)) {
471 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
472 		return (EBUSY);
473 	}
474 
475 	return (dounmount(mp, SCARG(uap, flags), p));
476 }
477 
478 /*
479  * Do the actual file system unmount. File system is assumed to have been
480  * marked busy by the caller.
 *
 *	mp	mount to tear down; freed here on success
 *	flags	passed to VFS_UNMOUNT(); with MNT_FORCE the unmount is
 *		attempted even if the preceding VFS_SYNC() failed
 *	p	process doing the unmount; recorded in mnt_unmounter and
 *		used (with its credentials) for VFS_SYNC()/VFS_UNMOUNT()
 *
 * Returns 0 on success.  On failure returns the error from VFS_SYNC()
 * or VFS_UNMOUNT() after clearing MNT_UNMOUNT and restoring MNT_ASYNC.
 * syncer_lock, which the caller must hold on entry, is released on
 * every path out of this function.
481  */
482 int
483 dounmount(mp, flags, p)
484 	struct mount *mp;
485 	int flags;
486 	struct proc *p;
487 {
488 	struct vnode *coveredvp;
489 	int error;
490 	int async;
491 	int used_syncer;
492 
	/* Drop the caller's busy reference while holding the list lock. */
493 	simple_lock(&mountlist_slock);
494 	vfs_unbusy(mp);
495 	used_syncer = (mp->mnt_syncer != NULL);
496 
497 	/*
498 	 * XXX Syncer must be frozen when we get here.  This should really
499 	 * be done on a per-mountpoint basis, but especially the softdep
500 	 * code possibly called from the syncer doesn't exactly work on a
501 	 * per-mountpoint basis, so the softdep code would become a maze
502 	 * of vfs_busy() calls.
503 	 *
504 	 * The caller of dounmount() must acquire syncer_lock because
505 	 * the syncer itself acquires locks in syncer_lock -> vfs_busy
506 	 * order, and we must preserve that order to avoid deadlock.
507 	 *
508 	 * So, if the file system did not use the syncer, now is
509 	 * the time to release the syncer_lock.
510 	 */
511 	if (used_syncer == 0)
512 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
513 
	/*
	 * Mark the mount as being unmounted, then drain mnt_lock.
	 * NOTE(review): LK_INTERLOCK presumably hands mountlist_slock to
	 * lockmgr() to release -- confirm against lockmgr(9).
	 */
514 	mp->mnt_flag |= MNT_UNMOUNT;
515 	mp->mnt_unmounter = p;
516 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
517 	if (mp->mnt_flag & MNT_EXPUBLIC)
518 		vfs_setpublicfs(NULL, NULL, NULL);
	/* Remember MNT_ASYNC so it can be put back if the unmount fails. */
519 	async = mp->mnt_flag & MNT_ASYNC;
520 	mp->mnt_flag &= ~MNT_ASYNC;
521 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
522 	if (mp->mnt_syncer != NULL)
523 		vfs_deallocate_syncvnode(mp);
	/*
	 * Call VFS_UNMOUNT() if the file system is read-only, or the sync
	 * succeeded, or the caller forces it.  Otherwise `error' keeps the
	 * VFS_SYNC() failure.  Every path through the condition assigns
	 * `error' before it is read below.
	 */
524 	if (((mp->mnt_flag & MNT_RDONLY) ||
525 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
526 	    (flags & MNT_FORCE))
527 		error = VFS_UNMOUNT(mp, flags, p);
528 	simple_lock(&mountlist_slock);
529 	if (error) {
		/* Failure: put the mount back into service. */
530 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
531 			(void) vfs_allocate_syncvnode(mp);
532 		mp->mnt_flag &= ~MNT_UNMOUNT;
533 		mp->mnt_unmounter = NULL;
534 		mp->mnt_flag |= async;
535 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
536 		    &mountlist_slock);
537 		if (used_syncer)
538 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
		/*
		 * Wake sleepers on mp and wait until mnt_wcnt reaches zero.
		 * NOTE(review): mnt_wcnt presumably counts vfs_busy()
		 * waiters -- confirm against vfs_busy().
		 */
539 		while (mp->mnt_wcnt > 0) {
540 			wakeup(mp);
541 			tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 0);
542 		}
543 		return (error);
544 	}
	/* Success: unlink the mount and uncover the vnode it was mounted on. */
545 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
546 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
547 		coveredvp->v_mountedhere = NULL;
548 		vrele(coveredvp);
549 	}
550 	mp->mnt_op->vfs_refcount--;
551 	if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
552 		panic("unmount: dangling vnode");
553 	mp->mnt_flag |= MNT_GONE;
554 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
555 	if (used_syncer)
556 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
	/* Same wake-and-wait loop as above, run before mp is freed. */
557 	while(mp->mnt_wcnt > 0) {
558 		wakeup(mp);
559 		tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0);
560 	}
561 	free(mp, M_MOUNT);
562 	return (0);
563 }
564 
565 /*
566  * Sync each mounted filesystem.
567  */
568 #ifdef DEBUG
/* Debug knob: when non-zero, sys_sync() calls vfs_bufstats() before returning. */
569 int syncprt = 0;
570 struct ctldebug debug0 = { "syncprt", &syncprt };
571 #endif
572 
/*
 * sys_sync: start a sync of every mounted file system that is not
 * read-only.  Always returns 0; VFS_SYNC() results are ignored.
 * The arguments v and retval are not used.
 */
573 /* ARGSUSED */
574 int
575 sys_sync(l, v, retval)
576 	struct lwp *l;
577 	void *v;
578 	register_t *retval;
579 {
580 	struct mount *mp, *nmp;
581 	int asyncflag;
	/* A NULL lwp is accepted; proc0 is used in that case. */
582 	struct proc *p = l == NULL ? &proc0 : l->l_proc;
583 
	/* Walk the mount list from its last entry back to the first. */
584 	simple_lock(&mountlist_slock);
585 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/* Could not busy this mount without waiting: skip it. */
586 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
587 			nmp = mp->mnt_list.cqe_prev;
588 			continue;
589 		}
590 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			/* Clear MNT_ASYNC for the duration of the sync, then restore it. */
591 			asyncflag = mp->mnt_flag & MNT_ASYNC;
592 			mp->mnt_flag &= ~MNT_ASYNC;
593 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
594 			if (asyncflag)
595 				 mp->mnt_flag |= MNT_ASYNC;
596 		}
		/*
		 * Re-take the list lock before reading the link and dropping
		 * the busy reference.
		 * NOTE(review): this implies a successful vfs_busy() released
		 * mountlist_slock -- confirm against vfs_busy().
		 */
597 		simple_lock(&mountlist_slock);
598 		nmp = mp->mnt_list.cqe_prev;
599 		vfs_unbusy(mp);
600 
601 	}
602 	simple_unlock(&mountlist_slock);
603 #ifdef DEBUG
604 	if (syncprt)
605 		vfs_bufstats();
606 #endif /* DEBUG */
607 	return (0);
608 }
609 
610 /*
611  * Change filesystem quotas.
612  */
613 /* ARGSUSED */
614 int
615 sys_quotactl(l, v, retval)
616 	struct lwp *l;
617 	void *v;
618 	register_t *retval;
619 {
620 	struct sys_quotactl_args /* {
621 		syscallarg(const char *) path;
622 		syscallarg(int) cmd;
623 		syscallarg(int) uid;
624 		syscallarg(caddr_t) arg;
625 	} */ *uap = v;
626 	struct proc *p = l->l_proc;
627 	struct mount *mp;
628 	int error;
629 	struct nameidata nd;
630 
631 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
632 	if ((error = namei(&nd)) != 0)
633 		return (error);
634 	mp = nd.ni_vp->v_mount;
635 	vrele(nd.ni_vp);
636 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
637 	    SCARG(uap, arg), p));
638 }
639 
640 static int
641 dostatfs(struct mount *mp, struct statfs *sp, struct proc *p, int flags,
642     int root)
643 {
644 	struct cwdinfo *cwdi = p->p_cwdi;
645 	int error = 0;
646 
647 	/*
648 	 * If MNT_NOWAIT or MNT_LAZY is specified, do not
649 	 * refresh the fsstat cache. MNT_WAIT or MNT_LAXY
650 	 * overrides MNT_NOWAIT.
651 	 */
652 	if (flags == MNT_NOWAIT	|| flags == MNT_LAZY ||
653 	    (flags != MNT_WAIT && flags != 0)) {
654 		memcpy(sp, &mp->mnt_stat, sizeof(*sp));
655 		goto done;
656 	}
657 
658 	if ((error = VFS_STATFS(mp, sp, p)) != 0) {
659 		return error;
660 	}
661 
662 	if (cwdi->cwdi_rdir == NULL)
663 		(void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
664 done:
665 	if (cwdi->cwdi_rdir != NULL) {
666 		size_t len;
667 		char *bp;
668 		char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
669 		if (!path)
670 			return ENOMEM;
671 
672 		bp = path + MAXPATHLEN;
673 		*--bp = '\0';
674 		error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
675 		    MAXPATHLEN / 2, 0, p);
676 		if (error) {
677 			free(path, M_TEMP);
678 			return error;
679 		}
680 		len = strlen(bp);
681 		/*
682 		 * for mount points that are below our root, we can see
683 		 * them, so we fix up the pathname and return them. The
684 		 * rest we cannot see, so we don't allow viewing the
685 		 * data.
686 		 */
687 		if (strncmp(bp, sp->f_mntonname, len) == 0) {
688 			strcpy(sp->f_mntonname, &sp->f_mntonname[len]);
689 			if (sp->f_mntonname[0] == '\0')
690 			    (void)strcpy(sp->f_mntonname, "/");
691 		} else {
692 			if (root)
693 				(void)strcpy(sp->f_mntonname, "/");
694 			else
695 				error = EPERM;
696 		}
697 		free(path, M_TEMP);
698 	}
699 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
700 	sp->f_oflags = sp->f_flags & 0xffff;
701 	return error;
702 }
703 
704 /*
705  * Get filesystem statistics.
706  */
707 /* ARGSUSED */
708 int
709 sys_statfs(l, v, retval)
710 	struct lwp *l;
711 	void *v;
712 	register_t *retval;
713 {
714 	struct sys_statfs_args /* {
715 		syscallarg(const char *) path;
716 		syscallarg(struct statfs *) buf;
717 	} */ *uap = v;
718 	struct proc *p = l->l_proc;
719 	struct mount *mp;
720 	struct statfs sbuf;
721 	int error;
722 	struct nameidata nd;
723 
724 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
725 	if ((error = namei(&nd)) != 0)
726 		return error;
727 	mp = nd.ni_vp->v_mount;
728 	vrele(nd.ni_vp);
729 	if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
730 		return error;
731 	return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
732 }
733 
734 /*
735  * Get filesystem statistics.
736  */
737 /* ARGSUSED */
738 int
739 sys_fstatfs(l, v, retval)
740 	struct lwp *l;
741 	void *v;
742 	register_t *retval;
743 {
744 	struct sys_fstatfs_args /* {
745 		syscallarg(int) fd;
746 		syscallarg(struct statfs *) buf;
747 	} */ *uap = v;
748 	struct proc *p = l->l_proc;
749 	struct file *fp;
750 	struct mount *mp;
751 	struct statfs sbuf;
752 	int error;
753 
754 	/* getvnode() will use the descriptor for us */
755 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
756 		return (error);
757 	mp = ((struct vnode *)fp->f_data)->v_mount;
758 	if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
759 		goto out;
760 	error = copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
761  out:
762 	FILE_UNUSE(fp, p);
763 	return error;
764 }
765 
766 
767 /*
768  * Get statistics on all filesystems.
769  */
770 int
771 sys_getfsstat(l, v, retval)
772 	struct lwp *l;
773 	void *v;
774 	register_t *retval;
775 {
776 	struct sys_getfsstat_args /* {
777 		syscallarg(struct statfs *) buf;
778 		syscallarg(long) bufsize;
779 		syscallarg(int) flags;
780 	} */ *uap = v;
781 	int root = 0;
782 	struct proc *p = l->l_proc;
783 	struct mount *mp, *nmp;
784 	struct statfs sbuf;
785 	caddr_t sfsp;
786 	long count, maxcount, error = 0;
787 
788 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
789 	sfsp = (caddr_t)SCARG(uap, buf);
790 	simple_lock(&mountlist_slock);
791 	count = 0;
792 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
793 	     mp = nmp) {
794 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
795 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
796 			continue;
797 		}
798 		if (sfsp && count < maxcount) {
799 			error = dostatfs(mp, &sbuf, p, SCARG(uap, flags), 0);
800 			if (error) {
801 				simple_lock(&mountlist_slock);
802 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
803 				vfs_unbusy(mp);
804 				continue;
805 			}
806 			error = copyout(&sbuf, sfsp, sizeof(sbuf));
807 			if (error) {
808 				vfs_unbusy(mp);
809 				return (error);
810 			}
811 			sfsp += sizeof(sbuf);
812 			root |= strcmp(sbuf.f_mntonname, "/") == 0;
813 		}
814 		count++;
815 		simple_lock(&mountlist_slock);
816 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
817 		vfs_unbusy(mp);
818 	}
819 	simple_unlock(&mountlist_slock);
820 	if (root == 0 && p->p_cwdi->cwdi_rdir) {
821 		/*
822 		 * fake a root entry
823 		 */
824 		if ((error = dostatfs(p->p_cwdi->cwdi_rdir->v_mount, &sbuf, p,
825 		    SCARG(uap, flags), 1)) != 0)
826 			return error;
827 		if (sfsp)
828 			error = copyout(&sbuf, sfsp, sizeof(sbuf));
829 		count++;
830 	}
831 	if (sfsp && count > maxcount)
832 		*retval = maxcount;
833 	else
834 		*retval = count;
835 	return error;
836 }
837 
838 /*
839  * Change current working directory to a given file descriptor.
840  */
841 /* ARGSUSED */
842 int
843 sys_fchdir(l, v, retval)
844 	struct lwp *l;
845 	void *v;
846 	register_t *retval;
847 {
848 	struct sys_fchdir_args /* {
849 		syscallarg(int) fd;
850 	} */ *uap = v;
851 	struct proc *p = l->l_proc;
852 	struct filedesc *fdp = p->p_fd;
853 	struct cwdinfo *cwdi = p->p_cwdi;
854 	struct vnode *vp, *tdp;
855 	struct mount *mp;
856 	struct file *fp;
857 	int error;
858 
859 	/* getvnode() will use the descriptor for us */
860 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
861 		return (error);
862 	vp = (struct vnode *)fp->f_data;
863 
864 	VREF(vp);
865 	vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
866 	if (vp->v_type != VDIR)
867 		error = ENOTDIR;
868 	else
869 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
870 	while (!error && (mp = vp->v_mountedhere) != NULL) {
871 		if (vfs_busy(mp, 0, 0))
872 			continue;
873 		error = VFS_ROOT(mp, &tdp);
874 		vfs_unbusy(mp);
875 		if (error)
876 			break;
877 		vput(vp);
878 		vp = tdp;
879 	}
880 	if (error) {
881 		vput(vp);
882 		goto out;
883 	}
884 	VOP_UNLOCK(vp, 0);
885 
886 	/*
887 	 * Disallow changing to a directory not under the process's
888 	 * current root directory (if there is one).
889 	 */
890 	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, p)) {
891 		vrele(vp);
892 		error = EPERM;	/* operation not permitted */
893 		goto out;
894 	}
895 
896 	vrele(cwdi->cwdi_cdir);
897 	cwdi->cwdi_cdir = vp;
898  out:
899 	FILE_UNUSE(fp, p);
900 	return (error);
901 }
902 
903 /*
904  * Change this process's notion of the root directory to a given file descriptor.
905  */
906 
907 int
908 sys_fchroot(l, v, retval)
909 	struct lwp *l;
910 	void *v;
911 	register_t *retval;
912 {
913 	struct sys_fchroot_args *uap = v;
914 	struct proc *p = l->l_proc;
915 	struct filedesc *fdp = p->p_fd;
916 	struct cwdinfo *cwdi = p->p_cwdi;
917 	struct vnode	*vp;
918 	struct file	*fp;
919 	int		 error;
920 
921 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
922 		return error;
923 	/* getvnode() will use the descriptor for us */
924 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
925 		return error;
926 	vp = (struct vnode *) fp->f_data;
927 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
928 	if (vp->v_type != VDIR)
929 		error = ENOTDIR;
930 	else
931 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
932 	VOP_UNLOCK(vp, 0);
933 	if (error)
934 		goto out;
935 	VREF(vp);
936 
937 	/*
938 	 * Prevent escaping from chroot by putting the root under
939 	 * the working directory.  Silently chdir to / if we aren't
940 	 * already there.
941 	 */
942 	if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
943 		/*
944 		 * XXX would be more failsafe to change directory to a
945 		 * deadfs node here instead
946 		 */
947 		vrele(cwdi->cwdi_cdir);
948 		VREF(vp);
949 		cwdi->cwdi_cdir = vp;
950 	}
951 
952 	if (cwdi->cwdi_rdir != NULL)
953 		vrele(cwdi->cwdi_rdir);
954 	cwdi->cwdi_rdir = vp;
955  out:
956 	FILE_UNUSE(fp, p);
957 	return (error);
958 }
959 
960 
961 
962 /*
963  * Change current working directory (``.'').
964  */
965 /* ARGSUSED */
966 int
967 sys_chdir(l, v, retval)
968 	struct lwp *l;
969 	void *v;
970 	register_t *retval;
971 {
972 	struct sys_chdir_args /* {
973 		syscallarg(const char *) path;
974 	} */ *uap = v;
975 	struct proc *p = l->l_proc;
976 	struct cwdinfo *cwdi = p->p_cwdi;
977 	int error;
978 	struct nameidata nd;
979 
980 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
981 	    SCARG(uap, path), p);
982 	if ((error = change_dir(&nd, p)) != 0)
983 		return (error);
984 	vrele(cwdi->cwdi_cdir);
985 	cwdi->cwdi_cdir = nd.ni_vp;
986 	return (0);
987 }
988 
989 /*
990  * Change notion of root (``/'') directory.
991  */
992 /* ARGSUSED */
993 int
994 sys_chroot(l, v, retval)
995 	struct lwp *l;
996 	void *v;
997 	register_t *retval;
998 {
999 	struct sys_chroot_args /* {
1000 		syscallarg(const char *) path;
1001 	} */ *uap = v;
1002 	struct proc *p = l->l_proc;
1003 	struct cwdinfo *cwdi = p->p_cwdi;
1004 	struct vnode *vp;
1005 	int error;
1006 	struct nameidata nd;
1007 
1008 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1009 		return (error);
1010 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1011 	    SCARG(uap, path), p);
1012 	if ((error = change_dir(&nd, p)) != 0)
1013 		return (error);
1014 	if (cwdi->cwdi_rdir != NULL)
1015 		vrele(cwdi->cwdi_rdir);
1016 	vp = nd.ni_vp;
1017 	cwdi->cwdi_rdir = vp;
1018 
1019 	/*
1020 	 * Prevent escaping from chroot by putting the root under
1021 	 * the working directory.  Silently chdir to / if we aren't
1022 	 * already there.
1023 	 */
1024 	if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
1025 		/*
1026 		 * XXX would be more failsafe to change directory to a
1027 		 * deadfs node here instead
1028 		 */
1029 		vrele(cwdi->cwdi_cdir);
1030 		VREF(vp);
1031 		cwdi->cwdi_cdir = vp;
1032 	}
1033 
1034 	return (0);
1035 }
1036 
1037 /*
1038  * Common routine for chroot and chdir.
1039  */
1040 static int
1041 change_dir(ndp, p)
1042 	struct nameidata *ndp;
1043 	struct proc *p;
1044 {
1045 	struct vnode *vp;
1046 	int error;
1047 
1048 	if ((error = namei(ndp)) != 0)
1049 		return (error);
1050 	vp = ndp->ni_vp;
1051 	if (vp->v_type != VDIR)
1052 		error = ENOTDIR;
1053 	else
1054 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
1055 
1056 	if (error)
1057 		vput(vp);
1058 	else
1059 		VOP_UNLOCK(vp, 0);
1060 	return (error);
1061 }
1062 
1063 /*
1064  * Check permissions, allocate an open file structure,
1065  * and call the device open routine if any.
1066  */
1067 int
1068 sys_open(l, v, retval)
1069 	struct lwp *l;
1070 	void *v;
1071 	register_t *retval;
1072 {
1073 	struct sys_open_args /* {
1074 		syscallarg(const char *) path;
1075 		syscallarg(int) flags;
1076 		syscallarg(int) mode;
1077 	} */ *uap = v;
1078 	struct proc *p = l->l_proc;
1079 	struct cwdinfo *cwdi = p->p_cwdi;
1080 	struct filedesc *fdp = p->p_fd;
1081 	struct file *fp;
1082 	struct vnode *vp;
1083 	int flags, cmode;
1084 	int type, indx, error;
1085 	struct flock lf;
1086 	struct nameidata nd;
1087 
1088 	flags = FFLAGS(SCARG(uap, flags));
1089 	if ((flags & (FREAD | FWRITE)) == 0)
1090 		return (EINVAL);
1091 	/* falloc() will use the file descriptor for us */
1092 	if ((error = falloc(p, &fp, &indx)) != 0)
1093 		return (error);
1094 	cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1095 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1096 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1097 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1098 		FILE_UNUSE(fp, p);
1099 		ffree(fp);
1100 		if ((error == ENODEV || error == ENXIO) &&
1101 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1102 		    (error =
1103 			dupfdopen(p, indx, p->p_dupfd, flags, error)) == 0) {
1104 			*retval = indx;
1105 			return (0);
1106 		}
1107 		if (error == ERESTART)
1108 			error = EINTR;
1109 		fdremove(fdp, indx);
1110 		return (error);
1111 	}
1112 	p->p_dupfd = 0;
1113 	vp = nd.ni_vp;
1114 	fp->f_flag = flags & FMASK;
1115 	fp->f_type = DTYPE_VNODE;
1116 	fp->f_ops = &vnops;
1117 	fp->f_data = vp;
1118 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1119 		lf.l_whence = SEEK_SET;
1120 		lf.l_start = 0;
1121 		lf.l_len = 0;
1122 		if (flags & O_EXLOCK)
1123 			lf.l_type = F_WRLCK;
1124 		else
1125 			lf.l_type = F_RDLCK;
1126 		type = F_FLOCK;
1127 		if ((flags & FNONBLOCK) == 0)
1128 			type |= F_WAIT;
1129 		VOP_UNLOCK(vp, 0);
1130 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1131 		if (error) {
1132 			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
1133 			FILE_UNUSE(fp, p);
1134 			ffree(fp);
1135 			fdremove(fdp, indx);
1136 			return (error);
1137 		}
1138 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1139 		fp->f_flag |= FHASLOCK;
1140 	}
1141 	VOP_UNLOCK(vp, 0);
1142 	*retval = indx;
1143 	FILE_SET_MATURE(fp);
1144 	FILE_UNUSE(fp, p);
1145 	return (0);
1146 }
1147 
1148 /*
1149  * Get file handle system call
1150  */
1151 int
1152 sys_getfh(l, v, retval)
1153 	struct lwp *l;
1154 	void *v;
1155 	register_t *retval;
1156 {
1157 	struct sys_getfh_args /* {
1158 		syscallarg(char *) fname;
1159 		syscallarg(fhandle_t *) fhp;
1160 	} */ *uap = v;
1161 	struct proc *p = l->l_proc;
1162 	struct vnode *vp;
1163 	fhandle_t fh;
1164 	int error;
1165 	struct nameidata nd;
1166 
1167 	/*
1168 	 * Must be super user
1169 	 */
1170 	error = suser(p->p_ucred, &p->p_acflag);
1171 	if (error)
1172 		return (error);
1173 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1174 	    SCARG(uap, fname), p);
1175 	error = namei(&nd);
1176 	if (error)
1177 		return (error);
1178 	vp = nd.ni_vp;
1179 	memset(&fh, 0, sizeof(fh));
1180 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1181 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1182 	vput(vp);
1183 	if (error)
1184 		return (error);
1185 	error = copyout(&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh));
1186 	return (error);
1187 }
1188 
1189 /*
1190  * Open a file given a file handle.
1191  *
1192  * Check permissions, allocate an open file structure,
1193  * and call the device open routine if any.
1194  */
1195 int
1196 sys_fhopen(l, v, retval)
1197 	struct lwp *l;
1198 	void *v;
1199 	register_t *retval;
1200 {
1201 	struct sys_fhopen_args /* {
1202 		syscallarg(const fhandle_t *) fhp;
1203 		syscallarg(int) flags;
1204 	} */ *uap = v;
1205 	struct proc *p = l->l_proc;
1206 	struct filedesc *fdp = p->p_fd;
1207 	struct file *fp;
1208 	struct vnode *vp = NULL;
1209 	struct mount *mp;
1210 	struct ucred *cred = p->p_ucred;
1211 	int flags;
1212 	struct file *nfp;
1213 	int type, indx, error=0;
1214 	struct flock lf;
1215 	struct vattr va;
1216 	fhandle_t fh;
1217 
1218 	/*
1219 	 * Must be super user
1220 	 */
1221 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1222 		return (error);
1223 
1224 	flags = FFLAGS(SCARG(uap, flags));
1225 	if ((flags & (FREAD | FWRITE)) == 0)
1226 		return (EINVAL);
1227 	if ((flags & O_CREAT))
1228 		return (EINVAL);
1229 	/* falloc() will use the file descriptor for us */
1230 	if ((error = falloc(p, &nfp, &indx)) != 0)
1231 		return (error);
1232 	fp = nfp;
1233 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1234 		goto bad;
1235 
1236 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1237 		error = ESTALE;
1238 		goto bad;
1239 	}
1240 
1241 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1242 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1243 		goto bad;
1244 	}
1245 
1246 	/* Now do an effective vn_open */
1247 
1248 	if (vp->v_type == VSOCK) {
1249 		error = EOPNOTSUPP;
1250 		goto bad;
1251 	}
1252 	if (flags & FREAD) {
1253 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1254 			goto bad;
1255 	}
1256 	if (flags & (FWRITE | O_TRUNC)) {
1257 		if (vp->v_type == VDIR) {
1258 			error = EISDIR;
1259 			goto bad;
1260 		}
1261 		if ((error = vn_writechk(vp)) != 0 ||
1262 		    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
1263 			goto bad;
1264 	}
1265 	if (flags & O_TRUNC) {
1266 		VOP_UNLOCK(vp, 0);			/* XXX */
1267 		VOP_LEASE(vp, p, cred, LEASE_WRITE);
1268 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
1269 		VATTR_NULL(&va);
1270 		va.va_size = 0;
1271 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1272 			goto bad;
1273 	}
1274 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1275 		goto bad;
1276 	if (vp->v_type == VREG &&
1277 	    uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1278 		error = EIO;
1279 		goto bad;
1280 	}
1281 	if (flags & FWRITE)
1282 		vp->v_writecount++;
1283 
1284 	/* done with modified vn_open, now finish what sys_open does. */
1285 
1286 	fp->f_flag = flags & FMASK;
1287 	fp->f_type = DTYPE_VNODE;
1288 	fp->f_ops = &vnops;
1289 	fp->f_data = vp;
1290 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1291 		lf.l_whence = SEEK_SET;
1292 		lf.l_start = 0;
1293 		lf.l_len = 0;
1294 		if (flags & O_EXLOCK)
1295 			lf.l_type = F_WRLCK;
1296 		else
1297 			lf.l_type = F_RDLCK;
1298 		type = F_FLOCK;
1299 		if ((flags & FNONBLOCK) == 0)
1300 			type |= F_WAIT;
1301 		VOP_UNLOCK(vp, 0);
1302 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1303 		if (error) {
1304 			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
1305 			FILE_UNUSE(fp, p);
1306 			ffree(fp);
1307 			fdremove(fdp, indx);
1308 			return (error);
1309 		}
1310 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1311 		fp->f_flag |= FHASLOCK;
1312 	}
1313 	VOP_UNLOCK(vp, 0);
1314 	*retval = indx;
1315 	FILE_SET_MATURE(fp);
1316 	FILE_UNUSE(fp, p);
1317 	return (0);
1318 
1319 bad:
1320 	FILE_UNUSE(fp, p);
1321 	ffree(fp);
1322 	fdremove(fdp, indx);
1323 	if (vp != NULL)
1324 		vput(vp);
1325 	return (error);
1326 }
1327 
1328 /* ARGSUSED */
1329 int
1330 sys_fhstat(l, v, retval)
1331 	struct lwp *l;
1332 	void *v;
1333 	register_t *retval;
1334 {
1335 	struct sys_fhstat_args /* {
1336 		syscallarg(const fhandle_t *) fhp;
1337 		syscallarg(struct stat *) sb;
1338 	} */ *uap = v;
1339 	struct proc *p = l->l_proc;
1340 	struct stat sb;
1341 	int error;
1342 	fhandle_t fh;
1343 	struct mount *mp;
1344 	struct vnode *vp;
1345 
1346 	/*
1347 	 * Must be super user
1348 	 */
1349 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1350 		return (error);
1351 
1352 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1353 		return (error);
1354 
1355 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1356 		return (ESTALE);
1357 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1358 		return (error);
1359 	error = vn_stat(vp, &sb, p);
1360 	vput(vp);
1361 	if (error)
1362 		return (error);
1363 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1364 	return (error);
1365 }
1366 
1367 /* ARGSUSED */
1368 int
1369 sys_fhstatfs(l, v, retval)
1370 	struct lwp *l;
1371 	void *v;
1372 	register_t *retval;
1373 {
1374 	struct sys_fhstatfs_args /*
1375 		syscallarg(const fhandle_t *) fhp;
1376 		syscallarg(struct statfs *) buf;
1377 	} */ *uap = v;
1378 	struct proc *p = l->l_proc;
1379 	struct statfs sbuf;
1380 	fhandle_t fh;
1381 	struct mount *mp;
1382 	struct vnode *vp;
1383 	int error;
1384 
1385 	/*
1386 	 * Must be super user
1387 	 */
1388 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1389 		return (error);
1390 
1391 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1392 		return (error);
1393 
1394 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1395 		return (ESTALE);
1396 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1397 		return (error);
1398 	mp = vp->v_mount;
1399 	vput(vp);
1400 	if ((error = VFS_STATFS(mp, &sbuf, p)) != 0)
1401 		return (error);
1402 	return (copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf)));
1403 }
1404 
1405 /*
1406  * Create a special file.
1407  */
1408 /* ARGSUSED */
1409 int
1410 sys_mknod(l, v, retval)
1411 	struct lwp *l;
1412 	void *v;
1413 	register_t *retval;
1414 {
1415 	struct sys_mknod_args /* {
1416 		syscallarg(const char *) path;
1417 		syscallarg(int) mode;
1418 		syscallarg(int) dev;
1419 	} */ *uap = v;
1420 	struct proc *p = l->l_proc;
1421 	struct vnode *vp;
1422 	struct vattr vattr;
1423 	int error;
1424 	int whiteout = 0;
1425 	struct nameidata nd;
1426 
1427 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1428 		return (error);
1429 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1430 	if ((error = namei(&nd)) != 0)
1431 		return (error);
1432 	vp = nd.ni_vp;
1433 	if (vp != NULL)
1434 		error = EEXIST;
1435 	else {
1436 		VATTR_NULL(&vattr);
1437 		vattr.va_mode =
1438 		    (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1439 		vattr.va_rdev = SCARG(uap, dev);
1440 		whiteout = 0;
1441 
1442 		switch (SCARG(uap, mode) & S_IFMT) {
1443 		case S_IFMT:	/* used by badsect to flag bad sectors */
1444 			vattr.va_type = VBAD;
1445 			break;
1446 		case S_IFCHR:
1447 			vattr.va_type = VCHR;
1448 			break;
1449 		case S_IFBLK:
1450 			vattr.va_type = VBLK;
1451 			break;
1452 		case S_IFWHT:
1453 			whiteout = 1;
1454 			break;
1455 		default:
1456 			error = EINVAL;
1457 			break;
1458 		}
1459 	}
1460 	if (!error) {
1461 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1462 		if (whiteout) {
1463 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1464 			if (error)
1465 				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1466 			vput(nd.ni_dvp);
1467 		} else {
1468 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1469 						&nd.ni_cnd, &vattr);
1470 			if (error == 0)
1471 				vput(nd.ni_vp);
1472 		}
1473 	} else {
1474 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1475 		if (nd.ni_dvp == vp)
1476 			vrele(nd.ni_dvp);
1477 		else
1478 			vput(nd.ni_dvp);
1479 		if (vp)
1480 			vrele(vp);
1481 	}
1482 	return (error);
1483 }
1484 
1485 /*
1486  * Create a named pipe.
1487  */
1488 /* ARGSUSED */
1489 int
1490 sys_mkfifo(l, v, retval)
1491 	struct lwp *l;
1492 	void *v;
1493 	register_t *retval;
1494 {
1495 	struct sys_mkfifo_args /* {
1496 		syscallarg(const char *) path;
1497 		syscallarg(int) mode;
1498 	} */ *uap = v;
1499 	struct proc *p = l->l_proc;
1500 	struct vattr vattr;
1501 	int error;
1502 	struct nameidata nd;
1503 
1504 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1505 	if ((error = namei(&nd)) != 0)
1506 		return (error);
1507 	if (nd.ni_vp != NULL) {
1508 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1509 		if (nd.ni_dvp == nd.ni_vp)
1510 			vrele(nd.ni_dvp);
1511 		else
1512 			vput(nd.ni_dvp);
1513 		vrele(nd.ni_vp);
1514 		return (EEXIST);
1515 	}
1516 	VATTR_NULL(&vattr);
1517 	vattr.va_type = VFIFO;
1518 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1519 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1520 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1521 	if (error == 0)
1522 		vput(nd.ni_vp);
1523 	return (error);
1524 }
1525 
1526 /*
1527  * Make a hard file link.
1528  */
1529 /* ARGSUSED */
1530 int
1531 sys_link(l, v, retval)
1532 	struct lwp *l;
1533 	void *v;
1534 	register_t *retval;
1535 {
1536 	struct sys_link_args /* {
1537 		syscallarg(const char *) path;
1538 		syscallarg(const char *) link;
1539 	} */ *uap = v;
1540 	struct proc *p = l->l_proc;
1541 	struct vnode *vp;
1542 	struct nameidata nd;
1543 	int error;
1544 
1545 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1546 	if ((error = namei(&nd)) != 0)
1547 		return (error);
1548 	vp = nd.ni_vp;
1549 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
1550 	if ((error = namei(&nd)) != 0)
1551 		goto out;
1552 	if (nd.ni_vp) {
1553 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1554 		if (nd.ni_dvp == nd.ni_vp)
1555 			vrele(nd.ni_dvp);
1556 		else
1557 			vput(nd.ni_dvp);
1558 		vrele(nd.ni_vp);
1559 		error = EEXIST;
1560 		goto out;
1561 	}
1562 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1563 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1564 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1565 out:
1566 	vrele(vp);
1567 	return (error);
1568 }
1569 
1570 /*
1571  * Make a symbolic link.
1572  */
1573 /* ARGSUSED */
1574 int
1575 sys_symlink(l, v, retval)
1576 	struct lwp *l;
1577 	void *v;
1578 	register_t *retval;
1579 {
1580 	struct sys_symlink_args /* {
1581 		syscallarg(const char *) path;
1582 		syscallarg(const char *) link;
1583 	} */ *uap = v;
1584 	struct proc *p = l->l_proc;
1585 	struct vattr vattr;
1586 	char *path;
1587 	int error;
1588 	struct nameidata nd;
1589 
1590 	path = PNBUF_GET();
1591 	error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1592 	if (error)
1593 		goto out;
1594 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
1595 	if ((error = namei(&nd)) != 0)
1596 		goto out;
1597 	if (nd.ni_vp) {
1598 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1599 		if (nd.ni_dvp == nd.ni_vp)
1600 			vrele(nd.ni_dvp);
1601 		else
1602 			vput(nd.ni_dvp);
1603 		vrele(nd.ni_vp);
1604 		error = EEXIST;
1605 		goto out;
1606 	}
1607 	VATTR_NULL(&vattr);
1608 	vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1609 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1610 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1611 	if (error == 0)
1612 		vput(nd.ni_vp);
1613 out:
1614 	PNBUF_PUT(path);
1615 	return (error);
1616 }
1617 
1618 /*
1619  * Delete a whiteout from the filesystem.
1620  */
1621 /* ARGSUSED */
1622 int
1623 sys_undelete(l, v, retval)
1624 	struct lwp *l;
1625 	void *v;
1626 	register_t *retval;
1627 {
1628 	struct sys_undelete_args /* {
1629 		syscallarg(const char *) path;
1630 	} */ *uap = v;
1631 	struct proc *p = l->l_proc;
1632 	int error;
1633 	struct nameidata nd;
1634 
1635 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1636 	    SCARG(uap, path), p);
1637 	error = namei(&nd);
1638 	if (error)
1639 		return (error);
1640 
1641 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1642 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1643 		if (nd.ni_dvp == nd.ni_vp)
1644 			vrele(nd.ni_dvp);
1645 		else
1646 			vput(nd.ni_dvp);
1647 		if (nd.ni_vp)
1648 			vrele(nd.ni_vp);
1649 		return (EEXIST);
1650 	}
1651 
1652 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1653 	if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
1654 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1655 	vput(nd.ni_dvp);
1656 	return (error);
1657 }
1658 
1659 /*
1660  * Delete a name from the filesystem.
1661  */
1662 /* ARGSUSED */
1663 int
1664 sys_unlink(l, v, retval)
1665 	struct lwp *l;
1666 	void *v;
1667 	register_t *retval;
1668 {
1669 	struct sys_unlink_args /* {
1670 		syscallarg(const char *) path;
1671 	} */ *uap = v;
1672 	struct proc *p = l->l_proc;
1673 	struct vnode *vp;
1674 	int error;
1675 	struct nameidata nd;
1676 
1677 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1678 	    SCARG(uap, path), p);
1679 	if ((error = namei(&nd)) != 0)
1680 		return (error);
1681 	vp = nd.ni_vp;
1682 
1683 	/*
1684 	 * The root of a mounted filesystem cannot be deleted.
1685 	 */
1686 	if (vp->v_flag & VROOT) {
1687 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1688 		if (nd.ni_dvp == vp)
1689 			vrele(nd.ni_dvp);
1690 		else
1691 			vput(nd.ni_dvp);
1692 		vput(vp);
1693 		error = EBUSY;
1694 		goto out;
1695 	}
1696 
1697 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1698 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1699 	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1700 out:
1701 	return (error);
1702 }
1703 
1704 /*
1705  * Reposition read/write file offset.
1706  */
1707 int
1708 sys_lseek(l, v, retval)
1709 	struct lwp *l;
1710 	void *v;
1711 	register_t *retval;
1712 {
1713 	struct sys_lseek_args /* {
1714 		syscallarg(int) fd;
1715 		syscallarg(int) pad;
1716 		syscallarg(off_t) offset;
1717 		syscallarg(int) whence;
1718 	} */ *uap = v;
1719 	struct proc *p = l->l_proc;
1720 	struct ucred *cred = p->p_ucred;
1721 	struct filedesc *fdp = p->p_fd;
1722 	struct file *fp;
1723 	struct vnode *vp;
1724 	struct vattr vattr;
1725 	off_t newoff;
1726 	int error;
1727 
1728 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1729 		return (EBADF);
1730 
1731 	FILE_USE(fp);
1732 
1733 	vp = (struct vnode *)fp->f_data;
1734 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1735 		error = ESPIPE;
1736 		goto out;
1737 	}
1738 
1739 	switch (SCARG(uap, whence)) {
1740 	case SEEK_CUR:
1741 		newoff = fp->f_offset + SCARG(uap, offset);
1742 		break;
1743 	case SEEK_END:
1744 		error = VOP_GETATTR(vp, &vattr, cred, p);
1745 		if (error)
1746 			goto out;
1747 		newoff = SCARG(uap, offset) + vattr.va_size;
1748 		break;
1749 	case SEEK_SET:
1750 		newoff = SCARG(uap, offset);
1751 		break;
1752 	default:
1753 		error = EINVAL;
1754 		goto out;
1755 	}
1756 	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
1757 		goto out;
1758 
1759 	*(off_t *)retval = fp->f_offset = newoff;
1760  out:
1761 	FILE_UNUSE(fp, p);
1762 	return (error);
1763 }
1764 
1765 /*
1766  * Positional read system call.
1767  */
1768 int
1769 sys_pread(l, v, retval)
1770 	struct lwp *l;
1771 	void *v;
1772 	register_t *retval;
1773 {
1774 	struct sys_pread_args /* {
1775 		syscallarg(int) fd;
1776 		syscallarg(void *) buf;
1777 		syscallarg(size_t) nbyte;
1778 		syscallarg(off_t) offset;
1779 	} */ *uap = v;
1780 	struct proc *p = l->l_proc;
1781 	struct filedesc *fdp = p->p_fd;
1782 	struct file *fp;
1783 	struct vnode *vp;
1784 	off_t offset;
1785 	int error, fd = SCARG(uap, fd);
1786 
1787 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1788 		return (EBADF);
1789 
1790 	if ((fp->f_flag & FREAD) == 0) {
1791 		simple_unlock(&fp->f_slock);
1792 		return (EBADF);
1793 	}
1794 
1795 	FILE_USE(fp);
1796 
1797 	vp = (struct vnode *)fp->f_data;
1798 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1799 		error = ESPIPE;
1800 		goto out;
1801 	}
1802 
1803 	offset = SCARG(uap, offset);
1804 
1805 	/*
1806 	 * XXX This works because no file systems actually
1807 	 * XXX take any action on the seek operation.
1808 	 */
1809 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1810 		goto out;
1811 
1812 	/* dofileread() will unuse the descriptor for us */
1813 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
1814 	    &offset, 0, retval));
1815 
1816  out:
1817 	FILE_UNUSE(fp, p);
1818 	return (error);
1819 }
1820 
1821 /*
1822  * Positional scatter read system call.
1823  */
1824 int
1825 sys_preadv(l, v, retval)
1826 	struct lwp *l;
1827 	void *v;
1828 	register_t *retval;
1829 {
1830 	struct sys_preadv_args /* {
1831 		syscallarg(int) fd;
1832 		syscallarg(const struct iovec *) iovp;
1833 		syscallarg(int) iovcnt;
1834 		syscallarg(off_t) offset;
1835 	} */ *uap = v;
1836 	struct proc *p = l->l_proc;
1837 	struct filedesc *fdp = p->p_fd;
1838 	struct file *fp;
1839 	struct vnode *vp;
1840 	off_t offset;
1841 	int error, fd = SCARG(uap, fd);
1842 
1843 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1844 		return (EBADF);
1845 
1846 	if ((fp->f_flag & FREAD) == 0) {
1847 		simple_unlock(&fp->f_slock);
1848 		return (EBADF);
1849 	}
1850 
1851 	FILE_USE(fp);
1852 
1853 	vp = (struct vnode *)fp->f_data;
1854 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1855 		error = ESPIPE;
1856 		goto out;
1857 	}
1858 
1859 	offset = SCARG(uap, offset);
1860 
1861 	/*
1862 	 * XXX This works because no file systems actually
1863 	 * XXX take any action on the seek operation.
1864 	 */
1865 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1866 		goto out;
1867 
1868 	/* dofilereadv() will unuse the descriptor for us */
1869 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
1870 	    &offset, 0, retval));
1871 
1872  out:
1873 	FILE_UNUSE(fp, p);
1874 	return (error);
1875 }
1876 
1877 /*
1878  * Positional write system call.
1879  */
1880 int
1881 sys_pwrite(l, v, retval)
1882 	struct lwp *l;
1883 	void *v;
1884 	register_t *retval;
1885 {
1886 	struct sys_pwrite_args /* {
1887 		syscallarg(int) fd;
1888 		syscallarg(const void *) buf;
1889 		syscallarg(size_t) nbyte;
1890 		syscallarg(off_t) offset;
1891 	} */ *uap = v;
1892 	struct proc *p = l->l_proc;
1893 	struct filedesc *fdp = p->p_fd;
1894 	struct file *fp;
1895 	struct vnode *vp;
1896 	off_t offset;
1897 	int error, fd = SCARG(uap, fd);
1898 
1899 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1900 		return (EBADF);
1901 
1902 	if ((fp->f_flag & FWRITE) == 0) {
1903 		simple_unlock(&fp->f_slock);
1904 		return (EBADF);
1905 	}
1906 
1907 	FILE_USE(fp);
1908 
1909 	vp = (struct vnode *)fp->f_data;
1910 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1911 		error = ESPIPE;
1912 		goto out;
1913 	}
1914 
1915 	offset = SCARG(uap, offset);
1916 
1917 	/*
1918 	 * XXX This works because no file systems actually
1919 	 * XXX take any action on the seek operation.
1920 	 */
1921 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1922 		goto out;
1923 
1924 	/* dofilewrite() will unuse the descriptor for us */
1925 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
1926 	    &offset, 0, retval));
1927 
1928  out:
1929 	FILE_UNUSE(fp, p);
1930 	return (error);
1931 }
1932 
1933 /*
1934  * Positional gather write system call.
1935  */
1936 int
1937 sys_pwritev(l, v, retval)
1938 	struct lwp *l;
1939 	void *v;
1940 	register_t *retval;
1941 {
1942 	struct sys_pwritev_args /* {
1943 		syscallarg(int) fd;
1944 		syscallarg(const struct iovec *) iovp;
1945 		syscallarg(int) iovcnt;
1946 		syscallarg(off_t) offset;
1947 	} */ *uap = v;
1948 	struct proc *p = l->l_proc;
1949 	struct filedesc *fdp = p->p_fd;
1950 	struct file *fp;
1951 	struct vnode *vp;
1952 	off_t offset;
1953 	int error, fd = SCARG(uap, fd);
1954 
1955 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1956 		return (EBADF);
1957 
1958 	if ((fp->f_flag & FWRITE) == 0) {
1959 		simple_unlock(&fp->f_slock);
1960 		return (EBADF);
1961 	}
1962 
1963 	FILE_USE(fp);
1964 
1965 	vp = (struct vnode *)fp->f_data;
1966 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1967 		error = ESPIPE;
1968 		goto out;
1969 	}
1970 
1971 	offset = SCARG(uap, offset);
1972 
1973 	/*
1974 	 * XXX This works because no file systems actually
1975 	 * XXX take any action on the seek operation.
1976 	 */
1977 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1978 		goto out;
1979 
1980 	/* dofilewritev() will unuse the descriptor for us */
1981 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
1982 	    &offset, 0, retval));
1983 
1984  out:
1985 	FILE_UNUSE(fp, p);
1986 	return (error);
1987 }
1988 
1989 /*
1990  * Check access permissions.
1991  */
1992 int
1993 sys_access(l, v, retval)
1994 	struct lwp *l;
1995 	void *v;
1996 	register_t *retval;
1997 {
1998 	struct sys_access_args /* {
1999 		syscallarg(const char *) path;
2000 		syscallarg(int) flags;
2001 	} */ *uap = v;
2002 	struct proc *p = l->l_proc;
2003 	struct ucred *cred = crget();
2004 	struct vnode *vp;
2005 	int error, flags;
2006 	struct nameidata nd;
2007 
2008 	(void)memcpy(cred, p->p_ucred, sizeof(*cred));
2009 	cred->cr_ref = 1;
2010 	cred->cr_uid = p->p_cred->p_ruid;
2011 	cred->cr_gid = p->p_cred->p_rgid;
2012 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2013 	    SCARG(uap, path), p);
2014 	/* Override default credentials */
2015 	nd.ni_cnd.cn_cred = cred;
2016 	if ((error = namei(&nd)) != 0)
2017 		goto out;
2018 	vp = nd.ni_vp;
2019 
2020 	/* Flags == 0 means only check for existence. */
2021 	if (SCARG(uap, flags)) {
2022 		flags = 0;
2023 		if (SCARG(uap, flags) & R_OK)
2024 			flags |= VREAD;
2025 		if (SCARG(uap, flags) & W_OK)
2026 			flags |= VWRITE;
2027 		if (SCARG(uap, flags) & X_OK)
2028 			flags |= VEXEC;
2029 
2030 		error = VOP_ACCESS(vp, flags, cred, p);
2031 		if (!error && (flags & VWRITE))
2032 			error = vn_writechk(vp);
2033 	}
2034 	vput(vp);
2035 out:
2036 	crfree(cred);
2037 	return (error);
2038 }
2039 
2040 /*
2041  * Get file status; this version follows links.
2042  */
2043 /* ARGSUSED */
2044 int
2045 sys___stat13(l, v, retval)
2046 	struct lwp *l;
2047 	void *v;
2048 	register_t *retval;
2049 {
2050 	struct sys___stat13_args /* {
2051 		syscallarg(const char *) path;
2052 		syscallarg(struct stat *) ub;
2053 	} */ *uap = v;
2054 	struct proc *p = l->l_proc;
2055 	struct stat sb;
2056 	int error;
2057 	struct nameidata nd;
2058 
2059 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2060 	    SCARG(uap, path), p);
2061 	if ((error = namei(&nd)) != 0)
2062 		return (error);
2063 	error = vn_stat(nd.ni_vp, &sb, p);
2064 	vput(nd.ni_vp);
2065 	if (error)
2066 		return (error);
2067 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2068 	return (error);
2069 }
2070 
2071 /*
2072  * Get file status; this version does not follow links.
2073  */
2074 /* ARGSUSED */
2075 int
2076 sys___lstat13(l, v, retval)
2077 	struct lwp *l;
2078 	void *v;
2079 	register_t *retval;
2080 {
2081 	struct sys___lstat13_args /* {
2082 		syscallarg(const char *) path;
2083 		syscallarg(struct stat *) ub;
2084 	} */ *uap = v;
2085 	struct proc *p = l->l_proc;
2086 	struct stat sb;
2087 	int error;
2088 	struct nameidata nd;
2089 
2090 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2091 	    SCARG(uap, path), p);
2092 	if ((error = namei(&nd)) != 0)
2093 		return (error);
2094 	error = vn_stat(nd.ni_vp, &sb, p);
2095 	vput(nd.ni_vp);
2096 	if (error)
2097 		return (error);
2098 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2099 	return (error);
2100 }
2101 
2102 /*
2103  * Get configurable pathname variables.
2104  */
2105 /* ARGSUSED */
2106 int
2107 sys_pathconf(l, v, retval)
2108 	struct lwp *l;
2109 	void *v;
2110 	register_t *retval;
2111 {
2112 	struct sys_pathconf_args /* {
2113 		syscallarg(const char *) path;
2114 		syscallarg(int) name;
2115 	} */ *uap = v;
2116 	struct proc *p = l->l_proc;
2117 	int error;
2118 	struct nameidata nd;
2119 
2120 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2121 	    SCARG(uap, path), p);
2122 	if ((error = namei(&nd)) != 0)
2123 		return (error);
2124 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2125 	vput(nd.ni_vp);
2126 	return (error);
2127 }
2128 
2129 /*
2130  * Return target name of a symbolic link.
2131  */
2132 /* ARGSUSED */
2133 int
2134 sys_readlink(l, v, retval)
2135 	struct lwp *l;
2136 	void *v;
2137 	register_t *retval;
2138 {
2139 	struct sys_readlink_args /* {
2140 		syscallarg(const char *) path;
2141 		syscallarg(char *) buf;
2142 		syscallarg(size_t) count;
2143 	} */ *uap = v;
2144 	struct proc *p = l->l_proc;
2145 	struct vnode *vp;
2146 	struct iovec aiov;
2147 	struct uio auio;
2148 	int error;
2149 	struct nameidata nd;
2150 
2151 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2152 	    SCARG(uap, path), p);
2153 	if ((error = namei(&nd)) != 0)
2154 		return (error);
2155 	vp = nd.ni_vp;
2156 	if (vp->v_type != VLNK)
2157 		error = EINVAL;
2158 	else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2159 	    (error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) == 0) {
2160 		aiov.iov_base = SCARG(uap, buf);
2161 		aiov.iov_len = SCARG(uap, count);
2162 		auio.uio_iov = &aiov;
2163 		auio.uio_iovcnt = 1;
2164 		auio.uio_offset = 0;
2165 		auio.uio_rw = UIO_READ;
2166 		auio.uio_segflg = UIO_USERSPACE;
2167 		auio.uio_procp = p;
2168 		auio.uio_resid = SCARG(uap, count);
2169 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2170 	}
2171 	vput(vp);
2172 	*retval = SCARG(uap, count) - auio.uio_resid;
2173 	return (error);
2174 }
2175 
2176 /*
2177  * Change flags of a file given a path name.
2178  */
2179 /* ARGSUSED */
2180 int
2181 sys_chflags(l, v, retval)
2182 	struct lwp *l;
2183 	void *v;
2184 	register_t *retval;
2185 {
2186 	struct sys_chflags_args /* {
2187 		syscallarg(const char *) path;
2188 		syscallarg(u_long) flags;
2189 	} */ *uap = v;
2190 	struct proc *p = l->l_proc;
2191 	struct vnode *vp;
2192 	int error;
2193 	struct nameidata nd;
2194 
2195 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2196 	if ((error = namei(&nd)) != 0)
2197 		return (error);
2198 	vp = nd.ni_vp;
2199 	error = change_flags(vp, SCARG(uap, flags), p);
2200 	vput(vp);
2201 	return (error);
2202 }
2203 
2204 /*
2205  * Change flags of a file given a file descriptor.
2206  */
2207 /* ARGSUSED */
2208 int
2209 sys_fchflags(l, v, retval)
2210 	struct lwp *l;
2211 	void *v;
2212 	register_t *retval;
2213 {
2214 	struct sys_fchflags_args /* {
2215 		syscallarg(int) fd;
2216 		syscallarg(u_long) flags;
2217 	} */ *uap = v;
2218 	struct proc *p = l->l_proc;
2219 	struct vnode *vp;
2220 	struct file *fp;
2221 	int error;
2222 
2223 	/* getvnode() will use the descriptor for us */
2224 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2225 		return (error);
2226 	vp = (struct vnode *)fp->f_data;
2227 	error = change_flags(vp, SCARG(uap, flags), p);
2228 	VOP_UNLOCK(vp, 0);
2229 	FILE_UNUSE(fp, p);
2230 	return (error);
2231 }
2232 
2233 /*
2234  * Change flags of a file given a path name; this version does
2235  * not follow links.
2236  */
2237 int
2238 sys_lchflags(l, v, retval)
2239 	struct lwp *l;
2240 	void *v;
2241 	register_t *retval;
2242 {
2243 	struct sys_lchflags_args /* {
2244 		syscallarg(const char *) path;
2245 		syscallarg(u_long) flags;
2246 	} */ *uap = v;
2247 	struct proc *p = l->l_proc;
2248 	struct vnode *vp;
2249 	int error;
2250 	struct nameidata nd;
2251 
2252 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2253 	if ((error = namei(&nd)) != 0)
2254 		return (error);
2255 	vp = nd.ni_vp;
2256 	error = change_flags(vp, SCARG(uap, flags), p);
2257 	vput(vp);
2258 	return (error);
2259 }
2260 
2261 /*
2262  * Common routine to change flags of a file.
2263  */
2264 int
2265 change_flags(vp, flags, p)
2266 	struct vnode *vp;
2267 	u_long flags;
2268 	struct proc *p;
2269 {
2270 	struct vattr vattr;
2271 	int error;
2272 
2273 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2274 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2275 	/*
2276 	 * Non-superusers cannot change the flags on devices, even if they
2277 	 * own them.
2278 	 */
2279 	if (suser(p->p_ucred, &p->p_acflag) != 0) {
2280 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2281 			goto out;
2282 		if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2283 			error = EINVAL;
2284 			goto out;
2285 		}
2286 	}
2287 	VATTR_NULL(&vattr);
2288 	vattr.va_flags = flags;
2289 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2290 out:
2291 	return (error);
2292 }
2293 
2294 /*
2295  * Change mode of a file given path name; this version follows links.
2296  */
2297 /* ARGSUSED */
2298 int
2299 sys_chmod(l, v, retval)
2300 	struct lwp *l;
2301 	void *v;
2302 	register_t *retval;
2303 {
2304 	struct sys_chmod_args /* {
2305 		syscallarg(const char *) path;
2306 		syscallarg(int) mode;
2307 	} */ *uap = v;
2308 	struct proc *p = l->l_proc;
2309 	int error;
2310 	struct nameidata nd;
2311 
2312 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2313 	if ((error = namei(&nd)) != 0)
2314 		return (error);
2315 
2316 	error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
2317 
2318 	vrele(nd.ni_vp);
2319 	return (error);
2320 }
2321 
2322 /*
2323  * Change mode of a file given a file descriptor.
2324  */
2325 /* ARGSUSED */
2326 int
2327 sys_fchmod(l, v, retval)
2328 	struct lwp *l;
2329 	void *v;
2330 	register_t *retval;
2331 {
2332 	struct sys_fchmod_args /* {
2333 		syscallarg(int) fd;
2334 		syscallarg(int) mode;
2335 	} */ *uap = v;
2336 	struct proc *p = l->l_proc;
2337 	struct file *fp;
2338 	int error;
2339 
2340 	/* getvnode() will use the descriptor for us */
2341 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2342 		return (error);
2343 
2344 	error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), p);
2345 	FILE_UNUSE(fp, p);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * Change mode of a file given path name; this version does not follow links.
2351  */
2352 /* ARGSUSED */
2353 int
2354 sys_lchmod(l, v, retval)
2355 	struct lwp *l;
2356 	void *v;
2357 	register_t *retval;
2358 {
2359 	struct sys_lchmod_args /* {
2360 		syscallarg(const char *) path;
2361 		syscallarg(int) mode;
2362 	} */ *uap = v;
2363 	struct proc *p = l->l_proc;
2364 	int error;
2365 	struct nameidata nd;
2366 
2367 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2368 	if ((error = namei(&nd)) != 0)
2369 		return (error);
2370 
2371 	error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
2372 
2373 	vrele(nd.ni_vp);
2374 	return (error);
2375 }
2376 
2377 /*
2378  * Common routine to set mode given a vnode.
2379  */
2380 static int
2381 change_mode(vp, mode, p)
2382 	struct vnode *vp;
2383 	int mode;
2384 	struct proc *p;
2385 {
2386 	struct vattr vattr;
2387 	int error;
2388 
2389 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2390 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2391 	VATTR_NULL(&vattr);
2392 	vattr.va_mode = mode & ALLPERMS;
2393 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2394 	VOP_UNLOCK(vp, 0);
2395 	return (error);
2396 }
2397 
2398 /*
2399  * Set ownership given a path name; this version follows links.
2400  */
2401 /* ARGSUSED */
2402 int
2403 sys_chown(l, v, retval)
2404 	struct lwp *l;
2405 	void *v;
2406 	register_t *retval;
2407 {
2408 	struct sys_chown_args /* {
2409 		syscallarg(const char *) path;
2410 		syscallarg(uid_t) uid;
2411 		syscallarg(gid_t) gid;
2412 	} */ *uap = v;
2413 	struct proc *p = l->l_proc;
2414 	int error;
2415 	struct nameidata nd;
2416 
2417 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2418 	if ((error = namei(&nd)) != 0)
2419 		return (error);
2420 
2421 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
2422 
2423 	vrele(nd.ni_vp);
2424 	return (error);
2425 }
2426 
2427 /*
2428  * Set ownership given a path name; this version follows links.
2429  * Provides POSIX semantics.
2430  */
2431 /* ARGSUSED */
2432 int
2433 sys___posix_chown(l, v, retval)
2434 	struct lwp *l;
2435 	void *v;
2436 	register_t *retval;
2437 {
2438 	struct sys_chown_args /* {
2439 		syscallarg(const char *) path;
2440 		syscallarg(uid_t) uid;
2441 		syscallarg(gid_t) gid;
2442 	} */ *uap = v;
2443 	struct proc *p = l->l_proc;
2444 	int error;
2445 	struct nameidata nd;
2446 
2447 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2448 	if ((error = namei(&nd)) != 0)
2449 		return (error);
2450 
2451 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
2452 
2453 	vrele(nd.ni_vp);
2454 	return (error);
2455 }
2456 
2457 /*
2458  * Set ownership given a file descriptor.
2459  */
2460 /* ARGSUSED */
2461 int
2462 sys_fchown(l, v, retval)
2463 	struct lwp *l;
2464 	void *v;
2465 	register_t *retval;
2466 {
2467 	struct sys_fchown_args /* {
2468 		syscallarg(int) fd;
2469 		syscallarg(uid_t) uid;
2470 		syscallarg(gid_t) gid;
2471 	} */ *uap = v;
2472 	struct proc *p = l->l_proc;
2473 	int error;
2474 	struct file *fp;
2475 
2476 	/* getvnode() will use the descriptor for us */
2477 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2478 		return (error);
2479 
2480 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2481 	    SCARG(uap, gid), p, 0);
2482 	FILE_UNUSE(fp, p);
2483 	return (error);
2484 }
2485 
2486 /*
2487  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2488  */
2489 /* ARGSUSED */
2490 int
2491 sys___posix_fchown(l, v, retval)
2492 	struct lwp *l;
2493 	void *v;
2494 	register_t *retval;
2495 {
2496 	struct sys_fchown_args /* {
2497 		syscallarg(int) fd;
2498 		syscallarg(uid_t) uid;
2499 		syscallarg(gid_t) gid;
2500 	} */ *uap = v;
2501 	struct proc *p = l->l_proc;
2502 	int error;
2503 	struct file *fp;
2504 
2505 	/* getvnode() will use the descriptor for us */
2506 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2507 		return (error);
2508 
2509 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2510 	    SCARG(uap, gid), p, 1);
2511 	FILE_UNUSE(fp, p);
2512 	return (error);
2513 }
2514 
2515 /*
2516  * Set ownership given a path name; this version does not follow links.
2517  */
2518 /* ARGSUSED */
2519 int
2520 sys_lchown(l, v, retval)
2521 	struct lwp *l;
2522 	void *v;
2523 	register_t *retval;
2524 {
2525 	struct sys_lchown_args /* {
2526 		syscallarg(const char *) path;
2527 		syscallarg(uid_t) uid;
2528 		syscallarg(gid_t) gid;
2529 	} */ *uap = v;
2530 	struct proc *p = l->l_proc;
2531 	int error;
2532 	struct nameidata nd;
2533 
2534 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2535 	if ((error = namei(&nd)) != 0)
2536 		return (error);
2537 
2538 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
2539 
2540 	vrele(nd.ni_vp);
2541 	return (error);
2542 }
2543 
2544 /*
2545  * Set ownership given a path name; this version does not follow links.
2546  * Provides POSIX/XPG semantics.
2547  */
2548 /* ARGSUSED */
2549 int
2550 sys___posix_lchown(l, v, retval)
2551 	struct lwp *l;
2552 	void *v;
2553 	register_t *retval;
2554 {
2555 	struct sys_lchown_args /* {
2556 		syscallarg(const char *) path;
2557 		syscallarg(uid_t) uid;
2558 		syscallarg(gid_t) gid;
2559 	} */ *uap = v;
2560 	struct proc *p = l->l_proc;
2561 	int error;
2562 	struct nameidata nd;
2563 
2564 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2565 	if ((error = namei(&nd)) != 0)
2566 		return (error);
2567 
2568 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
2569 
2570 	vrele(nd.ni_vp);
2571 	return (error);
2572 }
2573 
2574 /*
2575  * Common routine to set ownership given a vnode.
2576  */
2577 static int
2578 change_owner(vp, uid, gid, p, posix_semantics)
2579 	struct vnode *vp;
2580 	uid_t uid;
2581 	gid_t gid;
2582 	struct proc *p;
2583 	int posix_semantics;
2584 {
2585 	struct vattr vattr;
2586 	mode_t newmode;
2587 	int error;
2588 
2589 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2590 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2591 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2592 		goto out;
2593 
2594 #define CHANGED(x) ((int)(x) != -1)
2595 	newmode = vattr.va_mode;
2596 	if (posix_semantics) {
2597 		/*
2598 		 * POSIX/XPG semantics: if the caller is not the super-user,
2599 		 * clear set-user-id and set-group-id bits.  Both POSIX and
2600 		 * the XPG consider the behaviour for calls by the super-user
2601 		 * implementation-defined; we leave the set-user-id and set-
2602 		 * group-id settings intact in that case.
2603 		 */
2604 		if (suser(p->p_ucred, NULL) != 0)
2605 			newmode &= ~(S_ISUID | S_ISGID);
2606 	} else {
2607 		/*
2608 		 * NetBSD semantics: when changing owner and/or group,
2609 		 * clear the respective bit(s).
2610 		 */
2611 		if (CHANGED(uid))
2612 			newmode &= ~S_ISUID;
2613 		if (CHANGED(gid))
2614 			newmode &= ~S_ISGID;
2615 	}
2616 	/* Update va_mode iff altered. */
2617 	if (vattr.va_mode == newmode)
2618 		newmode = VNOVAL;
2619 
2620 	VATTR_NULL(&vattr);
2621 	vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2622 	vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2623 	vattr.va_mode = newmode;
2624 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2625 #undef CHANGED
2626 
2627 out:
2628 	VOP_UNLOCK(vp, 0);
2629 	return (error);
2630 }
2631 
2632 /*
2633  * Set the access and modification times given a path name; this
2634  * version follows links.
2635  */
2636 /* ARGSUSED */
2637 int
2638 sys_utimes(l, v, retval)
2639 	struct lwp *l;
2640 	void *v;
2641 	register_t *retval;
2642 {
2643 	struct sys_utimes_args /* {
2644 		syscallarg(const char *) path;
2645 		syscallarg(const struct timeval *) tptr;
2646 	} */ *uap = v;
2647 	struct proc *p = l->l_proc;
2648 	int error;
2649 	struct nameidata nd;
2650 
2651 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2652 	if ((error = namei(&nd)) != 0)
2653 		return (error);
2654 
2655 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
2656 
2657 	vrele(nd.ni_vp);
2658 	return (error);
2659 }
2660 
2661 /*
2662  * Set the access and modification times given a file descriptor.
2663  */
2664 /* ARGSUSED */
2665 int
2666 sys_futimes(l, v, retval)
2667 	struct lwp *l;
2668 	void *v;
2669 	register_t *retval;
2670 {
2671 	struct sys_futimes_args /* {
2672 		syscallarg(int) fd;
2673 		syscallarg(const struct timeval *) tptr;
2674 	} */ *uap = v;
2675 	struct proc *p = l->l_proc;
2676 	int error;
2677 	struct file *fp;
2678 
2679 	/* getvnode() will use the descriptor for us */
2680 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2681 		return (error);
2682 
2683 	error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), p);
2684 	FILE_UNUSE(fp, p);
2685 	return (error);
2686 }
2687 
2688 /*
2689  * Set the access and modification times given a path name; this
2690  * version does not follow links.
2691  */
2692 /* ARGSUSED */
2693 int
2694 sys_lutimes(l, v, retval)
2695 	struct lwp *l;
2696 	void *v;
2697 	register_t *retval;
2698 {
2699 	struct sys_lutimes_args /* {
2700 		syscallarg(const char *) path;
2701 		syscallarg(const struct timeval *) tptr;
2702 	} */ *uap = v;
2703 	struct proc *p = l->l_proc;
2704 	int error;
2705 	struct nameidata nd;
2706 
2707 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2708 	if ((error = namei(&nd)) != 0)
2709 		return (error);
2710 
2711 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
2712 
2713 	vrele(nd.ni_vp);
2714 	return (error);
2715 }
2716 
2717 /*
2718  * Common routine to set access and modification times given a vnode.
2719  */
2720 static int
2721 change_utimes(vp, tptr, p)
2722 	struct vnode *vp;
2723 	const struct timeval *tptr;
2724 	struct proc *p;
2725 {
2726 	struct timeval tv[2];
2727 	struct vattr vattr;
2728 	int error;
2729 
2730 	VATTR_NULL(&vattr);
2731 	if (tptr == NULL) {
2732 		microtime(&tv[0]);
2733 		tv[1] = tv[0];
2734 		vattr.va_vaflags |= VA_UTIMES_NULL;
2735 	} else {
2736 		error = copyin(tptr, tv, sizeof(tv));
2737 		if (error)
2738 			return (error);
2739 	}
2740 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2741 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2742 	vattr.va_atime.tv_sec = tv[0].tv_sec;
2743 	vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
2744 	vattr.va_mtime.tv_sec = tv[1].tv_sec;
2745 	vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
2746 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2747 	VOP_UNLOCK(vp, 0);
2748 	return (error);
2749 }
2750 
2751 /*
2752  * Truncate a file given its path name.
2753  */
2754 /* ARGSUSED */
2755 int
2756 sys_truncate(l, v, retval)
2757 	struct lwp *l;
2758 	void *v;
2759 	register_t *retval;
2760 {
2761 	struct sys_truncate_args /* {
2762 		syscallarg(const char *) path;
2763 		syscallarg(int) pad;
2764 		syscallarg(off_t) length;
2765 	} */ *uap = v;
2766 	struct proc *p = l->l_proc;
2767 	struct vnode *vp;
2768 	struct vattr vattr;
2769 	int error;
2770 	struct nameidata nd;
2771 
2772 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2773 	if ((error = namei(&nd)) != 0)
2774 		return (error);
2775 	vp = nd.ni_vp;
2776 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2777 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2778 	if (vp->v_type == VDIR)
2779 		error = EISDIR;
2780 	else if ((error = vn_writechk(vp)) == 0 &&
2781 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2782 		VATTR_NULL(&vattr);
2783 		vattr.va_size = SCARG(uap, length);
2784 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2785 	}
2786 	vput(vp);
2787 	return (error);
2788 }
2789 
2790 /*
2791  * Truncate a file given a file descriptor.
2792  */
2793 /* ARGSUSED */
2794 int
2795 sys_ftruncate(l, v, retval)
2796 	struct lwp *l;
2797 	void *v;
2798 	register_t *retval;
2799 {
2800 	struct sys_ftruncate_args /* {
2801 		syscallarg(int) fd;
2802 		syscallarg(int) pad;
2803 		syscallarg(off_t) length;
2804 	} */ *uap = v;
2805 	struct proc *p = l->l_proc;
2806 	struct vattr vattr;
2807 	struct vnode *vp;
2808 	struct file *fp;
2809 	int error;
2810 
2811 	/* getvnode() will use the descriptor for us */
2812 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2813 		return (error);
2814 	if ((fp->f_flag & FWRITE) == 0) {
2815 		error = EINVAL;
2816 		goto out;
2817 	}
2818 	vp = (struct vnode *)fp->f_data;
2819 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2820 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2821 	if (vp->v_type == VDIR)
2822 		error = EISDIR;
2823 	else if ((error = vn_writechk(vp)) == 0) {
2824 		VATTR_NULL(&vattr);
2825 		vattr.va_size = SCARG(uap, length);
2826 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2827 	}
2828 	VOP_UNLOCK(vp, 0);
2829  out:
2830 	FILE_UNUSE(fp, p);
2831 	return (error);
2832 }
2833 
2834 /*
2835  * Sync an open file.
2836  */
2837 /* ARGSUSED */
2838 int
2839 sys_fsync(l, v, retval)
2840 	struct lwp *l;
2841 	void *v;
2842 	register_t *retval;
2843 {
2844 	struct sys_fsync_args /* {
2845 		syscallarg(int) fd;
2846 	} */ *uap = v;
2847 	struct proc *p = l->l_proc;
2848 	struct vnode *vp;
2849 	struct file *fp;
2850 	int error;
2851 
2852 	/* getvnode() will use the descriptor for us */
2853 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2854 		return (error);
2855 	vp = (struct vnode *)fp->f_data;
2856 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2857 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, p);
2858 	if (error == 0 && bioops.io_fsync != NULL &&
2859 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2860 		(*bioops.io_fsync)(vp);
2861 	VOP_UNLOCK(vp, 0);
2862 	FILE_UNUSE(fp, p);
2863 	return (error);
2864 }
2865 
2866 /*
2867  * Sync the data of an open file.
2868  */
2869 /* ARGSUSED */
2870 int
2871 sys_fdatasync(l, v, retval)
2872 	struct lwp *l;
2873 	void *v;
2874 	register_t *retval;
2875 {
2876 	struct sys_fdatasync_args /* {
2877 		syscallarg(int) fd;
2878 	} */ *uap = v;
2879 	struct proc *p = l->l_proc;
2880 	struct vnode *vp;
2881 	struct file *fp;
2882 	int error;
2883 
2884 	/* getvnode() will use the descriptor for us */
2885 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2886 		return (error);
2887 	vp = (struct vnode *)fp->f_data;
2888 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2889 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, p);
2890 	VOP_UNLOCK(vp, 0);
2891 	FILE_UNUSE(fp, p);
2892 	return (error);
2893 }
2894 
2895 /*
2896  * Rename files, (standard) BSD semantics frontend.
2897  */
2898 /* ARGSUSED */
2899 int
2900 sys_rename(l, v, retval)
2901 	struct lwp *l;
2902 	void *v;
2903 	register_t *retval;
2904 {
2905 	struct sys_rename_args /* {
2906 		syscallarg(const char *) from;
2907 		syscallarg(const char *) to;
2908 	} */ *uap = v;
2909 	struct proc *p = l->l_proc;
2910 
2911 	return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 0));
2912 }
2913 
2914 /*
2915  * Rename files, POSIX semantics frontend.
2916  */
2917 /* ARGSUSED */
2918 int
2919 sys___posix_rename(l, v, retval)
2920 	struct lwp *l;
2921 	void *v;
2922 	register_t *retval;
2923 {
2924 	struct sys___posix_rename_args /* {
2925 		syscallarg(const char *) from;
2926 		syscallarg(const char *) to;
2927 	} */ *uap = v;
2928 	struct proc *p = l->l_proc;
2929 
2930 	return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 1));
2931 }
2932 
2933 /*
2934  * Rename files.  Source and destination must either both be directories,
2935  * or both not be directories.  If target is a directory, it must be empty.
2936  * If `from' and `to' refer to the same object, the value of the `retain'
2937  * argument is used to determine whether `from' will be
2938  *
2939  * (retain == 0)	deleted unless `from' and `to' refer to the same
2940  *			object in the file system's name space (BSD).
2941  * (retain == 1)	always retained (POSIX).
2942  */
2943 static int
2944 rename_files(from, to, p, retain)
2945 	const char *from, *to;
2946 	struct proc *p;
2947 	int retain;
2948 {
2949 	struct vnode *tvp, *fvp, *tdvp;
2950 	struct nameidata fromnd, tond;
2951 	int error;
2952 
2953 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2954 	    from, p);
2955 	if ((error = namei(&fromnd)) != 0)
2956 		return (error);
2957 	fvp = fromnd.ni_vp;
2958 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
2959 	    UIO_USERSPACE, to, p);
2960 	if ((error = namei(&tond)) != 0) {
2961 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2962 		vrele(fromnd.ni_dvp);
2963 		vrele(fvp);
2964 		goto out1;
2965 	}
2966 	tdvp = tond.ni_dvp;
2967 	tvp = tond.ni_vp;
2968 
2969 	if (tvp != NULL) {
2970 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2971 			error = ENOTDIR;
2972 			goto out;
2973 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2974 			error = EISDIR;
2975 			goto out;
2976 		}
2977 	}
2978 
2979 	if (fvp == tdvp)
2980 		error = EINVAL;
2981 
2982 	/*
2983 	 * Source and destination refer to the same object.
2984 	 */
2985 	if (fvp == tvp) {
2986 		if (retain)
2987 			error = -1;
2988 		else if (fromnd.ni_dvp == tdvp &&
2989 		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2990 		    !memcmp(fromnd.ni_cnd.cn_nameptr,
2991 		          tond.ni_cnd.cn_nameptr,
2992 		          fromnd.ni_cnd.cn_namelen))
2993 		error = -1;
2994 	}
2995 
2996 out:
2997 	if (!error) {
2998 		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2999 		if (fromnd.ni_dvp != tdvp)
3000 			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3001 		if (tvp) {
3002 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
3003 		}
3004 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3005 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3006 	} else {
3007 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3008 		if (tdvp == tvp)
3009 			vrele(tdvp);
3010 		else
3011 			vput(tdvp);
3012 		if (tvp)
3013 			vput(tvp);
3014 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3015 		vrele(fromnd.ni_dvp);
3016 		vrele(fvp);
3017 	}
3018 	vrele(tond.ni_startdir);
3019 	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3020 out1:
3021 	if (fromnd.ni_startdir)
3022 		vrele(fromnd.ni_startdir);
3023 	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3024 	return (error == -1 ? 0 : error);
3025 }
3026 
3027 /*
3028  * Make a directory file.
3029  */
3030 /* ARGSUSED */
3031 int
3032 sys_mkdir(l, v, retval)
3033 	struct lwp *l;
3034 	void *v;
3035 	register_t *retval;
3036 {
3037 	struct sys_mkdir_args /* {
3038 		syscallarg(const char *) path;
3039 		syscallarg(int) mode;
3040 	} */ *uap = v;
3041 	struct proc *p = l->l_proc;
3042 	struct vnode *vp;
3043 	struct vattr vattr;
3044 	int error;
3045 	struct nameidata nd;
3046 
3047 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
3048 	if ((error = namei(&nd)) != 0)
3049 		return (error);
3050 	vp = nd.ni_vp;
3051 	if (vp != NULL) {
3052 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3053 		if (nd.ni_dvp == vp)
3054 			vrele(nd.ni_dvp);
3055 		else
3056 			vput(nd.ni_dvp);
3057 		vrele(vp);
3058 		return (EEXIST);
3059 	}
3060 	VATTR_NULL(&vattr);
3061 	vattr.va_type = VDIR;
3062 	vattr.va_mode =
3063 	    (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3064 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3065 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3066 	if (!error)
3067 		vput(nd.ni_vp);
3068 	return (error);
3069 }
3070 
3071 /*
3072  * Remove a directory file.
3073  */
3074 /* ARGSUSED */
3075 int
3076 sys_rmdir(l, v, retval)
3077 	struct lwp *l;
3078 	void *v;
3079 	register_t *retval;
3080 {
3081 	struct sys_rmdir_args /* {
3082 		syscallarg(const char *) path;
3083 	} */ *uap = v;
3084 	struct proc *p = l->l_proc;
3085 	struct vnode *vp;
3086 	int error;
3087 	struct nameidata nd;
3088 
3089 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3090 	    SCARG(uap, path), p);
3091 	if ((error = namei(&nd)) != 0)
3092 		return (error);
3093 	vp = nd.ni_vp;
3094 	if (vp->v_type != VDIR) {
3095 		error = ENOTDIR;
3096 		goto out;
3097 	}
3098 	/*
3099 	 * No rmdir "." please.
3100 	 */
3101 	if (nd.ni_dvp == vp) {
3102 		error = EINVAL;
3103 		goto out;
3104 	}
3105 	/*
3106 	 * The root of a mounted filesystem cannot be deleted.
3107 	 */
3108 	if (vp->v_flag & VROOT)
3109 		error = EBUSY;
3110 out:
3111 	if (!error) {
3112 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3113 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3114 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3115 	} else {
3116 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3117 		if (nd.ni_dvp == vp)
3118 			vrele(nd.ni_dvp);
3119 		else
3120 			vput(nd.ni_dvp);
3121 		vput(vp);
3122 	}
3123 	return (error);
3124 }
3125 
3126 /*
3127  * Read a block of directory entries in a file system independent format.
3128  */
3129 int
3130 sys_getdents(l, v, retval)
3131 	struct lwp *l;
3132 	void *v;
3133 	register_t *retval;
3134 {
3135 	struct sys_getdents_args /* {
3136 		syscallarg(int) fd;
3137 		syscallarg(char *) buf;
3138 		syscallarg(size_t) count;
3139 	} */ *uap = v;
3140 	struct proc *p = l->l_proc;
3141 	struct file *fp;
3142 	int error, done;
3143 
3144 	/* getvnode() will use the descriptor for us */
3145 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3146 		return (error);
3147 	if ((fp->f_flag & FREAD) == 0) {
3148 		error = EBADF;
3149 		goto out;
3150 	}
3151 	error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3152 			SCARG(uap, count), &done, p, 0, 0);
3153 	*retval = done;
3154  out:
3155 	FILE_UNUSE(fp, p);
3156 	return (error);
3157 }
3158 
3159 /*
3160  * Set the mode mask for creation of filesystem nodes.
3161  */
3162 int
3163 sys_umask(l, v, retval)
3164 	struct lwp *l;
3165 	void *v;
3166 	register_t *retval;
3167 {
3168 	struct sys_umask_args /* {
3169 		syscallarg(mode_t) newmask;
3170 	} */ *uap = v;
3171 	struct proc *p = l->l_proc;
3172 	struct cwdinfo *cwdi;
3173 
3174 	cwdi = p->p_cwdi;
3175 	*retval = cwdi->cwdi_cmask;
3176 	cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3177 	return (0);
3178 }
3179 
3180 /*
3181  * Void all references to file by ripping underlying filesystem
3182  * away from vnode.
3183  */
3184 /* ARGSUSED */
3185 int
3186 sys_revoke(l, v, retval)
3187 	struct lwp *l;
3188 	void *v;
3189 	register_t *retval;
3190 {
3191 	struct sys_revoke_args /* {
3192 		syscallarg(const char *) path;
3193 	} */ *uap = v;
3194 	struct proc *p = l->l_proc;
3195 	struct vnode *vp;
3196 	struct vattr vattr;
3197 	int error;
3198 	struct nameidata nd;
3199 
3200 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3201 	if ((error = namei(&nd)) != 0)
3202 		return (error);
3203 	vp = nd.ni_vp;
3204 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3205 		goto out;
3206 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3207 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0)
3208 		goto out;
3209 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3210 		VOP_REVOKE(vp, REVOKEALL);
3211 out:
3212 	vrele(vp);
3213 	return (error);
3214 }
3215 
3216 /*
3217  * Convert a user file descriptor to a kernel file entry.
3218  */
3219 int
3220 getvnode(fdp, fd, fpp)
3221 	struct filedesc *fdp;
3222 	int fd;
3223 	struct file **fpp;
3224 {
3225 	struct vnode *vp;
3226 	struct file *fp;
3227 
3228 	if ((fp = fd_getfile(fdp, fd)) == NULL)
3229 		return (EBADF);
3230 
3231 	FILE_USE(fp);
3232 
3233 	if (fp->f_type != DTYPE_VNODE) {
3234 		FILE_UNUSE(fp, NULL);
3235 		return (EINVAL);
3236 	}
3237 
3238 	vp = (struct vnode *)fp->f_data;
3239 	if (vp->v_type == VBAD) {
3240 		FILE_UNUSE(fp, NULL);
3241 		return (EBADF);
3242 	}
3243 
3244 	*fpp = fp;
3245 	return (0);
3246 }
3247