xref: /netbsd-src/sys/kern/vfs_syscalls.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /*	$NetBSD: vfs_syscalls.c,v 1.187 2003/05/16 14:25:03 itojun Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)vfs_syscalls.c	8.42 (Berkeley) 7/31/95
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.187 2003/05/16 14:25:03 itojun Exp $");
45 
46 #include "opt_compat_netbsd.h"
47 #include "opt_compat_43.h"
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/dirent.h>
62 #include <sys/sysctl.h>
63 #include <sys/sa.h>
64 #include <sys/syscallargs.h>
65 
66 #include <miscfs/genfs/genfs.h>
67 #include <miscfs/syncfs/syncfs.h>
68 
69 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
70 
/*
 * Forward declarations for helpers private to this file.
 * change_dir() is the common lookup/permission routine used by
 * sys_chdir() and sys_chroot(); dostatfs() fills in a struct statfs
 * for one mount on behalf of sys_statfs(), sys_fstatfs() and
 * sys_getfsstat().
 */
71 static int change_dir __P((struct nameidata *, struct proc *));
72 static int change_flags __P((struct vnode *, u_long, struct proc *));
73 static int change_mode __P((struct vnode *, int, struct proc *p));
74 static int change_owner __P((struct vnode *, uid_t, gid_t, struct proc *,
75     int));
76 static int change_utimes __P((struct vnode *vp, const struct timeval *,
77 	       struct proc *p));
78 static int rename_files __P((const char *, const char *, struct proc *, int));
79 static int dostatfs __P((struct mount *, struct statfs *, struct proc *, int,
80     int));
81 
/*
 * Redirects process current/root directories (and rootvnode) that
 * point at a just-covered vnode to the root of the new file system.
 */
82 void checkdirs __P((struct vnode *));
83 
/*
 * When zero, sys_mount() requires superuser credentials unless the
 * request has MNT_GETARGS set.
 */
84 int dovfsusermount = 0;
85 
86 /*
87  * Virtual File System System Calls
88  */
89 
90 /*
91  * Mount a file system.
92  */
93 
94 #if defined(COMPAT_09) || defined(COMPAT_43)
95 /*
96  * This table is used to maintain compatibility with 4.3BSD
97  * and NetBSD 0.9 mount syscalls.  Note, the order is important!
98  *
99  * Do not modify this table. It should only contain filesystems
100  * supported by NetBSD 0.9 and 4.3BSD.
101  */
/*
 * The array index is the historic numeric file system type.
 * sys_mount() consults the table only when the `type' argument cannot
 * be copied in as a string; a NULL slot is rejected there with ENODEV.
 */
102 const char * const mountcompatnames[] = {
103 	NULL,		/* 0 = MOUNT_NONE */
104 	MOUNT_FFS,	/* 1 = MOUNT_UFS */
105 	MOUNT_NFS,	/* 2 */
106 	MOUNT_MFS,	/* 3 */
107 	MOUNT_MSDOS,	/* 4 */
108 	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
109 	MOUNT_FDESC,	/* 6 */
110 	MOUNT_KERNFS,	/* 7 */
111 	NULL,		/* 8 = MOUNT_DEVFS */
112 	MOUNT_AFS,	/* 9 */
113 };
/* Number of slots in mountcompatnames[]; used as the index bound. */
114 const int nmountcompatnames = sizeof(mountcompatnames) /
115     sizeof(mountcompatnames[0]);
116 #endif /* COMPAT_09 || COMPAT_43 */
117 
118 /* ARGSUSED */
119 int
120 sys_mount(l, v, retval)
121 	struct lwp *l;
122 	void *v;
123 	register_t *retval;
124 {
125 	struct sys_mount_args /* {
126 		syscallarg(const char *) type;
127 		syscallarg(const char *) path;
128 		syscallarg(int) flags;
129 		syscallarg(void *) data;
130 	} */ *uap = v;
131 	struct proc *p = l->l_proc;
132 	struct vnode *vp;
133 	struct mount *mp;
134 	int error, flag = 0;
135 	char fstypename[MFSNAMELEN];
136 	struct vattr va;
137 	struct nameidata nd;
138 	struct vfsops *vfs;
139 
140 	if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
141 	    (error = suser(p->p_ucred, &p->p_acflag)))
142 		return (error);
143 	/*
144 	 * Get vnode to be covered
145 	 */
146 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
147 	    SCARG(uap, path), p);
148 	if ((error = namei(&nd)) != 0)
149 		return (error);
150 	vp = nd.ni_vp;
151 	/*
152 	 * A lookup in VFS_MOUNT might result in an attempt to
153 	 * lock this vnode again, so make the lock recursive.
154 	 */
155 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
156 	if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
157 		if ((vp->v_flag & VROOT) == 0) {
158 			vput(vp);
159 			return (EINVAL);
160 		}
161 		mp = vp->v_mount;
162 		flag = mp->mnt_flag;
163 		vfs = mp->mnt_op;
164 		/*
165 		 * We only allow the filesystem to be reloaded if it
166 		 * is currently mounted read-only.
167 		 */
168 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
169 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
170 			vput(vp);
171 			return (EOPNOTSUPP);	/* Needs translation */
172 		}
173 		/*
174 		 * In "highly secure" mode, don't let the caller do anything
175 		 * but downgrade a filesystem from read-write to read-only.
176 		 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
177 		 */
178 		if (securelevel >= 2 &&
179 		    SCARG(uap, flags) != MNT_GETARGS &&
180 		    SCARG(uap, flags) !=
181 		    (mp->mnt_flag | MNT_RDONLY |
182 		    MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
183 			vput(vp);
184 			return (EPERM);
185 		}
186 		mp->mnt_flag |= SCARG(uap, flags) &
187 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
188 		/*
189 		 * Only root, or the user that did the original mount is
190 		 * permitted to update it.
191 		 */
192 		if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
193 		    mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
194 		    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
195 			vput(vp);
196 			return (error);
197 		}
198 		/*
199 		 * Do not allow NFS export by non-root users. For non-root
200 		 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
201 		 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
202 		 */
203 		if (p->p_ucred->cr_uid != 0) {
204 			if (SCARG(uap, flags) & MNT_EXPORTED) {
205 				vput(vp);
206 				return (EPERM);
207 			}
208 			SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
209 			if (flag & MNT_NOEXEC)
210 				SCARG(uap, flags) |= MNT_NOEXEC;
211 		}
212 		if (vfs_busy(mp, LK_NOWAIT, 0)) {
213 			vput(vp);
214 			return (EPERM);
215 		}
216 		goto update;
217 	} else {
218 		if (securelevel >= 2) {
219 			vput(vp);
220 			return (EPERM);
221 		}
222 	}
223 	/*
224 	 * If the user is not root, ensure that they own the directory
225 	 * onto which we are attempting to mount.
226 	 */
227 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0 ||
228 	    (va.va_uid != p->p_ucred->cr_uid &&
229 		(error = suser(p->p_ucred, &p->p_acflag)) != 0)) {
230 		vput(vp);
231 		return (error);
232 	}
233 	/*
234 	 * Do not allow NFS export by non-root users. For non-root users,
235 	 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
236 	 * mount point is already MNT_NOEXEC.
237 	 */
238 	if (p->p_ucred->cr_uid != 0) {
239 		if (SCARG(uap, flags) & MNT_EXPORTED) {
240 			vput(vp);
241 			return (EPERM);
242 		}
243 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
244 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
245 			SCARG(uap, flags) |= MNT_NOEXEC;
246 	}
247 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
248 		return (error);
249 	if (vp->v_type != VDIR) {
250 		vput(vp);
251 		return (ENOTDIR);
252 	}
253 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
254 	if (error) {
255 #if defined(COMPAT_09) || defined(COMPAT_43)
256 		/*
257 		 * Historically filesystem types were identified by number.
258 		 * If we get an integer for the filesystem type instead of a
259 		 * string, we check to see if it matches one of the historic
260 		 * filesystem types.
261 		 */
262 		u_long fsindex = (u_long)SCARG(uap, type);
263 		if (fsindex >= nmountcompatnames ||
264 		    mountcompatnames[fsindex] == NULL) {
265 			vput(vp);
266 			return (ENODEV);
267 		}
268 		strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
269 #else
270 		vput(vp);
271 		return (error);
272 #endif
273 	}
274 #ifdef	COMPAT_10
275 	/* Accept `ufs' as an alias for `ffs'. */
276 	if (!strncmp(fstypename, "ufs", MFSNAMELEN))
277 		strncpy(fstypename, "ffs", MFSNAMELEN);
278 #endif
279 	if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
280 		vput(vp);
281 		return (ENODEV);
282 	}
283 	if (vp->v_mountedhere != NULL) {
284 		vput(vp);
285 		return (EBUSY);
286 	}
287 
288 	/*
289 	 * Allocate and initialize the file system.
290 	 */
291 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
292 		M_MOUNT, M_WAITOK);
293 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
294 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
295 	(void)vfs_busy(mp, LK_NOWAIT, 0);
296 	mp->mnt_op = vfs;
297 	vfs->vfs_refcount++;
298 	mp->mnt_vnodecovered = vp;
299 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
300 	mp->mnt_unmounter = NULL;
301 update:
302 	/*
303 	 * Set the mount level flags.
304 	 */
305 	if (SCARG(uap, flags) & MNT_RDONLY)
306 		mp->mnt_flag |= MNT_RDONLY;
307 	else if (mp->mnt_flag & MNT_RDONLY)
308 		mp->mnt_flag |= MNT_WANTRDWR;
309 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
310 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
311 	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
312 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
313 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
314 	    MNT_NOCOREDUMP | MNT_IGNORE | MNT_NOATIME | MNT_NODEVMTIME |
315 	    MNT_SYMPERM | MNT_SOFTDEP);
316 	/*
317 	 * Mount the filesystem.
318 	 */
319 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
320 	if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
321 		if (mp->mnt_flag & MNT_WANTRDWR)
322 			mp->mnt_flag &= ~MNT_RDONLY;
323 		if (error || (mp->mnt_flag & MNT_GETARGS))
324 			mp->mnt_flag = flag;
325 		mp->mnt_flag &=~
326 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR |
327 		     MNT_GETARGS);
328 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
329 			if (mp->mnt_syncer == NULL)
330 				error = vfs_allocate_syncvnode(mp);
331 		} else {
332 			if (mp->mnt_syncer != NULL)
333 				vfs_deallocate_syncvnode(mp);
334 		}
335 		vfs_unbusy(mp);
336 		VOP_UNLOCK(vp, 0);
337 		vrele(vp);
338 		return (error);
339 	}
340 	/*
341 	 * Put the new filesystem on the mount list after root.
342 	 */
343 	cache_purge(vp);
344 	if (!error) {
345 		vp->v_mountedhere = mp;
346 		simple_lock(&mountlist_slock);
347 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
348 		simple_unlock(&mountlist_slock);
349 		checkdirs(vp);
350 		VOP_UNLOCK(vp, 0);
351 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
352 			error = vfs_allocate_syncvnode(mp);
353 		vfs_unbusy(mp);
354 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
355 		if ((error = VFS_START(mp, 0, p)))
356 			vrele(vp);
357 	} else {
358 		vp->v_mountedhere = (struct mount *)0;
359 		vfs->vfs_refcount--;
360 		vfs_unbusy(mp);
361 		free(mp, M_MOUNT);
362 		vput(vp);
363 	}
364 	return (error);
365 }
366 
367 /*
368  * Scan all active processes to see if any of them have a current
369  * or root directory onto which the new filesystem has just been
370  * mounted. If so, replace them with the new mount point.
371  */
372 void
373 checkdirs(olddp)
374 	struct vnode *olddp;
375 {
376 	struct cwdinfo *cwdi;
377 	struct vnode *newdp;
378 	struct proc *p;
379 
380 	if (olddp->v_usecount == 1)
381 		return;
382 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
383 		panic("mount: lost mount");
384 	proclist_lock_read();
385 	LIST_FOREACH(p, &allproc, p_list) {
386 		cwdi = p->p_cwdi;
387 		if (cwdi->cwdi_cdir == olddp) {
388 			vrele(cwdi->cwdi_cdir);
389 			VREF(newdp);
390 			cwdi->cwdi_cdir = newdp;
391 		}
392 		if (cwdi->cwdi_rdir == olddp) {
393 			vrele(cwdi->cwdi_rdir);
394 			VREF(newdp);
395 			cwdi->cwdi_rdir = newdp;
396 		}
397 	}
398 	proclist_unlock_read();
399 	if (rootvnode == olddp) {
400 		vrele(rootvnode);
401 		VREF(newdp);
402 		rootvnode = newdp;
403 	}
404 	vput(newdp);
405 }
406 
407 /*
408  * Unmount a file system.
409  *
410  * Note: unmount takes a path to the vnode mounted on as argument,
411  * not special file (as before).
412  */
413 /* ARGSUSED */
414 int
415 sys_unmount(l, v, retval)
416 	struct lwp *l;
417 	void *v;
418 	register_t *retval;
419 {
420 	struct sys_unmount_args /* {
421 		syscallarg(const char *) path;
422 		syscallarg(int) flags;
423 	} */ *uap = v;
424 	struct proc *p = l->l_proc;
425 	struct vnode *vp;
426 	struct mount *mp;
427 	int error;
428 	struct nameidata nd;
429 
430 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
431 	    SCARG(uap, path), p);
432 	if ((error = namei(&nd)) != 0)
433 		return (error);
434 	vp = nd.ni_vp;
435 	mp = vp->v_mount;
436 
437 	/*
438 	 * Only root, or the user that did the original mount is
439 	 * permitted to unmount this filesystem.
440 	 */
441 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
442 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0) {
443 		vput(vp);
444 		return (error);
445 	}
446 
447 	/*
448 	 * Don't allow unmounting the root file system.
449 	 */
450 	if (mp->mnt_flag & MNT_ROOTFS) {
451 		vput(vp);
452 		return (EINVAL);
453 	}
454 
455 	/*
456 	 * Must be the root of the filesystem
457 	 */
458 	if ((vp->v_flag & VROOT) == 0) {
459 		vput(vp);
460 		return (EINVAL);
461 	}
462 	vput(vp);
463 
464 	/*
465 	 * XXX Freeze syncer.  Must do this before locking the
466 	 * mount point.  See dounmount() for details.
467 	 */
468 	lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
469 
470 	if (vfs_busy(mp, 0, 0)) {
471 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
472 		return (EBUSY);
473 	}
474 
475 	return (dounmount(mp, SCARG(uap, flags), p));
476 }
477 
478 /*
479  * Do the actual file system unmount. File system is assumed to have been
480  * marked busy by the caller.
481  */
482 int
483 dounmount(mp, flags, p)
484 	struct mount *mp;
485 	int flags;
486 	struct proc *p;
487 {
488 	struct vnode *coveredvp;
489 	int error;
490 	int async;
491 	int used_syncer;
492 
493 	simple_lock(&mountlist_slock);
494 	vfs_unbusy(mp);
495 	used_syncer = (mp->mnt_syncer != NULL);
496 
497 	/*
498 	 * XXX Syncer must be frozen when we get here.  This should really
499 	 * be done on a per-mountpoint basis, but especially the softdep
500 	 * code possibly called from the syncer doens't exactly work on a
501 	 * per-mountpoint basis, so the softdep code would become a maze
502 	 * of vfs_busy() calls.
503 	 *
504 	 * The caller of dounmount() must acquire syncer_lock because
505 	 * the syncer itself acquires locks in syncer_lock -> vfs_busy
506 	 * order, and we must preserve that order to avoid deadlock.
507 	 *
508 	 * So, if the file system did not use the syncer, now is
509 	 * the time to release the syncer_lock.
510 	 */
511 	if (used_syncer == 0)
512 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
513 
514 	mp->mnt_flag |= MNT_UNMOUNT;
515 	mp->mnt_unmounter = p;
516 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
517 	if (mp->mnt_flag & MNT_EXPUBLIC)
518 		vfs_setpublicfs(NULL, NULL, NULL);
519 	async = mp->mnt_flag & MNT_ASYNC;
520 	mp->mnt_flag &= ~MNT_ASYNC;
521 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
522 	if (mp->mnt_syncer != NULL)
523 		vfs_deallocate_syncvnode(mp);
524 	if (((mp->mnt_flag & MNT_RDONLY) ||
525 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
526 	    (flags & MNT_FORCE))
527 		error = VFS_UNMOUNT(mp, flags, p);
528 	simple_lock(&mountlist_slock);
529 	if (error) {
530 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
531 			(void) vfs_allocate_syncvnode(mp);
532 		mp->mnt_flag &= ~MNT_UNMOUNT;
533 		mp->mnt_unmounter = NULL;
534 		mp->mnt_flag |= async;
535 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
536 		    &mountlist_slock);
537 		if (used_syncer)
538 			lockmgr(&syncer_lock, LK_RELEASE, NULL);
539 		while (mp->mnt_wcnt > 0) {
540 			wakeup(mp);
541 			tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 0);
542 		}
543 		return (error);
544 	}
545 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
546 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
547 		coveredvp->v_mountedhere = NULL;
548 		vrele(coveredvp);
549 	}
550 	mp->mnt_op->vfs_refcount--;
551 	if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
552 		panic("unmount: dangling vnode");
553 	mp->mnt_flag |= MNT_GONE;
554 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
555 	if (used_syncer)
556 		lockmgr(&syncer_lock, LK_RELEASE, NULL);
557 	while(mp->mnt_wcnt > 0) {
558 		wakeup(mp);
559 		tsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0);
560 	}
561 	free(mp, M_MOUNT);
562 	return (0);
563 }
564 
565 /*
566  * Sync each mounted filesystem.
567  */
568 #ifdef DEBUG
569 int syncprt = 0;
570 struct ctldebug debug0 = { "syncprt", &syncprt };
571 #endif
572 
573 /* ARGSUSED */
574 int
575 sys_sync(l, v, retval)
576 	struct lwp *l;
577 	void *v;
578 	register_t *retval;
579 {
580 	struct mount *mp, *nmp;
581 	int asyncflag;
582 	struct proc *p = l == NULL ? &proc0 : l->l_proc;
583 
584 	simple_lock(&mountlist_slock);
585 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
586 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
587 			nmp = mp->mnt_list.cqe_prev;
588 			continue;
589 		}
590 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
591 			asyncflag = mp->mnt_flag & MNT_ASYNC;
592 			mp->mnt_flag &= ~MNT_ASYNC;
593 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
594 			if (asyncflag)
595 				 mp->mnt_flag |= MNT_ASYNC;
596 		}
597 		simple_lock(&mountlist_slock);
598 		nmp = mp->mnt_list.cqe_prev;
599 		vfs_unbusy(mp);
600 
601 	}
602 	simple_unlock(&mountlist_slock);
603 #ifdef DEBUG
604 	if (syncprt)
605 		vfs_bufstats();
606 #endif /* DEBUG */
607 	return (0);
608 }
609 
610 /*
611  * Change filesystem quotas.
612  */
613 /* ARGSUSED */
614 int
615 sys_quotactl(l, v, retval)
616 	struct lwp *l;
617 	void *v;
618 	register_t *retval;
619 {
620 	struct sys_quotactl_args /* {
621 		syscallarg(const char *) path;
622 		syscallarg(int) cmd;
623 		syscallarg(int) uid;
624 		syscallarg(caddr_t) arg;
625 	} */ *uap = v;
626 	struct proc *p = l->l_proc;
627 	struct mount *mp;
628 	int error;
629 	struct nameidata nd;
630 
631 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
632 	if ((error = namei(&nd)) != 0)
633 		return (error);
634 	mp = nd.ni_vp->v_mount;
635 	vrele(nd.ni_vp);
636 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
637 	    SCARG(uap, arg), p));
638 }
639 
640 static int
641 dostatfs(struct mount *mp, struct statfs *sp, struct proc *p, int flags,
642     int root)
643 {
644 	struct cwdinfo *cwdi = p->p_cwdi;
645 	int error = 0;
646 
647 	/*
648 	 * If MNT_NOWAIT or MNT_LAZY is specified, do not
649 	 * refresh the fsstat cache. MNT_WAIT or MNT_LAXY
650 	 * overrides MNT_NOWAIT.
651 	 */
652 	if (flags == MNT_NOWAIT	|| flags == MNT_LAZY ||
653 	    (flags != MNT_WAIT && flags != 0)) {
654 		memcpy(sp, &mp->mnt_stat, sizeof(*sp));
655 		goto done;
656 	}
657 
658 	if ((error = VFS_STATFS(mp, sp, p)) != 0) {
659 		return error;
660 	}
661 
662 	if (cwdi->cwdi_rdir == NULL)
663 		(void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
664 done:
665 	if (cwdi->cwdi_rdir != NULL) {
666 		size_t len;
667 		char *bp;
668 		char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
669 		if (!path)
670 			return ENOMEM;
671 
672 		bp = path + MAXPATHLEN;
673 		*--bp = '\0';
674 		error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
675 		    MAXPATHLEN / 2, 0, p);
676 		if (error) {
677 			free(path, M_TEMP);
678 			return error;
679 		}
680 		len = strlen(bp);
681 		/*
682 		 * for mount points that are below our root, we can see
683 		 * them, so we fix up the pathname and return them. The
684 		 * rest we cannot see, so we don't allow viewing the
685 		 * data.
686 		 */
687 		if (strncmp(bp, sp->f_mntonname, len) == 0) {
688 			strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
689 			    sizeof(sp->f_mntonname));
690 			if (sp->f_mntonname[0] == '\0')
691 				(void)strlcpy(sp->f_mntonname, "/",
692 				    sizeof(sp->f_mntonname));
693 		} else {
694 			if (root)
695 				(void)strlcpy(sp->f_mntonname, "/",
696 				    sizeof(sp->f_mntonname));
697 			else
698 				error = EPERM;
699 		}
700 		free(path, M_TEMP);
701 	}
702 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
703 	sp->f_oflags = sp->f_flags & 0xffff;
704 	return error;
705 }
706 
707 /*
708  * Get filesystem statistics.
709  */
710 /* ARGSUSED */
711 int
712 sys_statfs(l, v, retval)
713 	struct lwp *l;
714 	void *v;
715 	register_t *retval;
716 {
717 	struct sys_statfs_args /* {
718 		syscallarg(const char *) path;
719 		syscallarg(struct statfs *) buf;
720 	} */ *uap = v;
721 	struct proc *p = l->l_proc;
722 	struct mount *mp;
723 	struct statfs sbuf;
724 	int error;
725 	struct nameidata nd;
726 
727 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
728 	if ((error = namei(&nd)) != 0)
729 		return error;
730 	mp = nd.ni_vp->v_mount;
731 	vrele(nd.ni_vp);
732 	if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
733 		return error;
734 	return copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
735 }
736 
737 /*
738  * Get filesystem statistics.
739  */
740 /* ARGSUSED */
741 int
742 sys_fstatfs(l, v, retval)
743 	struct lwp *l;
744 	void *v;
745 	register_t *retval;
746 {
747 	struct sys_fstatfs_args /* {
748 		syscallarg(int) fd;
749 		syscallarg(struct statfs *) buf;
750 	} */ *uap = v;
751 	struct proc *p = l->l_proc;
752 	struct file *fp;
753 	struct mount *mp;
754 	struct statfs sbuf;
755 	int error;
756 
757 	/* getvnode() will use the descriptor for us */
758 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
759 		return (error);
760 	mp = ((struct vnode *)fp->f_data)->v_mount;
761 	if ((error = dostatfs(mp, &sbuf, p, 0, 1)) != 0)
762 		goto out;
763 	error = copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf));
764  out:
765 	FILE_UNUSE(fp, p);
766 	return error;
767 }
768 
769 
770 /*
771  * Get statistics on all filesystems.
772  */
773 int
774 sys_getfsstat(l, v, retval)
775 	struct lwp *l;
776 	void *v;
777 	register_t *retval;
778 {
779 	struct sys_getfsstat_args /* {
780 		syscallarg(struct statfs *) buf;
781 		syscallarg(long) bufsize;
782 		syscallarg(int) flags;
783 	} */ *uap = v;
784 	int root = 0;
785 	struct proc *p = l->l_proc;
786 	struct mount *mp, *nmp;
787 	struct statfs sbuf;
788 	caddr_t sfsp;
789 	long count, maxcount, error = 0;
790 
791 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
792 	sfsp = (caddr_t)SCARG(uap, buf);
793 	simple_lock(&mountlist_slock);
794 	count = 0;
795 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
796 	     mp = nmp) {
797 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
798 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
799 			continue;
800 		}
801 		if (sfsp && count < maxcount) {
802 			error = dostatfs(mp, &sbuf, p, SCARG(uap, flags), 0);
803 			if (error) {
804 				simple_lock(&mountlist_slock);
805 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
806 				vfs_unbusy(mp);
807 				continue;
808 			}
809 			error = copyout(&sbuf, sfsp, sizeof(sbuf));
810 			if (error) {
811 				vfs_unbusy(mp);
812 				return (error);
813 			}
814 			sfsp += sizeof(sbuf);
815 			root |= strcmp(sbuf.f_mntonname, "/") == 0;
816 		}
817 		count++;
818 		simple_lock(&mountlist_slock);
819 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
820 		vfs_unbusy(mp);
821 	}
822 	simple_unlock(&mountlist_slock);
823 	if (root == 0 && p->p_cwdi->cwdi_rdir) {
824 		/*
825 		 * fake a root entry
826 		 */
827 		if ((error = dostatfs(p->p_cwdi->cwdi_rdir->v_mount, &sbuf, p,
828 		    SCARG(uap, flags), 1)) != 0)
829 			return error;
830 		if (sfsp)
831 			error = copyout(&sbuf, sfsp, sizeof(sbuf));
832 		count++;
833 	}
834 	if (sfsp && count > maxcount)
835 		*retval = maxcount;
836 	else
837 		*retval = count;
838 	return error;
839 }
840 
841 /*
842  * Change current working directory to a given file descriptor.
843  */
844 /* ARGSUSED */
845 int
846 sys_fchdir(l, v, retval)
847 	struct lwp *l;
848 	void *v;
849 	register_t *retval;
850 {
851 	struct sys_fchdir_args /* {
852 		syscallarg(int) fd;
853 	} */ *uap = v;
854 	struct proc *p = l->l_proc;
855 	struct filedesc *fdp = p->p_fd;
856 	struct cwdinfo *cwdi = p->p_cwdi;
857 	struct vnode *vp, *tdp;
858 	struct mount *mp;
859 	struct file *fp;
860 	int error;
861 
862 	/* getvnode() will use the descriptor for us */
863 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
864 		return (error);
865 	vp = (struct vnode *)fp->f_data;
866 
867 	VREF(vp);
868 	vn_lock(vp,  LK_EXCLUSIVE | LK_RETRY);
869 	if (vp->v_type != VDIR)
870 		error = ENOTDIR;
871 	else
872 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
873 	while (!error && (mp = vp->v_mountedhere) != NULL) {
874 		if (vfs_busy(mp, 0, 0))
875 			continue;
876 		error = VFS_ROOT(mp, &tdp);
877 		vfs_unbusy(mp);
878 		if (error)
879 			break;
880 		vput(vp);
881 		vp = tdp;
882 	}
883 	if (error) {
884 		vput(vp);
885 		goto out;
886 	}
887 	VOP_UNLOCK(vp, 0);
888 
889 	/*
890 	 * Disallow changing to a directory not under the process's
891 	 * current root directory (if there is one).
892 	 */
893 	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, p)) {
894 		vrele(vp);
895 		error = EPERM;	/* operation not permitted */
896 		goto out;
897 	}
898 
899 	vrele(cwdi->cwdi_cdir);
900 	cwdi->cwdi_cdir = vp;
901  out:
902 	FILE_UNUSE(fp, p);
903 	return (error);
904 }
905 
906 /*
907  * Change this process's notion of the root directory to a given file descriptor.
908  */
909 
910 int
911 sys_fchroot(l, v, retval)
912 	struct lwp *l;
913 	void *v;
914 	register_t *retval;
915 {
916 	struct sys_fchroot_args *uap = v;
917 	struct proc *p = l->l_proc;
918 	struct filedesc *fdp = p->p_fd;
919 	struct cwdinfo *cwdi = p->p_cwdi;
920 	struct vnode	*vp;
921 	struct file	*fp;
922 	int		 error;
923 
924 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
925 		return error;
926 	/* getvnode() will use the descriptor for us */
927 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
928 		return error;
929 	vp = (struct vnode *) fp->f_data;
930 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
931 	if (vp->v_type != VDIR)
932 		error = ENOTDIR;
933 	else
934 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
935 	VOP_UNLOCK(vp, 0);
936 	if (error)
937 		goto out;
938 	VREF(vp);
939 
940 	/*
941 	 * Prevent escaping from chroot by putting the root under
942 	 * the working directory.  Silently chdir to / if we aren't
943 	 * already there.
944 	 */
945 	if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
946 		/*
947 		 * XXX would be more failsafe to change directory to a
948 		 * deadfs node here instead
949 		 */
950 		vrele(cwdi->cwdi_cdir);
951 		VREF(vp);
952 		cwdi->cwdi_cdir = vp;
953 	}
954 
955 	if (cwdi->cwdi_rdir != NULL)
956 		vrele(cwdi->cwdi_rdir);
957 	cwdi->cwdi_rdir = vp;
958  out:
959 	FILE_UNUSE(fp, p);
960 	return (error);
961 }
962 
963 
964 
965 /*
966  * Change current working directory (``.'').
967  */
968 /* ARGSUSED */
969 int
970 sys_chdir(l, v, retval)
971 	struct lwp *l;
972 	void *v;
973 	register_t *retval;
974 {
975 	struct sys_chdir_args /* {
976 		syscallarg(const char *) path;
977 	} */ *uap = v;
978 	struct proc *p = l->l_proc;
979 	struct cwdinfo *cwdi = p->p_cwdi;
980 	int error;
981 	struct nameidata nd;
982 
983 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
984 	    SCARG(uap, path), p);
985 	if ((error = change_dir(&nd, p)) != 0)
986 		return (error);
987 	vrele(cwdi->cwdi_cdir);
988 	cwdi->cwdi_cdir = nd.ni_vp;
989 	return (0);
990 }
991 
992 /*
993  * Change notion of root (``/'') directory.
994  */
995 /* ARGSUSED */
996 int
997 sys_chroot(l, v, retval)
998 	struct lwp *l;
999 	void *v;
1000 	register_t *retval;
1001 {
1002 	struct sys_chroot_args /* {
1003 		syscallarg(const char *) path;
1004 	} */ *uap = v;
1005 	struct proc *p = l->l_proc;
1006 	struct cwdinfo *cwdi = p->p_cwdi;
1007 	struct vnode *vp;
1008 	int error;
1009 	struct nameidata nd;
1010 
1011 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1012 		return (error);
1013 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1014 	    SCARG(uap, path), p);
1015 	if ((error = change_dir(&nd, p)) != 0)
1016 		return (error);
1017 	if (cwdi->cwdi_rdir != NULL)
1018 		vrele(cwdi->cwdi_rdir);
1019 	vp = nd.ni_vp;
1020 	cwdi->cwdi_rdir = vp;
1021 
1022 	/*
1023 	 * Prevent escaping from chroot by putting the root under
1024 	 * the working directory.  Silently chdir to / if we aren't
1025 	 * already there.
1026 	 */
1027 	if (!vn_isunder(cwdi->cwdi_cdir, vp, p)) {
1028 		/*
1029 		 * XXX would be more failsafe to change directory to a
1030 		 * deadfs node here instead
1031 		 */
1032 		vrele(cwdi->cwdi_cdir);
1033 		VREF(vp);
1034 		cwdi->cwdi_cdir = vp;
1035 	}
1036 
1037 	return (0);
1038 }
1039 
1040 /*
1041  * Common routine for chroot and chdir.
1042  */
1043 static int
1044 change_dir(ndp, p)
1045 	struct nameidata *ndp;
1046 	struct proc *p;
1047 {
1048 	struct vnode *vp;
1049 	int error;
1050 
1051 	if ((error = namei(ndp)) != 0)
1052 		return (error);
1053 	vp = ndp->ni_vp;
1054 	if (vp->v_type != VDIR)
1055 		error = ENOTDIR;
1056 	else
1057 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
1058 
1059 	if (error)
1060 		vput(vp);
1061 	else
1062 		VOP_UNLOCK(vp, 0);
1063 	return (error);
1064 }
1065 
1066 /*
1067  * Check permissions, allocate an open file structure,
1068  * and call the device open routine if any.
1069  */
1070 int
1071 sys_open(l, v, retval)
1072 	struct lwp *l;
1073 	void *v;
1074 	register_t *retval;
1075 {
1076 	struct sys_open_args /* {
1077 		syscallarg(const char *) path;
1078 		syscallarg(int) flags;
1079 		syscallarg(int) mode;
1080 	} */ *uap = v;
1081 	struct proc *p = l->l_proc;
1082 	struct cwdinfo *cwdi = p->p_cwdi;
1083 	struct filedesc *fdp = p->p_fd;
1084 	struct file *fp;
1085 	struct vnode *vp;
1086 	int flags, cmode;
1087 	int type, indx, error;
1088 	struct flock lf;
1089 	struct nameidata nd;
1090 
1091 	flags = FFLAGS(SCARG(uap, flags));
1092 	if ((flags & (FREAD | FWRITE)) == 0)
1093 		return (EINVAL);
1094 	/* falloc() will use the file descriptor for us */
1095 	if ((error = falloc(p, &fp, &indx)) != 0)
1096 		return (error);
1097 	cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1098 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1099 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1100 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1101 		FILE_UNUSE(fp, p);
1102 		ffree(fp);
1103 		if ((error == ENODEV || error == ENXIO) &&
1104 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1105 		    (error =
1106 			dupfdopen(p, indx, p->p_dupfd, flags, error)) == 0) {
1107 			*retval = indx;
1108 			return (0);
1109 		}
1110 		if (error == ERESTART)
1111 			error = EINTR;
1112 		fdremove(fdp, indx);
1113 		return (error);
1114 	}
1115 	p->p_dupfd = 0;
1116 	vp = nd.ni_vp;
1117 	fp->f_flag = flags & FMASK;
1118 	fp->f_type = DTYPE_VNODE;
1119 	fp->f_ops = &vnops;
1120 	fp->f_data = vp;
1121 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1122 		lf.l_whence = SEEK_SET;
1123 		lf.l_start = 0;
1124 		lf.l_len = 0;
1125 		if (flags & O_EXLOCK)
1126 			lf.l_type = F_WRLCK;
1127 		else
1128 			lf.l_type = F_RDLCK;
1129 		type = F_FLOCK;
1130 		if ((flags & FNONBLOCK) == 0)
1131 			type |= F_WAIT;
1132 		VOP_UNLOCK(vp, 0);
1133 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1134 		if (error) {
1135 			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
1136 			FILE_UNUSE(fp, p);
1137 			ffree(fp);
1138 			fdremove(fdp, indx);
1139 			return (error);
1140 		}
1141 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1142 		fp->f_flag |= FHASLOCK;
1143 	}
1144 	VOP_UNLOCK(vp, 0);
1145 	*retval = indx;
1146 	FILE_SET_MATURE(fp);
1147 	FILE_UNUSE(fp, p);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Get file handle system call
1153  */
1154 int
1155 sys_getfh(l, v, retval)
1156 	struct lwp *l;
1157 	void *v;
1158 	register_t *retval;
1159 {
1160 	struct sys_getfh_args /* {
1161 		syscallarg(char *) fname;
1162 		syscallarg(fhandle_t *) fhp;
1163 	} */ *uap = v;
1164 	struct proc *p = l->l_proc;
1165 	struct vnode *vp;
1166 	fhandle_t fh;
1167 	int error;
1168 	struct nameidata nd;
1169 
1170 	/*
1171 	 * Must be super user
1172 	 */
1173 	error = suser(p->p_ucred, &p->p_acflag);
1174 	if (error)
1175 		return (error);
1176 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1177 	    SCARG(uap, fname), p);
1178 	error = namei(&nd);
1179 	if (error)
1180 		return (error);
1181 	vp = nd.ni_vp;
1182 	memset(&fh, 0, sizeof(fh));
1183 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1184 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1185 	vput(vp);
1186 	if (error)
1187 		return (error);
1188 	error = copyout(&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh));
1189 	return (error);
1190 }
1191 
1192 /*
1193  * Open a file given a file handle.
1194  *
1195  * Check permissions, allocate an open file structure,
1196  * and call the device open routine if any.
1197  */
1198 int
1199 sys_fhopen(l, v, retval)
1200 	struct lwp *l;
1201 	void *v;
1202 	register_t *retval;
1203 {
1204 	struct sys_fhopen_args /* {
1205 		syscallarg(const fhandle_t *) fhp;
1206 		syscallarg(int) flags;
1207 	} */ *uap = v;
1208 	struct proc *p = l->l_proc;
1209 	struct filedesc *fdp = p->p_fd;
1210 	struct file *fp;
1211 	struct vnode *vp = NULL;
1212 	struct mount *mp;
1213 	struct ucred *cred = p->p_ucred;
1214 	int flags;
1215 	struct file *nfp;
1216 	int type, indx, error=0;
1217 	struct flock lf;
1218 	struct vattr va;
1219 	fhandle_t fh;
1220 
1221 	/*
1222 	 * Must be super user
1223 	 */
1224 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1225 		return (error);
1226 
1227 	flags = FFLAGS(SCARG(uap, flags));
1228 	if ((flags & (FREAD | FWRITE)) == 0)
1229 		return (EINVAL);
1230 	if ((flags & O_CREAT))
1231 		return (EINVAL);
1232 	/* falloc() will use the file descriptor for us */
1233 	if ((error = falloc(p, &nfp, &indx)) != 0)
1234 		return (error);
1235 	fp = nfp;
1236 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1237 		goto bad;
1238 
1239 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1240 		error = ESTALE;
1241 		goto bad;
1242 	}
1243 
1244 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1245 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1246 		goto bad;
1247 	}
1248 
1249 	/* Now do an effective vn_open */
1250 
1251 	if (vp->v_type == VSOCK) {
1252 		error = EOPNOTSUPP;
1253 		goto bad;
1254 	}
1255 	if (flags & FREAD) {
1256 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1257 			goto bad;
1258 	}
1259 	if (flags & (FWRITE | O_TRUNC)) {
1260 		if (vp->v_type == VDIR) {
1261 			error = EISDIR;
1262 			goto bad;
1263 		}
1264 		if ((error = vn_writechk(vp)) != 0 ||
1265 		    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
1266 			goto bad;
1267 	}
1268 	if (flags & O_TRUNC) {
1269 		VOP_UNLOCK(vp, 0);			/* XXX */
1270 		VOP_LEASE(vp, p, cred, LEASE_WRITE);
1271 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
1272 		VATTR_NULL(&va);
1273 		va.va_size = 0;
1274 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1275 			goto bad;
1276 	}
1277 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1278 		goto bad;
1279 	if (vp->v_type == VREG &&
1280 	    uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1281 		error = EIO;
1282 		goto bad;
1283 	}
1284 	if (flags & FWRITE)
1285 		vp->v_writecount++;
1286 
1287 	/* done with modified vn_open, now finish what sys_open does. */
1288 
1289 	fp->f_flag = flags & FMASK;
1290 	fp->f_type = DTYPE_VNODE;
1291 	fp->f_ops = &vnops;
1292 	fp->f_data = vp;
1293 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1294 		lf.l_whence = SEEK_SET;
1295 		lf.l_start = 0;
1296 		lf.l_len = 0;
1297 		if (flags & O_EXLOCK)
1298 			lf.l_type = F_WRLCK;
1299 		else
1300 			lf.l_type = F_RDLCK;
1301 		type = F_FLOCK;
1302 		if ((flags & FNONBLOCK) == 0)
1303 			type |= F_WAIT;
1304 		VOP_UNLOCK(vp, 0);
1305 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1306 		if (error) {
1307 			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
1308 			FILE_UNUSE(fp, p);
1309 			ffree(fp);
1310 			fdremove(fdp, indx);
1311 			return (error);
1312 		}
1313 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1314 		fp->f_flag |= FHASLOCK;
1315 	}
1316 	VOP_UNLOCK(vp, 0);
1317 	*retval = indx;
1318 	FILE_SET_MATURE(fp);
1319 	FILE_UNUSE(fp, p);
1320 	return (0);
1321 
1322 bad:
1323 	FILE_UNUSE(fp, p);
1324 	ffree(fp);
1325 	fdremove(fdp, indx);
1326 	if (vp != NULL)
1327 		vput(vp);
1328 	return (error);
1329 }
1330 
1331 /* ARGSUSED */
1332 int
1333 sys_fhstat(l, v, retval)
1334 	struct lwp *l;
1335 	void *v;
1336 	register_t *retval;
1337 {
1338 	struct sys_fhstat_args /* {
1339 		syscallarg(const fhandle_t *) fhp;
1340 		syscallarg(struct stat *) sb;
1341 	} */ *uap = v;
1342 	struct proc *p = l->l_proc;
1343 	struct stat sb;
1344 	int error;
1345 	fhandle_t fh;
1346 	struct mount *mp;
1347 	struct vnode *vp;
1348 
1349 	/*
1350 	 * Must be super user
1351 	 */
1352 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1353 		return (error);
1354 
1355 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1356 		return (error);
1357 
1358 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1359 		return (ESTALE);
1360 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1361 		return (error);
1362 	error = vn_stat(vp, &sb, p);
1363 	vput(vp);
1364 	if (error)
1365 		return (error);
1366 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1367 	return (error);
1368 }
1369 
1370 /* ARGSUSED */
1371 int
1372 sys_fhstatfs(l, v, retval)
1373 	struct lwp *l;
1374 	void *v;
1375 	register_t *retval;
1376 {
1377 	struct sys_fhstatfs_args /*
1378 		syscallarg(const fhandle_t *) fhp;
1379 		syscallarg(struct statfs *) buf;
1380 	} */ *uap = v;
1381 	struct proc *p = l->l_proc;
1382 	struct statfs sbuf;
1383 	fhandle_t fh;
1384 	struct mount *mp;
1385 	struct vnode *vp;
1386 	int error;
1387 
1388 	/*
1389 	 * Must be super user
1390 	 */
1391 	if ((error = suser(p->p_ucred, &p->p_acflag)))
1392 		return (error);
1393 
1394 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1395 		return (error);
1396 
1397 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1398 		return (ESTALE);
1399 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1400 		return (error);
1401 	mp = vp->v_mount;
1402 	vput(vp);
1403 	if ((error = VFS_STATFS(mp, &sbuf, p)) != 0)
1404 		return (error);
1405 	return (copyout(&sbuf, SCARG(uap, buf), sizeof(sbuf)));
1406 }
1407 
1408 /*
1409  * Create a special file.
1410  */
1411 /* ARGSUSED */
1412 int
1413 sys_mknod(l, v, retval)
1414 	struct lwp *l;
1415 	void *v;
1416 	register_t *retval;
1417 {
1418 	struct sys_mknod_args /* {
1419 		syscallarg(const char *) path;
1420 		syscallarg(int) mode;
1421 		syscallarg(int) dev;
1422 	} */ *uap = v;
1423 	struct proc *p = l->l_proc;
1424 	struct vnode *vp;
1425 	struct vattr vattr;
1426 	int error;
1427 	int whiteout = 0;
1428 	struct nameidata nd;
1429 
1430 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1431 		return (error);
1432 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1433 	if ((error = namei(&nd)) != 0)
1434 		return (error);
1435 	vp = nd.ni_vp;
1436 	if (vp != NULL)
1437 		error = EEXIST;
1438 	else {
1439 		VATTR_NULL(&vattr);
1440 		vattr.va_mode =
1441 		    (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1442 		vattr.va_rdev = SCARG(uap, dev);
1443 		whiteout = 0;
1444 
1445 		switch (SCARG(uap, mode) & S_IFMT) {
1446 		case S_IFMT:	/* used by badsect to flag bad sectors */
1447 			vattr.va_type = VBAD;
1448 			break;
1449 		case S_IFCHR:
1450 			vattr.va_type = VCHR;
1451 			break;
1452 		case S_IFBLK:
1453 			vattr.va_type = VBLK;
1454 			break;
1455 		case S_IFWHT:
1456 			whiteout = 1;
1457 			break;
1458 		default:
1459 			error = EINVAL;
1460 			break;
1461 		}
1462 	}
1463 	if (!error) {
1464 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1465 		if (whiteout) {
1466 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1467 			if (error)
1468 				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1469 			vput(nd.ni_dvp);
1470 		} else {
1471 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1472 						&nd.ni_cnd, &vattr);
1473 			if (error == 0)
1474 				vput(nd.ni_vp);
1475 		}
1476 	} else {
1477 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1478 		if (nd.ni_dvp == vp)
1479 			vrele(nd.ni_dvp);
1480 		else
1481 			vput(nd.ni_dvp);
1482 		if (vp)
1483 			vrele(vp);
1484 	}
1485 	return (error);
1486 }
1487 
1488 /*
1489  * Create a named pipe.
1490  */
1491 /* ARGSUSED */
1492 int
1493 sys_mkfifo(l, v, retval)
1494 	struct lwp *l;
1495 	void *v;
1496 	register_t *retval;
1497 {
1498 	struct sys_mkfifo_args /* {
1499 		syscallarg(const char *) path;
1500 		syscallarg(int) mode;
1501 	} */ *uap = v;
1502 	struct proc *p = l->l_proc;
1503 	struct vattr vattr;
1504 	int error;
1505 	struct nameidata nd;
1506 
1507 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1508 	if ((error = namei(&nd)) != 0)
1509 		return (error);
1510 	if (nd.ni_vp != NULL) {
1511 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1512 		if (nd.ni_dvp == nd.ni_vp)
1513 			vrele(nd.ni_dvp);
1514 		else
1515 			vput(nd.ni_dvp);
1516 		vrele(nd.ni_vp);
1517 		return (EEXIST);
1518 	}
1519 	VATTR_NULL(&vattr);
1520 	vattr.va_type = VFIFO;
1521 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1522 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1523 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1524 	if (error == 0)
1525 		vput(nd.ni_vp);
1526 	return (error);
1527 }
1528 
1529 /*
1530  * Make a hard file link.
1531  */
1532 /* ARGSUSED */
1533 int
1534 sys_link(l, v, retval)
1535 	struct lwp *l;
1536 	void *v;
1537 	register_t *retval;
1538 {
1539 	struct sys_link_args /* {
1540 		syscallarg(const char *) path;
1541 		syscallarg(const char *) link;
1542 	} */ *uap = v;
1543 	struct proc *p = l->l_proc;
1544 	struct vnode *vp;
1545 	struct nameidata nd;
1546 	int error;
1547 
1548 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1549 	if ((error = namei(&nd)) != 0)
1550 		return (error);
1551 	vp = nd.ni_vp;
1552 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
1553 	if ((error = namei(&nd)) != 0)
1554 		goto out;
1555 	if (nd.ni_vp) {
1556 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1557 		if (nd.ni_dvp == nd.ni_vp)
1558 			vrele(nd.ni_dvp);
1559 		else
1560 			vput(nd.ni_dvp);
1561 		vrele(nd.ni_vp);
1562 		error = EEXIST;
1563 		goto out;
1564 	}
1565 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1566 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1567 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1568 out:
1569 	vrele(vp);
1570 	return (error);
1571 }
1572 
1573 /*
1574  * Make a symbolic link.
1575  */
1576 /* ARGSUSED */
1577 int
1578 sys_symlink(l, v, retval)
1579 	struct lwp *l;
1580 	void *v;
1581 	register_t *retval;
1582 {
1583 	struct sys_symlink_args /* {
1584 		syscallarg(const char *) path;
1585 		syscallarg(const char *) link;
1586 	} */ *uap = v;
1587 	struct proc *p = l->l_proc;
1588 	struct vattr vattr;
1589 	char *path;
1590 	int error;
1591 	struct nameidata nd;
1592 
1593 	path = PNBUF_GET();
1594 	error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1595 	if (error)
1596 		goto out;
1597 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
1598 	if ((error = namei(&nd)) != 0)
1599 		goto out;
1600 	if (nd.ni_vp) {
1601 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1602 		if (nd.ni_dvp == nd.ni_vp)
1603 			vrele(nd.ni_dvp);
1604 		else
1605 			vput(nd.ni_dvp);
1606 		vrele(nd.ni_vp);
1607 		error = EEXIST;
1608 		goto out;
1609 	}
1610 	VATTR_NULL(&vattr);
1611 	vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1612 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1613 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1614 	if (error == 0)
1615 		vput(nd.ni_vp);
1616 out:
1617 	PNBUF_PUT(path);
1618 	return (error);
1619 }
1620 
1621 /*
1622  * Delete a whiteout from the filesystem.
1623  */
1624 /* ARGSUSED */
1625 int
1626 sys_undelete(l, v, retval)
1627 	struct lwp *l;
1628 	void *v;
1629 	register_t *retval;
1630 {
1631 	struct sys_undelete_args /* {
1632 		syscallarg(const char *) path;
1633 	} */ *uap = v;
1634 	struct proc *p = l->l_proc;
1635 	int error;
1636 	struct nameidata nd;
1637 
1638 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1639 	    SCARG(uap, path), p);
1640 	error = namei(&nd);
1641 	if (error)
1642 		return (error);
1643 
1644 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1645 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1646 		if (nd.ni_dvp == nd.ni_vp)
1647 			vrele(nd.ni_dvp);
1648 		else
1649 			vput(nd.ni_dvp);
1650 		if (nd.ni_vp)
1651 			vrele(nd.ni_vp);
1652 		return (EEXIST);
1653 	}
1654 
1655 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1656 	if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
1657 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1658 	vput(nd.ni_dvp);
1659 	return (error);
1660 }
1661 
1662 /*
1663  * Delete a name from the filesystem.
1664  */
1665 /* ARGSUSED */
1666 int
1667 sys_unlink(l, v, retval)
1668 	struct lwp *l;
1669 	void *v;
1670 	register_t *retval;
1671 {
1672 	struct sys_unlink_args /* {
1673 		syscallarg(const char *) path;
1674 	} */ *uap = v;
1675 	struct proc *p = l->l_proc;
1676 	struct vnode *vp;
1677 	int error;
1678 	struct nameidata nd;
1679 
1680 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1681 	    SCARG(uap, path), p);
1682 	if ((error = namei(&nd)) != 0)
1683 		return (error);
1684 	vp = nd.ni_vp;
1685 
1686 	/*
1687 	 * The root of a mounted filesystem cannot be deleted.
1688 	 */
1689 	if (vp->v_flag & VROOT) {
1690 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1691 		if (nd.ni_dvp == vp)
1692 			vrele(nd.ni_dvp);
1693 		else
1694 			vput(nd.ni_dvp);
1695 		vput(vp);
1696 		error = EBUSY;
1697 		goto out;
1698 	}
1699 
1700 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1701 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1702 	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1703 out:
1704 	return (error);
1705 }
1706 
1707 /*
1708  * Reposition read/write file offset.
1709  */
1710 int
1711 sys_lseek(l, v, retval)
1712 	struct lwp *l;
1713 	void *v;
1714 	register_t *retval;
1715 {
1716 	struct sys_lseek_args /* {
1717 		syscallarg(int) fd;
1718 		syscallarg(int) pad;
1719 		syscallarg(off_t) offset;
1720 		syscallarg(int) whence;
1721 	} */ *uap = v;
1722 	struct proc *p = l->l_proc;
1723 	struct ucred *cred = p->p_ucred;
1724 	struct filedesc *fdp = p->p_fd;
1725 	struct file *fp;
1726 	struct vnode *vp;
1727 	struct vattr vattr;
1728 	off_t newoff;
1729 	int error;
1730 
1731 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1732 		return (EBADF);
1733 
1734 	FILE_USE(fp);
1735 
1736 	vp = (struct vnode *)fp->f_data;
1737 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1738 		error = ESPIPE;
1739 		goto out;
1740 	}
1741 
1742 	switch (SCARG(uap, whence)) {
1743 	case SEEK_CUR:
1744 		newoff = fp->f_offset + SCARG(uap, offset);
1745 		break;
1746 	case SEEK_END:
1747 		error = VOP_GETATTR(vp, &vattr, cred, p);
1748 		if (error)
1749 			goto out;
1750 		newoff = SCARG(uap, offset) + vattr.va_size;
1751 		break;
1752 	case SEEK_SET:
1753 		newoff = SCARG(uap, offset);
1754 		break;
1755 	default:
1756 		error = EINVAL;
1757 		goto out;
1758 	}
1759 	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
1760 		goto out;
1761 
1762 	*(off_t *)retval = fp->f_offset = newoff;
1763  out:
1764 	FILE_UNUSE(fp, p);
1765 	return (error);
1766 }
1767 
1768 /*
1769  * Positional read system call.
1770  */
1771 int
1772 sys_pread(l, v, retval)
1773 	struct lwp *l;
1774 	void *v;
1775 	register_t *retval;
1776 {
1777 	struct sys_pread_args /* {
1778 		syscallarg(int) fd;
1779 		syscallarg(void *) buf;
1780 		syscallarg(size_t) nbyte;
1781 		syscallarg(off_t) offset;
1782 	} */ *uap = v;
1783 	struct proc *p = l->l_proc;
1784 	struct filedesc *fdp = p->p_fd;
1785 	struct file *fp;
1786 	struct vnode *vp;
1787 	off_t offset;
1788 	int error, fd = SCARG(uap, fd);
1789 
1790 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1791 		return (EBADF);
1792 
1793 	if ((fp->f_flag & FREAD) == 0) {
1794 		simple_unlock(&fp->f_slock);
1795 		return (EBADF);
1796 	}
1797 
1798 	FILE_USE(fp);
1799 
1800 	vp = (struct vnode *)fp->f_data;
1801 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1802 		error = ESPIPE;
1803 		goto out;
1804 	}
1805 
1806 	offset = SCARG(uap, offset);
1807 
1808 	/*
1809 	 * XXX This works because no file systems actually
1810 	 * XXX take any action on the seek operation.
1811 	 */
1812 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1813 		goto out;
1814 
1815 	/* dofileread() will unuse the descriptor for us */
1816 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
1817 	    &offset, 0, retval));
1818 
1819  out:
1820 	FILE_UNUSE(fp, p);
1821 	return (error);
1822 }
1823 
1824 /*
1825  * Positional scatter read system call.
1826  */
1827 int
1828 sys_preadv(l, v, retval)
1829 	struct lwp *l;
1830 	void *v;
1831 	register_t *retval;
1832 {
1833 	struct sys_preadv_args /* {
1834 		syscallarg(int) fd;
1835 		syscallarg(const struct iovec *) iovp;
1836 		syscallarg(int) iovcnt;
1837 		syscallarg(off_t) offset;
1838 	} */ *uap = v;
1839 	struct proc *p = l->l_proc;
1840 	struct filedesc *fdp = p->p_fd;
1841 	struct file *fp;
1842 	struct vnode *vp;
1843 	off_t offset;
1844 	int error, fd = SCARG(uap, fd);
1845 
1846 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1847 		return (EBADF);
1848 
1849 	if ((fp->f_flag & FREAD) == 0) {
1850 		simple_unlock(&fp->f_slock);
1851 		return (EBADF);
1852 	}
1853 
1854 	FILE_USE(fp);
1855 
1856 	vp = (struct vnode *)fp->f_data;
1857 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1858 		error = ESPIPE;
1859 		goto out;
1860 	}
1861 
1862 	offset = SCARG(uap, offset);
1863 
1864 	/*
1865 	 * XXX This works because no file systems actually
1866 	 * XXX take any action on the seek operation.
1867 	 */
1868 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1869 		goto out;
1870 
1871 	/* dofilereadv() will unuse the descriptor for us */
1872 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
1873 	    &offset, 0, retval));
1874 
1875  out:
1876 	FILE_UNUSE(fp, p);
1877 	return (error);
1878 }
1879 
1880 /*
1881  * Positional write system call.
1882  */
1883 int
1884 sys_pwrite(l, v, retval)
1885 	struct lwp *l;
1886 	void *v;
1887 	register_t *retval;
1888 {
1889 	struct sys_pwrite_args /* {
1890 		syscallarg(int) fd;
1891 		syscallarg(const void *) buf;
1892 		syscallarg(size_t) nbyte;
1893 		syscallarg(off_t) offset;
1894 	} */ *uap = v;
1895 	struct proc *p = l->l_proc;
1896 	struct filedesc *fdp = p->p_fd;
1897 	struct file *fp;
1898 	struct vnode *vp;
1899 	off_t offset;
1900 	int error, fd = SCARG(uap, fd);
1901 
1902 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1903 		return (EBADF);
1904 
1905 	if ((fp->f_flag & FWRITE) == 0) {
1906 		simple_unlock(&fp->f_slock);
1907 		return (EBADF);
1908 	}
1909 
1910 	FILE_USE(fp);
1911 
1912 	vp = (struct vnode *)fp->f_data;
1913 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1914 		error = ESPIPE;
1915 		goto out;
1916 	}
1917 
1918 	offset = SCARG(uap, offset);
1919 
1920 	/*
1921 	 * XXX This works because no file systems actually
1922 	 * XXX take any action on the seek operation.
1923 	 */
1924 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1925 		goto out;
1926 
1927 	/* dofilewrite() will unuse the descriptor for us */
1928 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
1929 	    &offset, 0, retval));
1930 
1931  out:
1932 	FILE_UNUSE(fp, p);
1933 	return (error);
1934 }
1935 
1936 /*
1937  * Positional gather write system call.
1938  */
1939 int
1940 sys_pwritev(l, v, retval)
1941 	struct lwp *l;
1942 	void *v;
1943 	register_t *retval;
1944 {
1945 	struct sys_pwritev_args /* {
1946 		syscallarg(int) fd;
1947 		syscallarg(const struct iovec *) iovp;
1948 		syscallarg(int) iovcnt;
1949 		syscallarg(off_t) offset;
1950 	} */ *uap = v;
1951 	struct proc *p = l->l_proc;
1952 	struct filedesc *fdp = p->p_fd;
1953 	struct file *fp;
1954 	struct vnode *vp;
1955 	off_t offset;
1956 	int error, fd = SCARG(uap, fd);
1957 
1958 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1959 		return (EBADF);
1960 
1961 	if ((fp->f_flag & FWRITE) == 0) {
1962 		simple_unlock(&fp->f_slock);
1963 		return (EBADF);
1964 	}
1965 
1966 	FILE_USE(fp);
1967 
1968 	vp = (struct vnode *)fp->f_data;
1969 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1970 		error = ESPIPE;
1971 		goto out;
1972 	}
1973 
1974 	offset = SCARG(uap, offset);
1975 
1976 	/*
1977 	 * XXX This works because no file systems actually
1978 	 * XXX take any action on the seek operation.
1979 	 */
1980 	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
1981 		goto out;
1982 
1983 	/* dofilewritev() will unuse the descriptor for us */
1984 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
1985 	    &offset, 0, retval));
1986 
1987  out:
1988 	FILE_UNUSE(fp, p);
1989 	return (error);
1990 }
1991 
1992 /*
1993  * Check access permissions.
1994  */
1995 int
1996 sys_access(l, v, retval)
1997 	struct lwp *l;
1998 	void *v;
1999 	register_t *retval;
2000 {
2001 	struct sys_access_args /* {
2002 		syscallarg(const char *) path;
2003 		syscallarg(int) flags;
2004 	} */ *uap = v;
2005 	struct proc *p = l->l_proc;
2006 	struct ucred *cred = crget();
2007 	struct vnode *vp;
2008 	int error, flags;
2009 	struct nameidata nd;
2010 
2011 	(void)memcpy(cred, p->p_ucred, sizeof(*cred));
2012 	cred->cr_ref = 1;
2013 	cred->cr_uid = p->p_cred->p_ruid;
2014 	cred->cr_gid = p->p_cred->p_rgid;
2015 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2016 	    SCARG(uap, path), p);
2017 	/* Override default credentials */
2018 	nd.ni_cnd.cn_cred = cred;
2019 	if ((error = namei(&nd)) != 0)
2020 		goto out;
2021 	vp = nd.ni_vp;
2022 
2023 	/* Flags == 0 means only check for existence. */
2024 	if (SCARG(uap, flags)) {
2025 		flags = 0;
2026 		if (SCARG(uap, flags) & R_OK)
2027 			flags |= VREAD;
2028 		if (SCARG(uap, flags) & W_OK)
2029 			flags |= VWRITE;
2030 		if (SCARG(uap, flags) & X_OK)
2031 			flags |= VEXEC;
2032 
2033 		error = VOP_ACCESS(vp, flags, cred, p);
2034 		if (!error && (flags & VWRITE))
2035 			error = vn_writechk(vp);
2036 	}
2037 	vput(vp);
2038 out:
2039 	crfree(cred);
2040 	return (error);
2041 }
2042 
2043 /*
2044  * Get file status; this version follows links.
2045  */
2046 /* ARGSUSED */
2047 int
2048 sys___stat13(l, v, retval)
2049 	struct lwp *l;
2050 	void *v;
2051 	register_t *retval;
2052 {
2053 	struct sys___stat13_args /* {
2054 		syscallarg(const char *) path;
2055 		syscallarg(struct stat *) ub;
2056 	} */ *uap = v;
2057 	struct proc *p = l->l_proc;
2058 	struct stat sb;
2059 	int error;
2060 	struct nameidata nd;
2061 
2062 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2063 	    SCARG(uap, path), p);
2064 	if ((error = namei(&nd)) != 0)
2065 		return (error);
2066 	error = vn_stat(nd.ni_vp, &sb, p);
2067 	vput(nd.ni_vp);
2068 	if (error)
2069 		return (error);
2070 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2071 	return (error);
2072 }
2073 
2074 /*
2075  * Get file status; this version does not follow links.
2076  */
2077 /* ARGSUSED */
2078 int
2079 sys___lstat13(l, v, retval)
2080 	struct lwp *l;
2081 	void *v;
2082 	register_t *retval;
2083 {
2084 	struct sys___lstat13_args /* {
2085 		syscallarg(const char *) path;
2086 		syscallarg(struct stat *) ub;
2087 	} */ *uap = v;
2088 	struct proc *p = l->l_proc;
2089 	struct stat sb;
2090 	int error;
2091 	struct nameidata nd;
2092 
2093 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2094 	    SCARG(uap, path), p);
2095 	if ((error = namei(&nd)) != 0)
2096 		return (error);
2097 	error = vn_stat(nd.ni_vp, &sb, p);
2098 	vput(nd.ni_vp);
2099 	if (error)
2100 		return (error);
2101 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2102 	return (error);
2103 }
2104 
2105 /*
2106  * Get configurable pathname variables.
2107  */
2108 /* ARGSUSED */
2109 int
2110 sys_pathconf(l, v, retval)
2111 	struct lwp *l;
2112 	void *v;
2113 	register_t *retval;
2114 {
2115 	struct sys_pathconf_args /* {
2116 		syscallarg(const char *) path;
2117 		syscallarg(int) name;
2118 	} */ *uap = v;
2119 	struct proc *p = l->l_proc;
2120 	int error;
2121 	struct nameidata nd;
2122 
2123 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2124 	    SCARG(uap, path), p);
2125 	if ((error = namei(&nd)) != 0)
2126 		return (error);
2127 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2128 	vput(nd.ni_vp);
2129 	return (error);
2130 }
2131 
2132 /*
2133  * Return target name of a symbolic link.
2134  */
2135 /* ARGSUSED */
2136 int
2137 sys_readlink(l, v, retval)
2138 	struct lwp *l;
2139 	void *v;
2140 	register_t *retval;
2141 {
2142 	struct sys_readlink_args /* {
2143 		syscallarg(const char *) path;
2144 		syscallarg(char *) buf;
2145 		syscallarg(size_t) count;
2146 	} */ *uap = v;
2147 	struct proc *p = l->l_proc;
2148 	struct vnode *vp;
2149 	struct iovec aiov;
2150 	struct uio auio;
2151 	int error;
2152 	struct nameidata nd;
2153 
2154 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2155 	    SCARG(uap, path), p);
2156 	if ((error = namei(&nd)) != 0)
2157 		return (error);
2158 	vp = nd.ni_vp;
2159 	if (vp->v_type != VLNK)
2160 		error = EINVAL;
2161 	else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2162 	    (error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) == 0) {
2163 		aiov.iov_base = SCARG(uap, buf);
2164 		aiov.iov_len = SCARG(uap, count);
2165 		auio.uio_iov = &aiov;
2166 		auio.uio_iovcnt = 1;
2167 		auio.uio_offset = 0;
2168 		auio.uio_rw = UIO_READ;
2169 		auio.uio_segflg = UIO_USERSPACE;
2170 		auio.uio_procp = p;
2171 		auio.uio_resid = SCARG(uap, count);
2172 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2173 	}
2174 	vput(vp);
2175 	*retval = SCARG(uap, count) - auio.uio_resid;
2176 	return (error);
2177 }
2178 
2179 /*
2180  * Change flags of a file given a path name.
2181  */
2182 /* ARGSUSED */
2183 int
2184 sys_chflags(l, v, retval)
2185 	struct lwp *l;
2186 	void *v;
2187 	register_t *retval;
2188 {
2189 	struct sys_chflags_args /* {
2190 		syscallarg(const char *) path;
2191 		syscallarg(u_long) flags;
2192 	} */ *uap = v;
2193 	struct proc *p = l->l_proc;
2194 	struct vnode *vp;
2195 	int error;
2196 	struct nameidata nd;
2197 
2198 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2199 	if ((error = namei(&nd)) != 0)
2200 		return (error);
2201 	vp = nd.ni_vp;
2202 	error = change_flags(vp, SCARG(uap, flags), p);
2203 	vput(vp);
2204 	return (error);
2205 }
2206 
2207 /*
2208  * Change flags of a file given a file descriptor.
2209  */
2210 /* ARGSUSED */
2211 int
2212 sys_fchflags(l, v, retval)
2213 	struct lwp *l;
2214 	void *v;
2215 	register_t *retval;
2216 {
2217 	struct sys_fchflags_args /* {
2218 		syscallarg(int) fd;
2219 		syscallarg(u_long) flags;
2220 	} */ *uap = v;
2221 	struct proc *p = l->l_proc;
2222 	struct vnode *vp;
2223 	struct file *fp;
2224 	int error;
2225 
2226 	/* getvnode() will use the descriptor for us */
2227 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2228 		return (error);
2229 	vp = (struct vnode *)fp->f_data;
2230 	error = change_flags(vp, SCARG(uap, flags), p);
2231 	VOP_UNLOCK(vp, 0);
2232 	FILE_UNUSE(fp, p);
2233 	return (error);
2234 }
2235 
2236 /*
2237  * Change flags of a file given a path name; this version does
2238  * not follow links.
2239  */
2240 int
2241 sys_lchflags(l, v, retval)
2242 	struct lwp *l;
2243 	void *v;
2244 	register_t *retval;
2245 {
2246 	struct sys_lchflags_args /* {
2247 		syscallarg(const char *) path;
2248 		syscallarg(u_long) flags;
2249 	} */ *uap = v;
2250 	struct proc *p = l->l_proc;
2251 	struct vnode *vp;
2252 	int error;
2253 	struct nameidata nd;
2254 
2255 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2256 	if ((error = namei(&nd)) != 0)
2257 		return (error);
2258 	vp = nd.ni_vp;
2259 	error = change_flags(vp, SCARG(uap, flags), p);
2260 	vput(vp);
2261 	return (error);
2262 }
2263 
2264 /*
2265  * Common routine to change flags of a file.
2266  */
2267 int
2268 change_flags(vp, flags, p)
2269 	struct vnode *vp;
2270 	u_long flags;
2271 	struct proc *p;
2272 {
2273 	struct vattr vattr;
2274 	int error;
2275 
2276 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2277 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2278 	/*
2279 	 * Non-superusers cannot change the flags on devices, even if they
2280 	 * own them.
2281 	 */
2282 	if (suser(p->p_ucred, &p->p_acflag) != 0) {
2283 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2284 			goto out;
2285 		if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2286 			error = EINVAL;
2287 			goto out;
2288 		}
2289 	}
2290 	VATTR_NULL(&vattr);
2291 	vattr.va_flags = flags;
2292 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2293 out:
2294 	return (error);
2295 }
2296 
2297 /*
2298  * Change mode of a file given path name; this version follows links.
2299  */
2300 /* ARGSUSED */
2301 int
2302 sys_chmod(l, v, retval)
2303 	struct lwp *l;
2304 	void *v;
2305 	register_t *retval;
2306 {
2307 	struct sys_chmod_args /* {
2308 		syscallarg(const char *) path;
2309 		syscallarg(int) mode;
2310 	} */ *uap = v;
2311 	struct proc *p = l->l_proc;
2312 	int error;
2313 	struct nameidata nd;
2314 
2315 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2316 	if ((error = namei(&nd)) != 0)
2317 		return (error);
2318 
2319 	error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
2320 
2321 	vrele(nd.ni_vp);
2322 	return (error);
2323 }
2324 
2325 /*
2326  * Change mode of a file given a file descriptor.
2327  */
2328 /* ARGSUSED */
2329 int
2330 sys_fchmod(l, v, retval)
2331 	struct lwp *l;
2332 	void *v;
2333 	register_t *retval;
2334 {
2335 	struct sys_fchmod_args /* {
2336 		syscallarg(int) fd;
2337 		syscallarg(int) mode;
2338 	} */ *uap = v;
2339 	struct proc *p = l->l_proc;
2340 	struct file *fp;
2341 	int error;
2342 
2343 	/* getvnode() will use the descriptor for us */
2344 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2345 		return (error);
2346 
2347 	error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), p);
2348 	FILE_UNUSE(fp, p);
2349 	return (error);
2350 }
2351 
2352 /*
2353  * Change mode of a file given path name; this version does not follow links.
2354  */
2355 /* ARGSUSED */
2356 int
2357 sys_lchmod(l, v, retval)
2358 	struct lwp *l;
2359 	void *v;
2360 	register_t *retval;
2361 {
2362 	struct sys_lchmod_args /* {
2363 		syscallarg(const char *) path;
2364 		syscallarg(int) mode;
2365 	} */ *uap = v;
2366 	struct proc *p = l->l_proc;
2367 	int error;
2368 	struct nameidata nd;
2369 
2370 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2371 	if ((error = namei(&nd)) != 0)
2372 		return (error);
2373 
2374 	error = change_mode(nd.ni_vp, SCARG(uap, mode), p);
2375 
2376 	vrele(nd.ni_vp);
2377 	return (error);
2378 }
2379 
2380 /*
2381  * Common routine to set mode given a vnode.
2382  */
2383 static int
2384 change_mode(vp, mode, p)
2385 	struct vnode *vp;
2386 	int mode;
2387 	struct proc *p;
2388 {
2389 	struct vattr vattr;
2390 	int error;
2391 
2392 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2393 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2394 	VATTR_NULL(&vattr);
2395 	vattr.va_mode = mode & ALLPERMS;
2396 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2397 	VOP_UNLOCK(vp, 0);
2398 	return (error);
2399 }
2400 
2401 /*
2402  * Set ownership given a path name; this version follows links.
2403  */
2404 /* ARGSUSED */
2405 int
2406 sys_chown(l, v, retval)
2407 	struct lwp *l;
2408 	void *v;
2409 	register_t *retval;
2410 {
2411 	struct sys_chown_args /* {
2412 		syscallarg(const char *) path;
2413 		syscallarg(uid_t) uid;
2414 		syscallarg(gid_t) gid;
2415 	} */ *uap = v;
2416 	struct proc *p = l->l_proc;
2417 	int error;
2418 	struct nameidata nd;
2419 
2420 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2421 	if ((error = namei(&nd)) != 0)
2422 		return (error);
2423 
2424 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
2425 
2426 	vrele(nd.ni_vp);
2427 	return (error);
2428 }
2429 
2430 /*
2431  * Set ownership given a path name; this version follows links.
2432  * Provides POSIX semantics.
2433  */
2434 /* ARGSUSED */
2435 int
2436 sys___posix_chown(l, v, retval)
2437 	struct lwp *l;
2438 	void *v;
2439 	register_t *retval;
2440 {
2441 	struct sys_chown_args /* {
2442 		syscallarg(const char *) path;
2443 		syscallarg(uid_t) uid;
2444 		syscallarg(gid_t) gid;
2445 	} */ *uap = v;
2446 	struct proc *p = l->l_proc;
2447 	int error;
2448 	struct nameidata nd;
2449 
2450 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2451 	if ((error = namei(&nd)) != 0)
2452 		return (error);
2453 
2454 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
2455 
2456 	vrele(nd.ni_vp);
2457 	return (error);
2458 }
2459 
2460 /*
2461  * Set ownership given a file descriptor.
2462  */
2463 /* ARGSUSED */
2464 int
2465 sys_fchown(l, v, retval)
2466 	struct lwp *l;
2467 	void *v;
2468 	register_t *retval;
2469 {
2470 	struct sys_fchown_args /* {
2471 		syscallarg(int) fd;
2472 		syscallarg(uid_t) uid;
2473 		syscallarg(gid_t) gid;
2474 	} */ *uap = v;
2475 	struct proc *p = l->l_proc;
2476 	int error;
2477 	struct file *fp;
2478 
2479 	/* getvnode() will use the descriptor for us */
2480 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2481 		return (error);
2482 
2483 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2484 	    SCARG(uap, gid), p, 0);
2485 	FILE_UNUSE(fp, p);
2486 	return (error);
2487 }
2488 
2489 /*
2490  * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2491  */
2492 /* ARGSUSED */
2493 int
2494 sys___posix_fchown(l, v, retval)
2495 	struct lwp *l;
2496 	void *v;
2497 	register_t *retval;
2498 {
2499 	struct sys_fchown_args /* {
2500 		syscallarg(int) fd;
2501 		syscallarg(uid_t) uid;
2502 		syscallarg(gid_t) gid;
2503 	} */ *uap = v;
2504 	struct proc *p = l->l_proc;
2505 	int error;
2506 	struct file *fp;
2507 
2508 	/* getvnode() will use the descriptor for us */
2509 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2510 		return (error);
2511 
2512 	error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2513 	    SCARG(uap, gid), p, 1);
2514 	FILE_UNUSE(fp, p);
2515 	return (error);
2516 }
2517 
2518 /*
2519  * Set ownership given a path name; this version does not follow links.
2520  */
2521 /* ARGSUSED */
2522 int
2523 sys_lchown(l, v, retval)
2524 	struct lwp *l;
2525 	void *v;
2526 	register_t *retval;
2527 {
2528 	struct sys_lchown_args /* {
2529 		syscallarg(const char *) path;
2530 		syscallarg(uid_t) uid;
2531 		syscallarg(gid_t) gid;
2532 	} */ *uap = v;
2533 	struct proc *p = l->l_proc;
2534 	int error;
2535 	struct nameidata nd;
2536 
2537 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2538 	if ((error = namei(&nd)) != 0)
2539 		return (error);
2540 
2541 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 0);
2542 
2543 	vrele(nd.ni_vp);
2544 	return (error);
2545 }
2546 
2547 /*
2548  * Set ownership given a path name; this version does not follow links.
2549  * Provides POSIX/XPG semantics.
2550  */
2551 /* ARGSUSED */
2552 int
2553 sys___posix_lchown(l, v, retval)
2554 	struct lwp *l;
2555 	void *v;
2556 	register_t *retval;
2557 {
2558 	struct sys_lchown_args /* {
2559 		syscallarg(const char *) path;
2560 		syscallarg(uid_t) uid;
2561 		syscallarg(gid_t) gid;
2562 	} */ *uap = v;
2563 	struct proc *p = l->l_proc;
2564 	int error;
2565 	struct nameidata nd;
2566 
2567 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2568 	if ((error = namei(&nd)) != 0)
2569 		return (error);
2570 
2571 	error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), p, 1);
2572 
2573 	vrele(nd.ni_vp);
2574 	return (error);
2575 }
2576 
2577 /*
2578  * Common routine to set ownership given a vnode.
2579  */
2580 static int
2581 change_owner(vp, uid, gid, p, posix_semantics)
2582 	struct vnode *vp;
2583 	uid_t uid;
2584 	gid_t gid;
2585 	struct proc *p;
2586 	int posix_semantics;
2587 {
2588 	struct vattr vattr;
2589 	mode_t newmode;
2590 	int error;
2591 
2592 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2593 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2594 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2595 		goto out;
2596 
2597 #define CHANGED(x) ((int)(x) != -1)
2598 	newmode = vattr.va_mode;
2599 	if (posix_semantics) {
2600 		/*
2601 		 * POSIX/XPG semantics: if the caller is not the super-user,
2602 		 * clear set-user-id and set-group-id bits.  Both POSIX and
2603 		 * the XPG consider the behaviour for calls by the super-user
2604 		 * implementation-defined; we leave the set-user-id and set-
2605 		 * group-id settings intact in that case.
2606 		 */
2607 		if (suser(p->p_ucred, NULL) != 0)
2608 			newmode &= ~(S_ISUID | S_ISGID);
2609 	} else {
2610 		/*
2611 		 * NetBSD semantics: when changing owner and/or group,
2612 		 * clear the respective bit(s).
2613 		 */
2614 		if (CHANGED(uid))
2615 			newmode &= ~S_ISUID;
2616 		if (CHANGED(gid))
2617 			newmode &= ~S_ISGID;
2618 	}
2619 	/* Update va_mode iff altered. */
2620 	if (vattr.va_mode == newmode)
2621 		newmode = VNOVAL;
2622 
2623 	VATTR_NULL(&vattr);
2624 	vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2625 	vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2626 	vattr.va_mode = newmode;
2627 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2628 #undef CHANGED
2629 
2630 out:
2631 	VOP_UNLOCK(vp, 0);
2632 	return (error);
2633 }
2634 
2635 /*
2636  * Set the access and modification times given a path name; this
2637  * version follows links.
2638  */
2639 /* ARGSUSED */
2640 int
2641 sys_utimes(l, v, retval)
2642 	struct lwp *l;
2643 	void *v;
2644 	register_t *retval;
2645 {
2646 	struct sys_utimes_args /* {
2647 		syscallarg(const char *) path;
2648 		syscallarg(const struct timeval *) tptr;
2649 	} */ *uap = v;
2650 	struct proc *p = l->l_proc;
2651 	int error;
2652 	struct nameidata nd;
2653 
2654 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2655 	if ((error = namei(&nd)) != 0)
2656 		return (error);
2657 
2658 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
2659 
2660 	vrele(nd.ni_vp);
2661 	return (error);
2662 }
2663 
2664 /*
2665  * Set the access and modification times given a file descriptor.
2666  */
2667 /* ARGSUSED */
2668 int
2669 sys_futimes(l, v, retval)
2670 	struct lwp *l;
2671 	void *v;
2672 	register_t *retval;
2673 {
2674 	struct sys_futimes_args /* {
2675 		syscallarg(int) fd;
2676 		syscallarg(const struct timeval *) tptr;
2677 	} */ *uap = v;
2678 	struct proc *p = l->l_proc;
2679 	int error;
2680 	struct file *fp;
2681 
2682 	/* getvnode() will use the descriptor for us */
2683 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2684 		return (error);
2685 
2686 	error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), p);
2687 	FILE_UNUSE(fp, p);
2688 	return (error);
2689 }
2690 
2691 /*
2692  * Set the access and modification times given a path name; this
2693  * version does not follow links.
2694  */
2695 /* ARGSUSED */
2696 int
2697 sys_lutimes(l, v, retval)
2698 	struct lwp *l;
2699 	void *v;
2700 	register_t *retval;
2701 {
2702 	struct sys_lutimes_args /* {
2703 		syscallarg(const char *) path;
2704 		syscallarg(const struct timeval *) tptr;
2705 	} */ *uap = v;
2706 	struct proc *p = l->l_proc;
2707 	int error;
2708 	struct nameidata nd;
2709 
2710 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2711 	if ((error = namei(&nd)) != 0)
2712 		return (error);
2713 
2714 	error = change_utimes(nd.ni_vp, SCARG(uap, tptr), p);
2715 
2716 	vrele(nd.ni_vp);
2717 	return (error);
2718 }
2719 
2720 /*
2721  * Common routine to set access and modification times given a vnode.
2722  */
2723 static int
2724 change_utimes(vp, tptr, p)
2725 	struct vnode *vp;
2726 	const struct timeval *tptr;
2727 	struct proc *p;
2728 {
2729 	struct timeval tv[2];
2730 	struct vattr vattr;
2731 	int error;
2732 
2733 	VATTR_NULL(&vattr);
2734 	if (tptr == NULL) {
2735 		microtime(&tv[0]);
2736 		tv[1] = tv[0];
2737 		vattr.va_vaflags |= VA_UTIMES_NULL;
2738 	} else {
2739 		error = copyin(tptr, tv, sizeof(tv));
2740 		if (error)
2741 			return (error);
2742 	}
2743 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2744 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2745 	vattr.va_atime.tv_sec = tv[0].tv_sec;
2746 	vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
2747 	vattr.va_mtime.tv_sec = tv[1].tv_sec;
2748 	vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
2749 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2750 	VOP_UNLOCK(vp, 0);
2751 	return (error);
2752 }
2753 
2754 /*
2755  * Truncate a file given its path name.
2756  */
2757 /* ARGSUSED */
2758 int
2759 sys_truncate(l, v, retval)
2760 	struct lwp *l;
2761 	void *v;
2762 	register_t *retval;
2763 {
2764 	struct sys_truncate_args /* {
2765 		syscallarg(const char *) path;
2766 		syscallarg(int) pad;
2767 		syscallarg(off_t) length;
2768 	} */ *uap = v;
2769 	struct proc *p = l->l_proc;
2770 	struct vnode *vp;
2771 	struct vattr vattr;
2772 	int error;
2773 	struct nameidata nd;
2774 
2775 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2776 	if ((error = namei(&nd)) != 0)
2777 		return (error);
2778 	vp = nd.ni_vp;
2779 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2780 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2781 	if (vp->v_type == VDIR)
2782 		error = EISDIR;
2783 	else if ((error = vn_writechk(vp)) == 0 &&
2784 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2785 		VATTR_NULL(&vattr);
2786 		vattr.va_size = SCARG(uap, length);
2787 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2788 	}
2789 	vput(vp);
2790 	return (error);
2791 }
2792 
2793 /*
2794  * Truncate a file given a file descriptor.
2795  */
2796 /* ARGSUSED */
2797 int
2798 sys_ftruncate(l, v, retval)
2799 	struct lwp *l;
2800 	void *v;
2801 	register_t *retval;
2802 {
2803 	struct sys_ftruncate_args /* {
2804 		syscallarg(int) fd;
2805 		syscallarg(int) pad;
2806 		syscallarg(off_t) length;
2807 	} */ *uap = v;
2808 	struct proc *p = l->l_proc;
2809 	struct vattr vattr;
2810 	struct vnode *vp;
2811 	struct file *fp;
2812 	int error;
2813 
2814 	/* getvnode() will use the descriptor for us */
2815 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2816 		return (error);
2817 	if ((fp->f_flag & FWRITE) == 0) {
2818 		error = EINVAL;
2819 		goto out;
2820 	}
2821 	vp = (struct vnode *)fp->f_data;
2822 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2823 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2824 	if (vp->v_type == VDIR)
2825 		error = EISDIR;
2826 	else if ((error = vn_writechk(vp)) == 0) {
2827 		VATTR_NULL(&vattr);
2828 		vattr.va_size = SCARG(uap, length);
2829 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2830 	}
2831 	VOP_UNLOCK(vp, 0);
2832  out:
2833 	FILE_UNUSE(fp, p);
2834 	return (error);
2835 }
2836 
2837 /*
2838  * Sync an open file.
2839  */
2840 /* ARGSUSED */
2841 int
2842 sys_fsync(l, v, retval)
2843 	struct lwp *l;
2844 	void *v;
2845 	register_t *retval;
2846 {
2847 	struct sys_fsync_args /* {
2848 		syscallarg(int) fd;
2849 	} */ *uap = v;
2850 	struct proc *p = l->l_proc;
2851 	struct vnode *vp;
2852 	struct file *fp;
2853 	int error;
2854 
2855 	/* getvnode() will use the descriptor for us */
2856 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2857 		return (error);
2858 	vp = (struct vnode *)fp->f_data;
2859 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2860 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, p);
2861 	if (error == 0 && bioops.io_fsync != NULL &&
2862 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2863 		(*bioops.io_fsync)(vp);
2864 	VOP_UNLOCK(vp, 0);
2865 	FILE_UNUSE(fp, p);
2866 	return (error);
2867 }
2868 
2869 /*
2870  * Sync the data of an open file.
2871  */
2872 /* ARGSUSED */
2873 int
2874 sys_fdatasync(l, v, retval)
2875 	struct lwp *l;
2876 	void *v;
2877 	register_t *retval;
2878 {
2879 	struct sys_fdatasync_args /* {
2880 		syscallarg(int) fd;
2881 	} */ *uap = v;
2882 	struct proc *p = l->l_proc;
2883 	struct vnode *vp;
2884 	struct file *fp;
2885 	int error;
2886 
2887 	/* getvnode() will use the descriptor for us */
2888 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2889 		return (error);
2890 	vp = (struct vnode *)fp->f_data;
2891 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2892 	error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, p);
2893 	VOP_UNLOCK(vp, 0);
2894 	FILE_UNUSE(fp, p);
2895 	return (error);
2896 }
2897 
2898 /*
2899  * Rename files, (standard) BSD semantics frontend.
2900  */
2901 /* ARGSUSED */
2902 int
2903 sys_rename(l, v, retval)
2904 	struct lwp *l;
2905 	void *v;
2906 	register_t *retval;
2907 {
2908 	struct sys_rename_args /* {
2909 		syscallarg(const char *) from;
2910 		syscallarg(const char *) to;
2911 	} */ *uap = v;
2912 	struct proc *p = l->l_proc;
2913 
2914 	return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 0));
2915 }
2916 
2917 /*
2918  * Rename files, POSIX semantics frontend.
2919  */
2920 /* ARGSUSED */
2921 int
2922 sys___posix_rename(l, v, retval)
2923 	struct lwp *l;
2924 	void *v;
2925 	register_t *retval;
2926 {
2927 	struct sys___posix_rename_args /* {
2928 		syscallarg(const char *) from;
2929 		syscallarg(const char *) to;
2930 	} */ *uap = v;
2931 	struct proc *p = l->l_proc;
2932 
2933 	return (rename_files(SCARG(uap, from), SCARG(uap, to), p, 1));
2934 }
2935 
2936 /*
2937  * Rename files.  Source and destination must either both be directories,
2938  * or both not be directories.  If target is a directory, it must be empty.
2939  * If `from' and `to' refer to the same object, the value of the `retain'
2940  * argument is used to determine whether `from' will be
2941  *
2942  * (retain == 0)	deleted unless `from' and `to' refer to the same
2943  *			object in the file system's name space (BSD).
2944  * (retain == 1)	always retained (POSIX).
2945  */
2946 static int
2947 rename_files(from, to, p, retain)
2948 	const char *from, *to;
2949 	struct proc *p;
2950 	int retain;
2951 {
2952 	struct vnode *tvp, *fvp, *tdvp;
2953 	struct nameidata fromnd, tond;
2954 	int error;
2955 
2956 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2957 	    from, p);
2958 	if ((error = namei(&fromnd)) != 0)
2959 		return (error);
2960 	fvp = fromnd.ni_vp;
2961 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
2962 	    UIO_USERSPACE, to, p);
2963 	if ((error = namei(&tond)) != 0) {
2964 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2965 		vrele(fromnd.ni_dvp);
2966 		vrele(fvp);
2967 		goto out1;
2968 	}
2969 	tdvp = tond.ni_dvp;
2970 	tvp = tond.ni_vp;
2971 
2972 	if (tvp != NULL) {
2973 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2974 			error = ENOTDIR;
2975 			goto out;
2976 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2977 			error = EISDIR;
2978 			goto out;
2979 		}
2980 	}
2981 
2982 	if (fvp == tdvp)
2983 		error = EINVAL;
2984 
2985 	/*
2986 	 * Source and destination refer to the same object.
2987 	 */
2988 	if (fvp == tvp) {
2989 		if (retain)
2990 			error = -1;
2991 		else if (fromnd.ni_dvp == tdvp &&
2992 		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2993 		    !memcmp(fromnd.ni_cnd.cn_nameptr,
2994 		          tond.ni_cnd.cn_nameptr,
2995 		          fromnd.ni_cnd.cn_namelen))
2996 		error = -1;
2997 	}
2998 
2999 out:
3000 	if (!error) {
3001 		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
3002 		if (fromnd.ni_dvp != tdvp)
3003 			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3004 		if (tvp) {
3005 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
3006 		}
3007 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3008 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3009 	} else {
3010 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3011 		if (tdvp == tvp)
3012 			vrele(tdvp);
3013 		else
3014 			vput(tdvp);
3015 		if (tvp)
3016 			vput(tvp);
3017 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3018 		vrele(fromnd.ni_dvp);
3019 		vrele(fvp);
3020 	}
3021 	vrele(tond.ni_startdir);
3022 	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3023 out1:
3024 	if (fromnd.ni_startdir)
3025 		vrele(fromnd.ni_startdir);
3026 	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3027 	return (error == -1 ? 0 : error);
3028 }
3029 
3030 /*
3031  * Make a directory file.
3032  */
3033 /* ARGSUSED */
3034 int
3035 sys_mkdir(l, v, retval)
3036 	struct lwp *l;
3037 	void *v;
3038 	register_t *retval;
3039 {
3040 	struct sys_mkdir_args /* {
3041 		syscallarg(const char *) path;
3042 		syscallarg(int) mode;
3043 	} */ *uap = v;
3044 	struct proc *p = l->l_proc;
3045 	struct vnode *vp;
3046 	struct vattr vattr;
3047 	int error;
3048 	struct nameidata nd;
3049 
3050 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
3051 	if ((error = namei(&nd)) != 0)
3052 		return (error);
3053 	vp = nd.ni_vp;
3054 	if (vp != NULL) {
3055 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3056 		if (nd.ni_dvp == vp)
3057 			vrele(nd.ni_dvp);
3058 		else
3059 			vput(nd.ni_dvp);
3060 		vrele(vp);
3061 		return (EEXIST);
3062 	}
3063 	VATTR_NULL(&vattr);
3064 	vattr.va_type = VDIR;
3065 	vattr.va_mode =
3066 	    (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3067 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3068 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3069 	if (!error)
3070 		vput(nd.ni_vp);
3071 	return (error);
3072 }
3073 
3074 /*
3075  * Remove a directory file.
3076  */
3077 /* ARGSUSED */
3078 int
3079 sys_rmdir(l, v, retval)
3080 	struct lwp *l;
3081 	void *v;
3082 	register_t *retval;
3083 {
3084 	struct sys_rmdir_args /* {
3085 		syscallarg(const char *) path;
3086 	} */ *uap = v;
3087 	struct proc *p = l->l_proc;
3088 	struct vnode *vp;
3089 	int error;
3090 	struct nameidata nd;
3091 
3092 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3093 	    SCARG(uap, path), p);
3094 	if ((error = namei(&nd)) != 0)
3095 		return (error);
3096 	vp = nd.ni_vp;
3097 	if (vp->v_type != VDIR) {
3098 		error = ENOTDIR;
3099 		goto out;
3100 	}
3101 	/*
3102 	 * No rmdir "." please.
3103 	 */
3104 	if (nd.ni_dvp == vp) {
3105 		error = EINVAL;
3106 		goto out;
3107 	}
3108 	/*
3109 	 * The root of a mounted filesystem cannot be deleted.
3110 	 */
3111 	if (vp->v_flag & VROOT)
3112 		error = EBUSY;
3113 out:
3114 	if (!error) {
3115 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3116 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3117 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3118 	} else {
3119 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3120 		if (nd.ni_dvp == vp)
3121 			vrele(nd.ni_dvp);
3122 		else
3123 			vput(nd.ni_dvp);
3124 		vput(vp);
3125 	}
3126 	return (error);
3127 }
3128 
3129 /*
3130  * Read a block of directory entries in a file system independent format.
3131  */
3132 int
3133 sys_getdents(l, v, retval)
3134 	struct lwp *l;
3135 	void *v;
3136 	register_t *retval;
3137 {
3138 	struct sys_getdents_args /* {
3139 		syscallarg(int) fd;
3140 		syscallarg(char *) buf;
3141 		syscallarg(size_t) count;
3142 	} */ *uap = v;
3143 	struct proc *p = l->l_proc;
3144 	struct file *fp;
3145 	int error, done;
3146 
3147 	/* getvnode() will use the descriptor for us */
3148 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3149 		return (error);
3150 	if ((fp->f_flag & FREAD) == 0) {
3151 		error = EBADF;
3152 		goto out;
3153 	}
3154 	error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3155 			SCARG(uap, count), &done, p, 0, 0);
3156 	*retval = done;
3157  out:
3158 	FILE_UNUSE(fp, p);
3159 	return (error);
3160 }
3161 
3162 /*
3163  * Set the mode mask for creation of filesystem nodes.
3164  */
3165 int
3166 sys_umask(l, v, retval)
3167 	struct lwp *l;
3168 	void *v;
3169 	register_t *retval;
3170 {
3171 	struct sys_umask_args /* {
3172 		syscallarg(mode_t) newmask;
3173 	} */ *uap = v;
3174 	struct proc *p = l->l_proc;
3175 	struct cwdinfo *cwdi;
3176 
3177 	cwdi = p->p_cwdi;
3178 	*retval = cwdi->cwdi_cmask;
3179 	cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3180 	return (0);
3181 }
3182 
3183 /*
3184  * Void all references to file by ripping underlying filesystem
3185  * away from vnode.
3186  */
3187 /* ARGSUSED */
3188 int
3189 sys_revoke(l, v, retval)
3190 	struct lwp *l;
3191 	void *v;
3192 	register_t *retval;
3193 {
3194 	struct sys_revoke_args /* {
3195 		syscallarg(const char *) path;
3196 	} */ *uap = v;
3197 	struct proc *p = l->l_proc;
3198 	struct vnode *vp;
3199 	struct vattr vattr;
3200 	int error;
3201 	struct nameidata nd;
3202 
3203 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3204 	if ((error = namei(&nd)) != 0)
3205 		return (error);
3206 	vp = nd.ni_vp;
3207 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3208 		goto out;
3209 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3210 	    (error = suser(p->p_ucred, &p->p_acflag)) != 0)
3211 		goto out;
3212 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3213 		VOP_REVOKE(vp, REVOKEALL);
3214 out:
3215 	vrele(vp);
3216 	return (error);
3217 }
3218 
3219 /*
3220  * Convert a user file descriptor to a kernel file entry.
3221  */
3222 int
3223 getvnode(fdp, fd, fpp)
3224 	struct filedesc *fdp;
3225 	int fd;
3226 	struct file **fpp;
3227 {
3228 	struct vnode *vp;
3229 	struct file *fp;
3230 
3231 	if ((fp = fd_getfile(fdp, fd)) == NULL)
3232 		return (EBADF);
3233 
3234 	FILE_USE(fp);
3235 
3236 	if (fp->f_type != DTYPE_VNODE) {
3237 		FILE_UNUSE(fp, NULL);
3238 		return (EINVAL);
3239 	}
3240 
3241 	vp = (struct vnode *)fp->f_data;
3242 	if (vp->v_type == VBAD) {
3243 		FILE_UNUSE(fp, NULL);
3244 		return (EBADF);
3245 	}
3246 
3247 	*fpp = fp;
3248 	return (0);
3249 }
3250