xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.152 2008/11/11 02:11:25 tedu Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/uio.h>
51 #include <sys/malloc.h>
52 #include <sys/pool.h>
53 #include <sys/dirent.h>
54 #include <sys/dkio.h>
55 #include <sys/disklabel.h>
56 
57 #include <sys/syscallargs.h>
58 
59 #include <uvm/uvm_extern.h>
60 #include <sys/sysctl.h>
61 
/* Declared extern here; the definition is not in the visible part of this file. */
62 extern int suid_clear;
63 int	usermount = 0;		/* sysctl: by default, users may not mount */
64 
/* Shared lookup + directory/search-permission check used by sys_chdir() and sys_chroot(). */
65 static int change_dir(struct nameidata *, struct proc *);
66 
/* Re-point process cwd/root directories (and rootvnode) at a freshly mounted root. */
67 void checkdirs(struct vnode *);
68 
/* Copy a struct statfs out to user space, zeroing f_fsid when suser() fails. */
69 int copyout_statfs(struct statfs *, void *, struct proc *);
70 
71 /*
72  * Virtual File System System Calls
73  */
74 
75 /*
76  * Mount a file system.
 *
 * System call arguments (see the uap layout below): the filesystem type,
 * the path of the vnode to mount on, the mount flags, and an opaque data
 * pointer that is handed through to VFS_MOUNT() untouched.
 *
 * Two modes of operation:
 *  - MNT_UPDATE set: `path' must resolve to the root vnode (VROOT) of an
 *    existing mount.  That mount's flags are adjusted and VFS_MOUNT() is
 *    called on it; if that fails the flags saved in `flag' are restored.
 *  - otherwise: a new struct mount is allocated, attached to the covered
 *    directory and, if VFS_MOUNT() succeeds, appended to mountlist.
 *
 * Unless `usermount' is non-zero the caller must pass suser().  For
 * callers whose cr_uid is not 0, MNT_EXPORTED is refused and MNT_NOSUID
 * and MNT_NODEV are forced on.
 *
 * Returns 0 on success or an errno value.
77  */
78 /* ARGSUSED */
79 int
80 sys_mount(struct proc *p, void *v, register_t *retval)
81 {
82 	struct sys_mount_args /* {
83 		syscallarg(const char *) type;
84 		syscallarg(const char *) path;
85 		syscallarg(int) flags;
86 		syscallarg(void *) data;
87 	} */ *uap = v;
88 	struct vnode *vp;
89 	struct mount *mp;
	/* flag: copy of mp->mnt_flag taken on MNT_UPDATE, restored on failure */
90 	int error, flag = 0;
91 #ifdef COMPAT_43
92 	u_long fstypenum = 0;
93 #endif
94 	char fstypename[MFSNAMELEN];
95 	char fspath[MNAMELEN];
96 	struct vattr va;
97 	struct nameidata nd;
98 	struct vfsconf *vfsp;
99 
100 	if (usermount == 0 && (error = suser(p, 0)))
101 		return (error);
102 
103 	/*
104 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
105 	 */
106 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
107 	if (error)
108 		return(error);
109 
110 	/*
111 	 * Get vnode to be covered
	 *
	 * The lookup runs on the kernel copy of the path (UIO_SYSSPACE)
	 * with LOCKLEAF; every early exit below drops vp with vput().
112 	 */
113 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
114 	if ((error = namei(&nd)) != 0)
115 		return (error);
116 	vp = nd.ni_vp;
117 	if (SCARG(uap, flags) & MNT_UPDATE) {
		/* An update must name the root vnode of the mount. */
118 		if ((vp->v_flag & VROOT) == 0) {
119 			vput(vp);
120 			return (EINVAL);
121 		}
122 		mp = vp->v_mount;
123 		flag = mp->mnt_flag;
124 		/*
125 		 * We only allow the filesystem to be reloaded if it
126 		 * is currently mounted read-only.
127 		 */
128 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
129 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
130 			vput(vp);
131 			return (EOPNOTSUPP);	/* Needs translation */
132 		}
133 
134 		/*
135 		 * Only root, or the user that did the original mount is
136 		 * permitted to update it.
137 		 */
138 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
139 		    (error = suser(p, 0))) {
140 			vput(vp);
141 			return (error);
142 		}
143 		/*
144 		 * Do not allow NFS export by non-root users. Silently
145 		 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and
146 		 * inherit MNT_NOEXEC from the mount point.
147 		 */
148 		if (p->p_ucred->cr_uid != 0) {
149 			if (SCARG(uap, flags) & MNT_EXPORTED) {
150 				vput(vp);
151 				return (EPERM);
152 			}
153 			SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
154 			if (flag & MNT_NOEXEC)
155 				SCARG(uap, flags) |= MNT_NOEXEC;
156 		}
		/* Busy the mount for the update; any failure aborts it. */
157 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
158 			vput(vp);
159 			return (error);
160 		}
		/* MNT_UPDATE in mnt_flag selects the update exit path after VFS_MOUNT(). */
161 		mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_UPDATE);
162 		goto update;
163 	}
164 	/*
165 	 * If the user is not root, ensure that they own the directory
166 	 * onto which we are attempting to mount.
167 	 */
168 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
169 	    (va.va_uid != p->p_ucred->cr_uid &&
170 	    (error = suser(p, 0)))) {
171 		vput(vp);
172 		return (error);
173 	}
174 	/*
175 	 * Do not allow NFS export by non-root users. Silently
176 	 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and inherit
177 	 * MNT_NOEXEC from the mount point.
178 	 */
179 	if (p->p_ucred->cr_uid != 0) {
180 		if (SCARG(uap, flags) & MNT_EXPORTED) {
181 			vput(vp);
182 			return (EPERM);
183 		}
184 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
185 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
186 			SCARG(uap, flags) |= MNT_NOEXEC;
187 	}
188 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
189 		vput(vp);
190 		return (error);
191 	}
192 	if (vp->v_type != VDIR) {
193 		vput(vp);
194 		return (ENOTDIR);
195 	}
196 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
197 	if (error) {
198 #ifdef COMPAT_43
199 		/*
200 		 * Historically filesystem types were identified by number.
201 		 * If we get an integer for the filesystem type instead of a
202 		 * string, we check to see if it matches one of the historic
203 		 * filesystem types.
204 		 */
205 		fstypenum = (u_long)SCARG(uap, type);
206 
207 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
208 			if (vfsp->vfc_typenum == fstypenum)
209 				break;
210 		if (vfsp == NULL) {
211 			vput(vp);
212 			return (ENODEV);
213 		}
214 		strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN);
215 
216 #else
217 		vput(vp);
218 		return (error);
219 #endif
220 	}
	/* Look the type name up in the list of configured filesystems. */
221 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
222 		if (!strcmp(vfsp->vfc_name, fstypename))
223 			break;
224 	}
225 
226 	if (vfsp == NULL) {
227 		vput(vp);
228 		return (EOPNOTSUPP);
229 	}
230 
	/* Refuse to stack a second mount on the same vnode. */
231 	if (vp->v_mountedhere != NULL) {
232 		vput(vp);
233 		return (EBUSY);
234 	}
235 
236 	/*
237 	 * Allocate and initialize the file system.
238 	 */
239 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
240 		M_MOUNT, M_WAITOK|M_ZERO);
	/* mp is brand new and zeroed; the vfs_busy() result is deliberately ignored. */
241 	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
242 	mp->mnt_op = vfsp->vfc_vfsops;
243 	mp->mnt_vfc = vfsp;
244 	mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
245 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
246 	mp->mnt_vnodecovered = vp;
247 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
248 update:
249 	/*
250 	 * Set the mount level flags.
	 *
	 * A request without MNT_RDONLY against a mount that currently is
	 * read-only is recorded as MNT_WANTRDWR.  The listed option flags
	 * are then replaced wholesale by the caller's values.
251 	 */
252 	if (SCARG(uap, flags) & MNT_RDONLY)
253 		mp->mnt_flag |= MNT_RDONLY;
254 	else if (mp->mnt_flag & MNT_RDONLY)
255 		mp->mnt_flag |= MNT_WANTRDWR;
256 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
257 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
258 	    MNT_FORCE);
259 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
260 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
261 	    MNT_NOATIME | MNT_FORCE);
262 	/*
263 	 * Mount the filesystem.
	 *
	 * Note that the filesystem is handed the caller's original
	 * user-space path pointer, not the fspath copy made above.
264 	 */
265 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
266 	if (!error) {
267 		mp->mnt_stat.f_ctime = time_second;
268 	}
	/*
	 * Update path: release vp, drop the transient flags and, if
	 * VFS_MOUNT() failed, put back the flags saved in `flag'.
	 */
269 	if (mp->mnt_flag & MNT_UPDATE) {
270 		vput(vp);
271 		if (mp->mnt_flag & MNT_WANTRDWR)
272 			mp->mnt_flag &= ~MNT_RDONLY;
273 		mp->mnt_flag &=~
274 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
275 		if (error)
276 			mp->mnt_flag = flag;
277 
		/*
		 * Keep the syncer vnode in step with the read-only state.
		 * NOTE(review): a vfs_allocate_syncvnode() result assigned
		 * here replaces whatever VFS_MOUNT() left in error.
		 */
278  		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
279  			if (mp->mnt_syncer == NULL)
280  				error = vfs_allocate_syncvnode(mp);
281  		} else {
282  			if (mp->mnt_syncer != NULL)
283  				vgone(mp->mnt_syncer);
284  			mp->mnt_syncer = NULL;
285  		}
286 
287 		vfs_unbusy(mp);
288 		return (error);
289 	}
290 
	/*
	 * New mount: the covered vnode is linked to mp before error is
	 * examined; the failure branch below clears the link again.
	 */
291 	vp->v_mountedhere = mp;
292 
293 	/*
294 	 * Put the new filesystem on the mount list after root.
295 	 */
296 	cache_purge(vp);
297 	if (!error) {
298 		vfsp->vfc_refcount++;
299 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
300 		checkdirs(vp);
301 		VOP_UNLOCK(vp, 0, p);
		/*
		 * NOTE(review): the value stored in error by
		 * vfs_allocate_syncvnode() is overwritten by the
		 * VFS_START() assignment a few lines down.
		 */
302  		if ((mp->mnt_flag & MNT_RDONLY) == 0)
303  			error = vfs_allocate_syncvnode(mp);
304 		vfs_unbusy(mp);
305 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
306 		if ((error = VFS_START(mp, 0, p)) != 0)
307 			vrele(vp);
308 	} else {
		/* VFS_MOUNT() failed: detach, unbusy and free the new mount. */
309 		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
310 		vfs_unbusy(mp);
311 		free(mp, M_MOUNT);
312 		vput(vp);
313 	}
314 	return (error);
315 }
316 
317 /*
318  * Scan all active processes to see if any of them have a current
319  * or root directory onto which the new filesystem has just been
320  * mounted. If so, replace them with the new mount point.
321  */
322 void
323 checkdirs(struct vnode *olddp)
324 {
325 	struct filedesc *fdp;
326 	struct vnode *newdp, *vp;
327 	struct proc *p;
328 
329 	if (olddp->v_usecount == 1)
330 		return;
331 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
332 		panic("mount: lost mount");
333 again:
334 	LIST_FOREACH(p, &allproc, p_list) {
335 		fdp = p->p_fd;
336 		if (fdp->fd_cdir == olddp) {
337 			vp = fdp->fd_cdir;
338 			VREF(newdp);
339 			fdp->fd_cdir = newdp;
340 			if (vrele(vp))
341 				goto again;
342 		}
343 		if (fdp->fd_rdir == olddp) {
344 			vp = fdp->fd_rdir;
345 			VREF(newdp);
346 			fdp->fd_rdir = newdp;
347 			if (vrele(vp))
348 				goto again;
349 		}
350 	}
351 	if (rootvnode == olddp) {
352 		vrele(rootvnode);
353 		VREF(newdp);
354 		rootvnode = newdp;
355 	}
356 	vput(newdp);
357 }
358 
359 /*
360  * Unmount a file system.
361  *
362  * Note: unmount takes a path to the vnode mounted on as argument,
363  * not special file (as before).
364  */
365 /* ARGSUSED */
366 int
367 sys_unmount(struct proc *p, void *v, register_t *retval)
368 {
369 	struct sys_unmount_args /* {
370 		syscallarg(const char *) path;
371 		syscallarg(int) flags;
372 	} */ *uap = v;
373 	struct vnode *vp;
374 	struct mount *mp;
375 	int error;
376 	struct nameidata nd;
377 
378 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
379 	    SCARG(uap, path), p);
380 	if ((error = namei(&nd)) != 0)
381 		return (error);
382 	vp = nd.ni_vp;
383 	mp = vp->v_mount;
384 
385 	/*
386 	 * Only root, or the user that did the original mount is
387 	 * permitted to unmount this filesystem.
388 	 */
389 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
390 	    (error = suser(p, 0))) {
391 		vput(vp);
392 		return (error);
393 	}
394 
395 	/*
396 	 * Don't allow unmounting the root file system.
397 	 */
398 	if (mp->mnt_flag & MNT_ROOTFS) {
399 		vput(vp);
400 		return (EINVAL);
401 	}
402 
403 	/*
404 	 * Must be the root of the filesystem
405 	 */
406 	if ((vp->v_flag & VROOT) == 0) {
407 		vput(vp);
408 		return (EINVAL);
409 	}
410 	vput(vp);
411 
412 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
413 		return (EBUSY);
414 
415 	return (dounmount(mp, SCARG(uap, flags), p, vp));
416 }
417 
418 /*
419  * Do the actual file system unmount.
420  */
421 int
422 dounmount(struct mount *mp, int flags, struct proc *p, struct vnode *olddp)
423 {
424 	struct vnode *coveredvp;
425 	int error;
426 	int hadsyncer = 0;
427 
428  	mp->mnt_flag &=~ MNT_ASYNC;
429  	cache_purgevfs(mp);	/* remove cache entries for this file sys */
430  	if (mp->mnt_syncer != NULL) {
431 		hadsyncer = 1;
432  		vgone(mp->mnt_syncer);
433 		mp->mnt_syncer = NULL;
434 	}
435 	if (((mp->mnt_flag & MNT_RDONLY) ||
436 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
437  	    (flags & MNT_FORCE))
438  		error = VFS_UNMOUNT(mp, flags, p);
439 
440  	if (error && error != EIO && !(flags & MNT_DOOMED)) {
441  		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
442  			(void) vfs_allocate_syncvnode(mp);
443 		vfs_unbusy(mp);
444 		return (error);
445 	}
446 
447 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
448 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
449 		coveredvp->v_mountedhere = NULL;
450  		vrele(coveredvp);
451  	}
452 
453 	mp->mnt_vfc->vfc_refcount--;
454 
455 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
456 		panic("unmount: dangling vnode");
457 
458 	vfs_unbusy(mp);
459 	free(mp, M_MOUNT);
460 
461 	return (0);
462 }
463 
464 /*
465  * Sync each mounted filesystem.
466  */
467 #ifdef DEBUG
468 int syncprt = 0;
469 struct ctldebug debug0 = { "syncprt", &syncprt };
470 #endif
471 
472 /* ARGSUSED */
473 int
474 sys_sync(struct proc *p, void *v, register_t *retval)
475 {
476 	struct mount *mp, *nmp;
477 	int asyncflag;
478 
479 	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
480 	    mp = nmp) {
481 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
482 			nmp = CIRCLEQ_PREV(mp, mnt_list);
483 			continue;
484 		}
485 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
486 			asyncflag = mp->mnt_flag & MNT_ASYNC;
487 			mp->mnt_flag &= ~MNT_ASYNC;
488 			uvm_vnp_sync(mp);
489 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
490 			if (asyncflag)
491 				mp->mnt_flag |= MNT_ASYNC;
492 		}
493 		nmp = CIRCLEQ_PREV(mp, mnt_list);
494 		vfs_unbusy(mp);
495 	}
496 
497 	return (0);
498 }
499 
500 /*
501  * Change filesystem quotas.
502  */
503 /* ARGSUSED */
504 int
505 sys_quotactl(struct proc *p, void *v, register_t *retval)
506 {
507 	struct sys_quotactl_args /* {
508 		syscallarg(const char *) path;
509 		syscallarg(int) cmd;
510 		syscallarg(int) uid;
511 		syscallarg(char *) arg;
512 	} */ *uap = v;
513 	struct mount *mp;
514 	int error;
515 	struct nameidata nd;
516 
517 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
518 	if ((error = namei(&nd)) != 0)
519 		return (error);
520 	mp = nd.ni_vp->v_mount;
521 	vrele(nd.ni_vp);
522 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
523 	    SCARG(uap, arg), p));
524 }
525 
526 int
527 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
528 {
529 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
530 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
531 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
532 	char *s, *d;
533 	int error;
534 
535 	/* Don't let non-root see filesystem id (for NFS security) */
536 	if (suser(p, 0)) {
537 		fsid_t fsid;
538 
539 		s = (char *)sp;
540 		d = (char *)uaddr;
541 
542 		memset(&fsid, 0, sizeof(fsid));
543 
544 		if ((error = copyout(s, d, co_sz1)) != 0)
545 			return (error);
546 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
547 			return (error);
548 		return (copyout(s + co_off2, d + co_off2, co_sz2));
549 	}
550 
551 	return (copyout(sp, uaddr, sizeof(*sp)));
552 }
553 
554 /*
555  * Get filesystem statistics.
556  */
557 /* ARGSUSED */
558 int
559 sys_statfs(struct proc *p, void *v, register_t *retval)
560 {
561 	struct sys_statfs_args /* {
562 		syscallarg(const char *) path;
563 		syscallarg(struct statfs *) buf;
564 	} */ *uap = v;
565 	struct mount *mp;
566 	struct statfs *sp;
567 	int error;
568 	struct nameidata nd;
569 
570 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
571 	if ((error = namei(&nd)) != 0)
572 		return (error);
573 	mp = nd.ni_vp->v_mount;
574 	sp = &mp->mnt_stat;
575 	vrele(nd.ni_vp);
576 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
577 		return (error);
578 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
579 
580 	return (copyout_statfs(sp, SCARG(uap, buf), p));
581 }
582 
583 /*
584  * Get filesystem statistics.
585  */
586 /* ARGSUSED */
587 int
588 sys_fstatfs(struct proc *p, void *v, register_t *retval)
589 {
590 	struct sys_fstatfs_args /* {
591 		syscallarg(int) fd;
592 		syscallarg(struct statfs *) buf;
593 	} */ *uap = v;
594 	struct file *fp;
595 	struct mount *mp;
596 	struct statfs *sp;
597 	int error;
598 
599 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
600 		return (error);
601 	mp = ((struct vnode *)fp->f_data)->v_mount;
602 	if (!mp) {
603 		FRELE(fp);
604 		return (ENOENT);
605 	}
606 	sp = &mp->mnt_stat;
607 	error = VFS_STATFS(mp, sp, p);
608 	FRELE(fp);
609 	if (error)
610 		return (error);
611 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
612 
613 	return (copyout_statfs(sp, SCARG(uap, buf), p));
614 }
615 
616 /*
617  * Get statistics on all filesystems.
618  */
619 int
620 sys_getfsstat(struct proc *p, void *v, register_t *retval)
621 {
622 	struct sys_getfsstat_args /* {
623 		syscallarg(struct statfs *) buf;
624 		syscallarg(size_t) bufsize;
625 		syscallarg(int) flags;
626 	} */ *uap = v;
627 	struct mount *mp, *nmp;
628 	struct statfs *sp;
629 	struct statfs *sfsp;
630 	size_t count, maxcount;
631 	int error, flags = SCARG(uap, flags);
632 
633 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
634 	sfsp = SCARG(uap, buf);
635 	count = 0;
636 
637 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
638 	    mp = nmp) {
639 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
640 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
641 			continue;
642 		}
643 		if (sfsp && count < maxcount) {
644 			sp = &mp->mnt_stat;
645 
646 			/* Refresh stats unless MNT_NOWAIT is specified */
647 			if (flags != MNT_NOWAIT &&
648 			    flags != MNT_LAZY &&
649 			    (flags == MNT_WAIT ||
650 			    flags == 0) &&
651 			    (error = VFS_STATFS(mp, sp, p))) {
652 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
653 				vfs_unbusy(mp);
654  				continue;
655 			}
656 
657 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
658 #if notyet
659 			if (mp->mnt_flag & MNT_SOFTDEP)
660 				sp->f_eflags = STATFS_SOFTUPD;
661 #endif
662 			error = (copyout_statfs(sp, sfsp, p));
663 			if (error) {
664 				vfs_unbusy(mp);
665 				return (error);
666 			}
667 			sfsp++;
668 		}
669 		count++;
670 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
671 		vfs_unbusy(mp);
672 	}
673 
674 	if (sfsp && count > maxcount)
675 		*retval = maxcount;
676 	else
677 		*retval = count;
678 
679 	return (0);
680 }
681 
682 /*
683  * Change current working directory to a given file descriptor.
684  */
685 /* ARGSUSED */
686 int
687 sys_fchdir(struct proc *p, void *v, register_t *retval)
688 {
689 	struct sys_fchdir_args /* {
690 		syscallarg(int) fd;
691 	} */ *uap = v;
692 	struct filedesc *fdp = p->p_fd;
693 	struct vnode *vp, *tdp;
694 	struct mount *mp;
695 	struct file *fp;
696 	int error;
697 
698 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
699 		return (error);
700 	vp = (struct vnode *)fp->f_data;
701 	VREF(vp);
702 	FRELE(fp);
703 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
704 	if (vp->v_type != VDIR)
705 		error = ENOTDIR;
706 	else
707 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
708 
709 	while (!error && (mp = vp->v_mountedhere) != NULL) {
710 		if (vfs_busy(mp, VB_READ|VB_WAIT))
711 			continue;
712 		error = VFS_ROOT(mp, &tdp);
713 		vfs_unbusy(mp);
714 		if (error)
715 			break;
716 		vput(vp);
717 		vp = tdp;
718 	}
719 	if (error) {
720 		vput(vp);
721 		return (error);
722 	}
723 	VOP_UNLOCK(vp, 0, p);
724 	vrele(fdp->fd_cdir);
725 	fdp->fd_cdir = vp;
726 	return (0);
727 }
728 
729 /*
730  * Change current working directory (``.'').
731  */
732 /* ARGSUSED */
733 int
734 sys_chdir(struct proc *p, void *v, register_t *retval)
735 {
736 	struct sys_chdir_args /* {
737 		syscallarg(const char *) path;
738 	} */ *uap = v;
739 	struct filedesc *fdp = p->p_fd;
740 	int error;
741 	struct nameidata nd;
742 
743 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
744 	    SCARG(uap, path), p);
745 	if ((error = change_dir(&nd, p)) != 0)
746 		return (error);
747 	vrele(fdp->fd_cdir);
748 	fdp->fd_cdir = nd.ni_vp;
749 	return (0);
750 }
751 
752 /*
753  * Change notion of root (``/'') directory.
754  */
755 /* ARGSUSED */
756 int
757 sys_chroot(struct proc *p, void *v, register_t *retval)
758 {
759 	struct sys_chroot_args /* {
760 		syscallarg(const char *) path;
761 	} */ *uap = v;
762 	struct filedesc *fdp = p->p_fd;
763 	int error;
764 	struct nameidata nd;
765 
766 	if ((error = suser(p, 0)) != 0)
767 		return (error);
768 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
769 	    SCARG(uap, path), p);
770 	if ((error = change_dir(&nd, p)) != 0)
771 		return (error);
772 	if (fdp->fd_rdir != NULL) {
773 		/*
774 		 * A chroot() done inside a changed root environment does
775 		 * an automatic chdir to avoid the out-of-tree experience.
776 		 */
777 		vrele(fdp->fd_rdir);
778 		vrele(fdp->fd_cdir);
779 		VREF(nd.ni_vp);
780 		fdp->fd_cdir = nd.ni_vp;
781 	}
782 	fdp->fd_rdir = nd.ni_vp;
783 	return (0);
784 }
785 
786 /*
787  * Common routine for chroot and chdir.
788  */
789 static int
790 change_dir(struct nameidata *ndp, struct proc *p)
791 {
792 	struct vnode *vp;
793 	int error;
794 
795 	if ((error = namei(ndp)) != 0)
796 		return (error);
797 	vp = ndp->ni_vp;
798 	if (vp->v_type != VDIR)
799 		error = ENOTDIR;
800 	else
801 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
802 	if (error)
803 		vput(vp);
804 	else
805 		VOP_UNLOCK(vp, 0, p);
806 	return (error);
807 }
808 
809 /*
810  * Check permissions, allocate an open file structure,
811  * and call the device open routine if any.
 *
 * Arguments (see uap below): path, open flags, creation mode.
 * On success *retval is the new descriptor index and 0 is returned;
 * otherwise an errno value is returned and the descriptor slot and
 * file structure allocated here are given back.
 *
 * The descriptor table lock is taken at the top and released at the
 * single exit label `out'.
812  */
813 int
814 sys_open(struct proc *p, void *v, register_t *retval)
815 {
816 	struct sys_open_args /* {
817 		syscallarg(const char *) path;
818 		syscallarg(int) flags;
819 		syscallarg(mode_t) mode;
820 	} */ *uap = v;
821 	struct filedesc *fdp = p->p_fd;
822 	struct file *fp;
823 	struct vnode *vp;
824 	struct vattr vattr;
825 	int flags, cmode;
	/* localtrunc: set when this function, not vn_open(), must truncate */
826 	int type, indx, error, localtrunc = 0;
827 	struct flock lf;
828 	struct nameidata nd;
829 
830 	fdplock(fdp);
831 
832 	if ((error = falloc(p, &fp, &indx)) != 0)
833 		goto out;
834 
835 	flags = FFLAGS(SCARG(uap, flags));
	/* Creation mode: apply the umask, keep permission bits only, drop S_ISTXT. */
836 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
837 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
838 	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * With O_EXLOCK/O_SHLOCK the truncation is held back and done
	 * further down, after the advisory lock has been obtained.
	 */
839 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
840 		localtrunc = 1;
841 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
842 	}
843 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/*
		 * ENODEV/ENXIO together with a p_dupfd that is no longer
		 * negative is handed to dupfdopen(); if that succeeds the
		 * file allocated above is closed and indx is returned.
		 */
844 		if ((error == ENODEV || error == ENXIO) &&
845 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
846 		    (error =
847 			dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
848 			closef(fp, p);
849 			*retval = indx;
850 			goto out;
851 		}
		/* ERESTART is reported to the caller as EINTR. */
852 		if (error == ERESTART)
853 			error = EINTR;
854 		fdremove(fdp, indx);
855 		closef(fp, p);
856 		goto out;
857 	}
858 	p->p_dupfd = 0;
859 	vp = nd.ni_vp;
	/* Bind the opened vnode to the file structure. */
860 	fp->f_flag = flags & FMASK;
861 	fp->f_type = DTYPE_VNODE;
862 	fp->f_ops = &vnops;
863 	fp->f_data = vp;
864 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0, relative to SEEK_SET. */
865 		lf.l_whence = SEEK_SET;
866 		lf.l_start = 0;
867 		lf.l_len = 0;
868 		if (flags & O_EXLOCK)
869 			lf.l_type = F_WRLCK;
870 		else
871 			lf.l_type = F_RDLCK;
872 		type = F_FLOCK;
		/* F_WAIT is added unless the open is non-blocking. */
873 		if ((flags & FNONBLOCK) == 0)
874 			type |= F_WAIT;
		/* The vnode is unlocked around VOP_ADVLOCK() and relocked afterwards. */
875 		VOP_UNLOCK(vp, 0, p);
876 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
877 		if (error) {
878 			/* closef will vn_close the file for us. */
879 			fdremove(fdp, indx);
880 			closef(fp, p);
881 			goto out;
882 		}
883 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
884 		fp->f_flag |= FHASLOCK;
885 	}
886 	if (localtrunc) {
		/*
		 * Deferred O_TRUNC: checks are made in this order - write
		 * access on the file, read-only mount, directory,
		 * vn_writechk() - before the size is set to 0.
		 */
887 		if ((fp->f_flag & FWRITE) == 0)
888 			error = EACCES;
889 		else if (vp->v_mount->mnt_flag & MNT_RDONLY)
890 			error = EROFS;
891 		else if (vp->v_type == VDIR)
892 			error = EISDIR;
893 		else if ((error = vn_writechk(vp)) == 0) {
894 			VATTR_NULL(&vattr);
895 			vattr.va_size = 0;
896 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
897 		}
898 		if (error) {
899 			VOP_UNLOCK(vp, 0, p);
900 			/* closef will close the file for us. */
901 			fdremove(fdp, indx);
902 			closef(fp, p);
903 			goto out;
904 		}
905 	}
906 	VOP_UNLOCK(vp, 0, p);
907 	*retval = indx;
908 	FILE_SET_MATURE(fp);
909 out:
910 	fdpunlock(fdp);
911 	return (error);
912 }
913 
914 /*
915  * Get file handle system call
916  */
917 int
918 sys_getfh(struct proc *p, void *v, register_t *retval)
919 {
920 	struct sys_getfh_args /* {
921 		syscallarg(const char *) fname;
922 		syscallarg(fhandle_t *) fhp;
923 	} */ *uap = v;
924 	struct vnode *vp;
925 	fhandle_t fh;
926 	int error;
927 	struct nameidata nd;
928 
929 	/*
930 	 * Must be super user
931 	 */
932 	error = suser(p, 0);
933 	if (error)
934 		return (error);
935 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
936 	    SCARG(uap, fname), p);
937 	error = namei(&nd);
938 	if (error)
939 		return (error);
940 	vp = nd.ni_vp;
941 	bzero(&fh, sizeof(fh));
942 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
943 	error = VFS_VPTOFH(vp, &fh.fh_fid);
944 	vput(vp);
945 	if (error)
946 		return (error);
947 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
948 	return (error);
949 }
950 
951 /*
952  * Open a file given a file handle.
953  *
954  * Check permissions, allocate an open file structure,
955  * and call the device open routine if any.
 *
 * Superuser only.  The flags must include read and/or write access
 * and must not include O_CREAT.  The handle is copied in from user
 * space, mapped to a mount with vfs_getvfs() and to a vnode with
 * VFS_FHTOVP(); the rest mirrors what vn_open() and sys_open() do.
 *
 * On success *retval is the new descriptor index.  All failures after
 * the descriptor table lock is taken leave through the `bad' label.
956  */
957 int
958 sys_fhopen(struct proc *p, void *v, register_t *retval)
959 {
960 	struct sys_fhopen_args /* {
961 		syscallarg(const fhandle_t *) fhp;
962 		syscallarg(int) flags;
963 	} */ *uap = v;
964 	struct filedesc *fdp = p->p_fd;
965 	struct file *fp;
	/* vp stays NULL until VFS_FHTOVP() succeeds; `bad' only vput()s a non-NULL vp */
966 	struct vnode *vp = NULL;
967 	struct mount *mp;
968 	struct ucred *cred = p->p_ucred;
969 	int flags;
970 	int type, indx, error=0;
971 	struct flock lf;
972 	struct vattr va;
973 	fhandle_t fh;
974 
975 	/*
976 	 * Must be super user
977 	 */
978 	if ((error = suser(p, 0)))
979 		return (error);
980 
981 	flags = FFLAGS(SCARG(uap, flags));
982 	if ((flags & (FREAD | FWRITE)) == 0)
983 		return (EINVAL);
984 	if ((flags & O_CREAT))
985 		return (EINVAL);
986 
987 	fdplock(fdp);
988 	if ((error = falloc(p, &fp, &indx)) != 0) {
		/* fp == NULL makes `bad' skip the descriptor/file cleanup. */
989 		fp = NULL;
990 		goto bad;
991 	}
992 
993 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
994 		goto bad;
995 
	/* No mount matches the fsid in the handle. */
996 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
997 		error = ESTALE;
998 		goto bad;
999 	}
1000 
1001 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1002 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1003 		goto bad;
1004 	}
1005 
1006 	/* Now do an effective vn_open */
1007 
1008 	if (vp->v_type == VSOCK) {
1009 		error = EOPNOTSUPP;
1010 		goto bad;
1011 	}
1012 	if (flags & FREAD) {
1013 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1014 			goto bad;
1015 	}
1016 	if (flags & (FWRITE | O_TRUNC)) {
1017 		if (vp->v_type == VDIR) {
1018 			error = EISDIR;
1019 			goto bad;
1020 		}
1021 		if ((error = vn_writechk(vp)) != 0 ||
1022 		    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
1023 			goto bad;
1024 	}
1025 	if (flags & O_TRUNC) {
		/* Truncate by setting only the size attribute to 0. */
1026 		VATTR_NULL(&va);
1027 		va.va_size = 0;
1028 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1029 			goto bad;
1030 	}
1031 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1032 		goto bad;
1033 	if (flags & FWRITE)
1034 		vp->v_writecount++;
1035 
1036 	/* done with modified vn_open, now finish what sys_open does. */
1037 
1038 	fp->f_flag = flags & FMASK;
1039 	fp->f_type = DTYPE_VNODE;
1040 	fp->f_ops = &vnops;
1041 	fp->f_data = vp;
1042 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0, relative to SEEK_SET. */
1043 		lf.l_whence = SEEK_SET;
1044 		lf.l_start = 0;
1045 		lf.l_len = 0;
1046 		if (flags & O_EXLOCK)
1047 			lf.l_type = F_WRLCK;
1048 		else
1049 			lf.l_type = F_RDLCK;
1050 		type = F_FLOCK;
1051 		if ((flags & FNONBLOCK) == 0)
1052 			type |= F_WAIT;
		/* The vnode is unlocked around VOP_ADVLOCK() and relocked afterwards. */
1053 		VOP_UNLOCK(vp, 0, p);
1054 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1055 		if (error) {
1056 			vp = NULL;	/* closef will vn_close the file */
1057 			goto bad;
1058 		}
1059 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1060 		fp->f_flag |= FHASLOCK;
1061 	}
1062 	VOP_UNLOCK(vp, 0, p);
1063 	*retval = indx;
1064 	FILE_SET_MATURE(fp);
1065 
1066 	fdpunlock(fdp);
1067 	return (0);
1068 
	/*
	 * Common failure exit.  If a file structure was allocated, its
	 * descriptor slot is removed and the file closed, and a vnode
	 * still owned by this function is released with vput().
	 */
1069 bad:
1070 	if (fp) {
1071 		fdremove(fdp, indx);
1072 		closef(fp, p);
1073 		if (vp != NULL)
1074 			vput(vp);
1075 	}
1076 	fdpunlock(fdp);
1077 	return (error);
1078 }
1079 
1080 /* ARGSUSED */
1081 int
1082 sys_fhstat(struct proc *p, void *v, register_t *retval)
1083 {
1084 	struct sys_fhstat_args /* {
1085 		syscallarg(const fhandle_t *) fhp;
1086 		syscallarg(struct stat *) sb;
1087 	} */ *uap = v;
1088 	struct stat sb;
1089 	int error;
1090 	fhandle_t fh;
1091 	struct mount *mp;
1092 	struct vnode *vp;
1093 
1094 	/*
1095 	 * Must be super user
1096 	 */
1097 	if ((error = suser(p, 0)))
1098 		return (error);
1099 
1100 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1101 		return (error);
1102 
1103 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1104 		return (ESTALE);
1105 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1106 		return (error);
1107 	error = vn_stat(vp, &sb, p);
1108 	vput(vp);
1109 	if (error)
1110 		return (error);
1111 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1112 	return (error);
1113 }
1114 
1115 /* ARGSUSED */
1116 int
1117 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1118 {
1119 	struct sys_fhstatfs_args /*
1120 		syscallarg(const fhandle_t *) fhp;
1121 		syscallarg(struct statfs *) buf;
1122 	} */ *uap = v;
1123 	struct statfs *sp;
1124 	fhandle_t fh;
1125 	struct mount *mp;
1126 	struct vnode *vp;
1127 	int error;
1128 
1129 	/*
1130 	 * Must be super user
1131 	 */
1132 	if ((error = suser(p, 0)))
1133 		return (error);
1134 
1135 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1136 		return (error);
1137 
1138 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1139 		return (ESTALE);
1140 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1141 		return (error);
1142 	mp = vp->v_mount;
1143 	sp = &mp->mnt_stat;
1144 	vput(vp);
1145 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1146 		return (error);
1147 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1148 	return (copyout(sp, SCARG(uap, buf), sizeof(sp)));
1149 }
1150 
1151 /*
1152  * Create a special file.
1153  */
1154 /* ARGSUSED */
1155 int
1156 sys_mknod(struct proc *p, void *v, register_t *retval)
1157 {
1158 	struct sys_mknod_args /* {
1159 		syscallarg(const char *) path;
1160 		syscallarg(mode_t) mode;
1161 		syscallarg(int) dev;
1162 	} */ *uap = v;
1163 	struct vnode *vp;
1164 	struct vattr vattr;
1165 	int error;
1166 	struct nameidata nd;
1167 
1168 	if ((error = suser(p, 0)) != 0)
1169 		return (error);
1170 	if (p->p_fd->fd_rdir)
1171 		return (EINVAL);
1172 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1173 	if ((error = namei(&nd)) != 0)
1174 		return (error);
1175 	vp = nd.ni_vp;
1176 	if (vp != NULL)
1177 		error = EEXIST;
1178 	else {
1179 		VATTR_NULL(&vattr);
1180 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1181 		vattr.va_rdev = SCARG(uap, dev);
1182 
1183 		switch (SCARG(uap, mode) & S_IFMT) {
1184 		case S_IFMT:	/* used by badsect to flag bad sectors */
1185 			vattr.va_type = VBAD;
1186 			break;
1187 		case S_IFCHR:
1188 			vattr.va_type = VCHR;
1189 			break;
1190 		case S_IFBLK:
1191 			vattr.va_type = VBLK;
1192 			break;
1193 		default:
1194 			error = EINVAL;
1195 			break;
1196 		}
1197 	}
1198 	if (!error) {
1199 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1200 	} else {
1201 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1202 		if (nd.ni_dvp == vp)
1203 			vrele(nd.ni_dvp);
1204 		else
1205 			vput(nd.ni_dvp);
1206 		if (vp)
1207 			vrele(vp);
1208 	}
1209 	return (error);
1210 }
1211 
1212 /*
1213  * Create a named pipe.
1214  */
1215 /* ARGSUSED */
1216 int
1217 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1218 {
1219 #ifndef FIFO
1220 	return (EOPNOTSUPP);
1221 #else
1222 	struct sys_mkfifo_args /* {
1223 		syscallarg(const char *) path;
1224 		syscallarg(mode_t) mode;
1225 	} */ *uap = v;
1226 	struct vattr vattr;
1227 	int error;
1228 	struct nameidata nd;
1229 
1230 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1231 	if ((error = namei(&nd)) != 0)
1232 		return (error);
1233 	if (nd.ni_vp != NULL) {
1234 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1235 		if (nd.ni_dvp == nd.ni_vp)
1236 			vrele(nd.ni_dvp);
1237 		else
1238 			vput(nd.ni_dvp);
1239 		vrele(nd.ni_vp);
1240 		return (EEXIST);
1241 	}
1242 	VATTR_NULL(&vattr);
1243 	vattr.va_type = VFIFO;
1244 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1245 	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
1246 #endif /* FIFO */
1247 }
1248 
1249 /*
1250  * Make a hard file link.
1251  */
1252 /* ARGSUSED */
1253 int
1254 sys_link(struct proc *p, void *v, register_t *retval)
1255 {
1256 	struct sys_link_args /* {
1257 		syscallarg(const char *) path;
1258 		syscallarg(const char *) link;
1259 	} */ *uap = v;
1260 	struct vnode *vp;
1261 	struct nameidata nd;
1262 	int error;
1263 	int flags;
1264 
1265 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1266 	if ((error = namei(&nd)) != 0)
1267 		return (error);
1268 	vp = nd.ni_vp;
1269 
1270 	flags = LOCKPARENT;
1271 	if (vp->v_type == VDIR) {
1272 		flags |= STRIPSLASHES;
1273 	}
1274 
1275 	NDINIT(&nd, CREATE, flags, UIO_USERSPACE, SCARG(uap, link), p);
1276 	if ((error = namei(&nd)) != 0)
1277 		goto out;
1278 	if (nd.ni_vp) {
1279 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1280 		if (nd.ni_dvp == nd.ni_vp)
1281 			vrele(nd.ni_dvp);
1282 		else
1283 			vput(nd.ni_dvp);
1284 		vrele(nd.ni_vp);
1285 		error = EEXIST;
1286 		goto out;
1287 	}
1288 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1289 out:
1290 	vrele(vp);
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Make a symbolic link.
1296  */
1297 /* ARGSUSED */
1298 int
1299 sys_symlink(struct proc *p, void *v, register_t *retval)
1300 {
1301 	struct sys_symlink_args /* {
1302 		syscallarg(const char *) path;
1303 		syscallarg(const char *) link;
1304 	} */ *uap = v;
1305 	struct vattr vattr;
1306 	char *path;
1307 	int error;
1308 	struct nameidata nd;
1309 
1310 	path = pool_get(&namei_pool, PR_WAITOK);
1311 	error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1312 	if (error)
1313 		goto out;
1314 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p);
1315 	if ((error = namei(&nd)) != 0)
1316 		goto out;
1317 	if (nd.ni_vp) {
1318 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1319 		if (nd.ni_dvp == nd.ni_vp)
1320 			vrele(nd.ni_dvp);
1321 		else
1322 			vput(nd.ni_dvp);
1323 		vrele(nd.ni_vp);
1324 		error = EEXIST;
1325 		goto out;
1326 	}
1327 	VATTR_NULL(&vattr);
1328 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1329 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1330 out:
1331 	pool_put(&namei_pool, path);
1332 	return (error);
1333 }
1334 
1335 /*
1336  * Delete a name from the filesystem.
1337  */
1338 /* ARGSUSED */
1339 int
1340 sys_unlink(struct proc *p, void *v, register_t *retval)
1341 {
1342 	struct sys_unlink_args /* {
1343 		syscallarg(const char *) path;
1344 	} */ *uap = v;
1345 	struct vnode *vp;
1346 	int error;
1347 	struct nameidata nd;
1348 
1349 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1350 	    SCARG(uap, path), p);
1351 	if ((error = namei(&nd)) != 0)
1352 		return (error);
1353 	vp = nd.ni_vp;
1354 
1355 	/*
1356 	 * The root of a mounted filesystem cannot be deleted.
1357 	 */
1358 	if (vp->v_flag & VROOT) {
1359 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1360 		if (nd.ni_dvp == vp)
1361 			vrele(nd.ni_dvp);
1362 		else
1363 			vput(nd.ni_dvp);
1364 		vput(vp);
1365 		error = EBUSY;
1366 		goto out;
1367 	}
1368 
1369 	(void)uvm_vnp_uncache(vp);
1370 
1371 	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1372 out:
1373 	return (error);
1374 }
1375 
1376 /*
1377  * Reposition read/write file offset.
1378  */
1379 int
1380 sys_lseek(struct proc *p, void *v, register_t *retval)
1381 {
1382 	struct sys_lseek_args /* {
1383 		syscallarg(int) fd;
1384 		syscallarg(int) pad;
1385 		syscallarg(off_t) offset;
1386 		syscallarg(int) whence;
1387 	} */ *uap = v;
1388 	struct ucred *cred = p->p_ucred;
1389 	struct filedesc *fdp = p->p_fd;
1390 	struct file *fp;
1391 	struct vattr vattr;
1392 	struct vnode *vp;
1393 	off_t offarg, newoff;
1394 	int error, special;
1395 
1396 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1397 		return (EBADF);
1398 	if (fp->f_type != DTYPE_VNODE)
1399 		return (ESPIPE);
1400 	vp = (struct vnode *)fp->f_data;
1401 	if (vp->v_type == VFIFO)
1402 		return (ESPIPE);
1403 	if (vp->v_type == VCHR)
1404 		special = 1;
1405 	else
1406 		special = 0;
1407 	offarg = SCARG(uap, offset);
1408 
1409 	switch (SCARG(uap, whence)) {
1410 	case SEEK_CUR:
1411 		newoff = fp->f_offset + offarg;;
1412 		break;
1413 	case SEEK_END:
1414 		error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr,
1415 				    cred, p);
1416 		if (error)
1417 			return (error);
1418 		newoff = offarg + (off_t)vattr.va_size;
1419 		break;
1420 	case SEEK_SET:
1421 		newoff = offarg;
1422 		break;
1423 	default:
1424 		return (EINVAL);
1425 	}
1426 	if (!special) {
1427 		if (newoff < 0)
1428 			return (EINVAL);
1429 	} else {
1430 		/*
1431 		 * Make sure the user don't seek beyond the end of the
1432 		 * partition.
1433 		 */
1434 		struct partinfo dpart;
1435 		error = vn_ioctl(fp, DIOCGPART, (void *)&dpart, p);
1436 		if (!error) {
1437 			if (newoff >= DL_GETPSIZE(dpart.part) *
1438 			    dpart.disklab->d_secsize)
1439 					return (EINVAL);
1440 		}
1441 	}
1442 	*(off_t *)retval = fp->f_offset = newoff;
1443 	fp->f_seek++;
1444 	return (0);
1445 }
1446 
1447 /*
1448  * Check access permissions.
1449  */
1450 int
1451 sys_access(struct proc *p, void *v, register_t *retval)
1452 {
1453 	struct sys_access_args /* {
1454 		syscallarg(const char *) path;
1455 		syscallarg(int) flags;
1456 	} */ *uap = v;
1457 	struct ucred *cred = p->p_ucred;
1458 	struct vnode *vp;
1459 	int error, flags, t_gid, t_uid;
1460 	struct nameidata nd;
1461 
1462 	if (SCARG(uap, flags) & ~(R_OK | W_OK | X_OK))
1463 		return (EINVAL);
1464 	t_uid = cred->cr_uid;
1465 	t_gid = cred->cr_gid;
1466 	cred->cr_uid = p->p_cred->p_ruid;
1467 	cred->cr_gid = p->p_cred->p_rgid;
1468 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1469 	    SCARG(uap, path), p);
1470 	if ((error = namei(&nd)) != 0)
1471 		goto out1;
1472 	vp = nd.ni_vp;
1473 
1474 	/* Flags == 0 means only check for existence. */
1475 	if (SCARG(uap, flags)) {
1476 		flags = 0;
1477 		if (SCARG(uap, flags) & R_OK)
1478 			flags |= VREAD;
1479 		if (SCARG(uap, flags) & W_OK)
1480 			flags |= VWRITE;
1481 		if (SCARG(uap, flags) & X_OK)
1482 			flags |= VEXEC;
1483 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1484 			error = VOP_ACCESS(vp, flags, cred, p);
1485 	}
1486 	vput(vp);
1487 out1:
1488 	cred->cr_uid = t_uid;
1489 	cred->cr_gid = t_gid;
1490 	return (error);
1491 }
1492 
1493 /*
1494  * Get file status; this version follows links.
1495  */
1496 /* ARGSUSED */
1497 int
1498 sys_stat(struct proc *p, void *v, register_t *retval)
1499 {
1500 	struct sys_stat_args /* {
1501 		syscallarg(const char *) path;
1502 		syscallarg(struct stat *) ub;
1503 	} */ *uap = v;
1504 	struct stat sb;
1505 	int error;
1506 	struct nameidata nd;
1507 
1508 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1509 	    SCARG(uap, path), p);
1510 	if ((error = namei(&nd)) != 0)
1511 		return (error);
1512 	error = vn_stat(nd.ni_vp, &sb, p);
1513 	vput(nd.ni_vp);
1514 	if (error)
1515 		return (error);
1516 	/* Don't let non-root see generation numbers (for NFS security) */
1517 	if (suser(p, 0))
1518 		sb.st_gen = 0;
1519 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
1520 	return (error);
1521 }
1522 
1523 /*
1524  * Get file status; this version does not follow links.
1525  */
1526 /* ARGSUSED */
1527 int
1528 sys_lstat(struct proc *p, void *v, register_t *retval)
1529 {
1530 	struct sys_lstat_args /* {
1531 		syscallarg(const char *) path;
1532 		syscallarg(struct stat *) ub;
1533 	} */ *uap = v;
1534 	struct stat sb;
1535 	int error;
1536 	struct nameidata nd;
1537 
1538 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
1539 	    SCARG(uap, path), p);
1540 	if ((error = namei(&nd)) != 0)
1541 		return (error);
1542 	error = vn_stat(nd.ni_vp, &sb, p);
1543 	vput(nd.ni_vp);
1544 	if (error)
1545 		return (error);
1546 	/* Don't let non-root see generation numbers (for NFS security) */
1547 	if (suser(p, 0))
1548 		sb.st_gen = 0;
1549 	error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Get configurable pathname variables.
1555  */
1556 /* ARGSUSED */
1557 int
1558 sys_pathconf(struct proc *p, void *v, register_t *retval)
1559 {
1560 	struct sys_pathconf_args /* {
1561 		syscallarg(const char *) path;
1562 		syscallarg(int) name;
1563 	} */ *uap = v;
1564 	int error;
1565 	struct nameidata nd;
1566 
1567 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1568 	    SCARG(uap, path), p);
1569 	if ((error = namei(&nd)) != 0)
1570 		return (error);
1571 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1572 	vput(nd.ni_vp);
1573 	return (error);
1574 }
1575 
1576 /*
1577  * Return target name of a symbolic link.
1578  */
1579 /* ARGSUSED */
1580 int
1581 sys_readlink(struct proc *p, void *v, register_t *retval)
1582 {
1583 	struct sys_readlink_args /* {
1584 		syscallarg(const char *) path;
1585 		syscallarg(char *) buf;
1586 		syscallarg(size_t) count;
1587 	} */ *uap = v;
1588 	struct vnode *vp;
1589 	struct iovec aiov;
1590 	struct uio auio;
1591 	int error;
1592 	struct nameidata nd;
1593 
1594 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
1595 	    SCARG(uap, path), p);
1596 	if ((error = namei(&nd)) != 0)
1597 		return (error);
1598 	vp = nd.ni_vp;
1599 	if (vp->v_type != VLNK)
1600 		error = EINVAL;
1601 	else {
1602 		aiov.iov_base = SCARG(uap, buf);
1603 		aiov.iov_len = SCARG(uap, count);
1604 		auio.uio_iov = &aiov;
1605 		auio.uio_iovcnt = 1;
1606 		auio.uio_offset = 0;
1607 		auio.uio_rw = UIO_READ;
1608 		auio.uio_segflg = UIO_USERSPACE;
1609 		auio.uio_procp = p;
1610 		auio.uio_resid = SCARG(uap, count);
1611 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1612 	}
1613 	vput(vp);
1614 	*retval = SCARG(uap, count) - auio.uio_resid;
1615 	return (error);
1616 }
1617 
1618 /*
1619  * Change flags of a file given a path name.
1620  */
1621 /* ARGSUSED */
1622 int
1623 sys_chflags(struct proc *p, void *v, register_t *retval)
1624 {
1625 	struct sys_chflags_args /* {
1626 		syscallarg(const char *) path;
1627 		syscallarg(u_int) flags;
1628 	} */ *uap = v;
1629 	struct vnode *vp;
1630 	struct vattr vattr;
1631 	int error;
1632 	struct nameidata nd;
1633 
1634 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1635 	if ((error = namei(&nd)) != 0)
1636 		return (error);
1637 	vp = nd.ni_vp;
1638 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1639 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1640 		error = EROFS;
1641 	else if (SCARG(uap, flags) == VNOVAL)
1642 		error = EINVAL;
1643 	else {
1644 		if (suser(p, 0)) {
1645 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
1646 				goto out;
1647 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1648 				error = EINVAL;
1649 				goto out;
1650 			}
1651 		}
1652 		VATTR_NULL(&vattr);
1653 		vattr.va_flags = SCARG(uap, flags);
1654 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1655 	}
1656 out:
1657 	vput(vp);
1658 	return (error);
1659 }
1660 
1661 /*
1662  * Change flags of a file given a file descriptor.
1663  */
1664 /* ARGSUSED */
1665 int
1666 sys_fchflags(struct proc *p, void *v, register_t *retval)
1667 {
1668 	struct sys_fchflags_args /* {
1669 		syscallarg(int) fd;
1670 		syscallarg(u_int) flags;
1671 	} */ *uap = v;
1672 	struct vattr vattr;
1673 	struct vnode *vp;
1674 	struct file *fp;
1675 	int error;
1676 
1677 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1678 		return (error);
1679 	vp = (struct vnode *)fp->f_data;
1680 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1681 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1682 		error = EROFS;
1683 	else if (SCARG(uap, flags) == VNOVAL)
1684 		error = EINVAL;
1685 	else {
1686 		if (suser(p, 0)) {
1687 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
1688 			    != 0)
1689 				goto out;
1690 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1691 				error = EINVAL;
1692 				goto out;
1693 			}
1694 		}
1695 		VATTR_NULL(&vattr);
1696 		vattr.va_flags = SCARG(uap, flags);
1697 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1698 	}
1699 out:
1700 	VOP_UNLOCK(vp, 0, p);
1701 	FRELE(fp);
1702 	return (error);
1703 }
1704 
1705 /*
1706  * Change mode of a file given path name.
1707  */
1708 /* ARGSUSED */
1709 int
1710 sys_chmod(struct proc *p, void *v, register_t *retval)
1711 {
1712 	struct sys_chmod_args /* {
1713 		syscallarg(const char *) path;
1714 		syscallarg(mode_t) mode;
1715 	} */ *uap = v;
1716 	struct vnode *vp;
1717 	struct vattr vattr;
1718 	int error;
1719 	struct nameidata nd;
1720 
1721 	if (SCARG(uap, mode) & ~(S_IFMT | ALLPERMS))
1722 		return (EINVAL);
1723 
1724 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1725 	if ((error = namei(&nd)) != 0)
1726 		return (error);
1727 	vp = nd.ni_vp;
1728 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1729 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1730 		error = EROFS;
1731 	else {
1732 		VATTR_NULL(&vattr);
1733 		vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
1734 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1735 	}
1736 	vput(vp);
1737 	return (error);
1738 }
1739 
1740 /*
1741  * Change mode of a file given a file descriptor.
1742  */
1743 /* ARGSUSED */
1744 int
1745 sys_fchmod(struct proc *p, void *v, register_t *retval)
1746 {
1747 	struct sys_fchmod_args /* {
1748 		syscallarg(int) fd;
1749 		syscallarg(mode_t) mode;
1750 	} */ *uap = v;
1751 	struct vattr vattr;
1752 	struct vnode *vp;
1753 	struct file *fp;
1754 	int error;
1755 
1756 	if (SCARG(uap, mode) & ~(S_IFMT | ALLPERMS))
1757 		return (EINVAL);
1758 
1759 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1760 		return (error);
1761 	vp = (struct vnode *)fp->f_data;
1762 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1763 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1764 		error = EROFS;
1765 	else {
1766 		VATTR_NULL(&vattr);
1767 		vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
1768 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1769 	}
1770 	VOP_UNLOCK(vp, 0, p);
1771 	FRELE(fp);
1772 	return (error);
1773 }
1774 
1775 /*
1776  * Set ownership given a path name.
1777  */
1778 /* ARGSUSED */
1779 int
1780 sys_chown(struct proc *p, void *v, register_t *retval)
1781 {
1782 	struct sys_chown_args /* {
1783 		syscallarg(const char *) path;
1784 		syscallarg(uid_t) uid;
1785 		syscallarg(gid_t) gid;
1786 	} */ *uap = v;
1787 	struct vnode *vp;
1788 	struct vattr vattr;
1789 	int error;
1790 	struct nameidata nd;
1791 	mode_t mode;
1792 
1793 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1794 	if ((error = namei(&nd)) != 0)
1795 		return (error);
1796 	vp = nd.ni_vp;
1797 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1798 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1799 		error = EROFS;
1800 	else {
1801 		if ((SCARG(uap, uid) != -1 || SCARG(uap, gid) != -1) &&
1802 		    (suser(p, 0) || suid_clear)) {
1803 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
1804 			if (error)
1805 				goto out;
1806 			mode = vattr.va_mode & ~(VSUID | VSGID);
1807 			if (mode == vattr.va_mode)
1808 				mode = VNOVAL;
1809 		}
1810 		else
1811 			mode = VNOVAL;
1812 		VATTR_NULL(&vattr);
1813 		vattr.va_uid = SCARG(uap, uid);
1814 		vattr.va_gid = SCARG(uap, gid);
1815 		vattr.va_mode = mode;
1816 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1817 	}
1818 out:
1819 	vput(vp);
1820 	return (error);
1821 }
1822 
1823 /*
1824  * Set ownership given a path name, without following links.
1825  */
1826 /* ARGSUSED */
1827 int
1828 sys_lchown(struct proc *p, void *v, register_t *retval)
1829 {
1830 	struct sys_lchown_args /* {
1831 		syscallarg(const char *) path;
1832 		syscallarg(uid_t) uid;
1833 		syscallarg(gid_t) gid;
1834 	} */ *uap = v;
1835 	struct vnode *vp;
1836 	struct vattr vattr;
1837 	int error;
1838 	struct nameidata nd;
1839 	mode_t mode;
1840 
1841 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1842 	if ((error = namei(&nd)) != 0)
1843 		return (error);
1844 	vp = nd.ni_vp;
1845 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1846 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1847 		error = EROFS;
1848 	else {
1849 		if ((SCARG(uap, uid) != -1 || SCARG(uap, gid) != -1) &&
1850 		    (suser(p, 0) || suid_clear)) {
1851 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
1852 			if (error)
1853 				goto out;
1854 			mode = vattr.va_mode & ~(VSUID | VSGID);
1855 			if (mode == vattr.va_mode)
1856 				mode = VNOVAL;
1857 		}
1858 		else
1859 			mode = VNOVAL;
1860 		VATTR_NULL(&vattr);
1861 		vattr.va_uid = SCARG(uap, uid);
1862 		vattr.va_gid = SCARG(uap, gid);
1863 		vattr.va_mode = mode;
1864 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1865 	}
1866 out:
1867 	vput(vp);
1868 	return (error);
1869 }
1870 
1871 /*
1872  * Set ownership given a file descriptor.
1873  */
1874 /* ARGSUSED */
1875 int
1876 sys_fchown(struct proc *p, void *v, register_t *retval)
1877 {
1878 	struct sys_fchown_args /* {
1879 		syscallarg(int) fd;
1880 		syscallarg(uid_t) uid;
1881 		syscallarg(gid_t) gid;
1882 	} */ *uap = v;
1883 	struct vnode *vp;
1884 	struct vattr vattr;
1885 	int error;
1886 	struct file *fp;
1887 	mode_t mode;
1888 
1889 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1890 		return (error);
1891 	vp = (struct vnode *)fp->f_data;
1892 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1893 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1894 		error = EROFS;
1895 	else {
1896 		if ((SCARG(uap, uid) != -1 || SCARG(uap, gid) != -1) &&
1897 		    (suser(p, 0) || suid_clear)) {
1898 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
1899 			if (error)
1900 				goto out;
1901 			mode = vattr.va_mode & ~(VSUID | VSGID);
1902 			if (mode == vattr.va_mode)
1903 				mode = VNOVAL;
1904 		} else
1905 			mode = VNOVAL;
1906 		VATTR_NULL(&vattr);
1907 		vattr.va_uid = SCARG(uap, uid);
1908 		vattr.va_gid = SCARG(uap, gid);
1909 		vattr.va_mode = mode;
1910 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1911 	}
1912 out:
1913 	VOP_UNLOCK(vp, 0, p);
1914 	FRELE(fp);
1915 	return (error);
1916 }
1917 
1918 /*
1919  * Set the access and modification times given a path name.
1920  */
1921 /* ARGSUSED */
1922 int
1923 sys_utimes(struct proc *p, void *v, register_t *retval)
1924 {
1925 	struct sys_utimes_args /* {
1926 		syscallarg(const char *) path;
1927 		syscallarg(const struct timeval *) tptr;
1928 	} */ *uap = v;
1929 	struct vnode *vp;
1930 	struct timeval tv[2];
1931 	struct vattr vattr;
1932 	int error;
1933 	struct nameidata nd;
1934 
1935 	VATTR_NULL(&vattr);
1936 	if (SCARG(uap, tptr) == NULL) {
1937 		getmicrotime(&tv[0]);
1938 		tv[1] = tv[0];
1939 		vattr.va_vaflags |= VA_UTIMES_NULL;
1940 	} else {
1941 		error = copyin(SCARG(uap, tptr), tv,
1942 		    sizeof(tv));
1943 		if (error)
1944 			return (error);
1945 		/* XXX workaround timeval matching the VFS constant VNOVAL */
1946 		if (tv[0].tv_sec == VNOVAL)
1947 			tv[0].tv_sec = VNOVAL - 1;
1948 		if (tv[1].tv_sec == VNOVAL)
1949 			tv[1].tv_sec = VNOVAL - 1;
1950 	}
1951 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1952 	if ((error = namei(&nd)) != 0)
1953 		return (error);
1954 	vp = nd.ni_vp;
1955 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1956 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1957 		error = EROFS;
1958 	else {
1959 		vattr.va_atime.tv_sec = tv[0].tv_sec;
1960 		vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
1961 		vattr.va_mtime.tv_sec = tv[1].tv_sec;
1962 		vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
1963 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1964 	}
1965 	vput(vp);
1966 	return (error);
1967 }
1968 
1969 /*
1970  * Set the access and modification times given a file descriptor.
1971  */
1972 /* ARGSUSED */
1973 int
1974 sys_futimes(struct proc *p, void *v, register_t *retval)
1975 {
1976 	struct sys_futimes_args /* {
1977 		syscallarg(int) fd;
1978 		syscallarg(const struct timeval *) tptr;
1979 	} */ *uap = v;
1980 	struct vnode *vp;
1981 	struct timeval tv[2];
1982 	struct vattr vattr;
1983 	int error;
1984 	struct file *fp;
1985 
1986 	VATTR_NULL(&vattr);
1987 	if (SCARG(uap, tptr) == NULL) {
1988 		getmicrotime(&tv[0]);
1989 		tv[1] = tv[0];
1990 		vattr.va_vaflags |= VA_UTIMES_NULL;
1991 	} else {
1992 		error = copyin(SCARG(uap, tptr), tv,
1993 		    sizeof(tv));
1994 		if (error)
1995 			return (error);
1996 		/* XXX workaround timeval matching the VFS constant VNOVAL */
1997 		if (tv[0].tv_sec == VNOVAL)
1998 			tv[0].tv_sec = VNOVAL - 1;
1999 		if (tv[1].tv_sec == VNOVAL)
2000 			tv[1].tv_sec = VNOVAL - 1;
2001 	}
2002 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2003 		return (error);
2004 	vp = (struct vnode *)fp->f_data;
2005 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2006 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2007 		error = EROFS;
2008 	else {
2009 		vattr.va_atime.tv_sec = tv[0].tv_sec;
2010 		vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
2011 		vattr.va_mtime.tv_sec = tv[1].tv_sec;
2012 		vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
2013 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2014 	}
2015 	VOP_UNLOCK(vp, 0, p);
2016 	FRELE(fp);
2017 	return (error);
2018 }
2019 
2020 /*
2021  * Truncate a file given its path name.
2022  */
2023 /* ARGSUSED */
2024 int
2025 sys_truncate(struct proc *p, void *v, register_t *retval)
2026 {
2027 	struct sys_truncate_args /* {
2028 		syscallarg(const char *) path;
2029 		syscallarg(int) pad;
2030 		syscallarg(off_t) length;
2031 	} */ *uap = v;
2032 	struct vnode *vp;
2033 	struct vattr vattr;
2034 	int error;
2035 	struct nameidata nd;
2036 
2037 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2038 	if ((error = namei(&nd)) != 0)
2039 		return (error);
2040 	vp = nd.ni_vp;
2041 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2042 	if (vp->v_type == VDIR)
2043 		error = EISDIR;
2044 	else if ((error = vn_writechk(vp)) == 0 &&
2045 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2046 		VATTR_NULL(&vattr);
2047 		vattr.va_size = SCARG(uap, length);
2048 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2049 	}
2050 	vput(vp);
2051 	return (error);
2052 }
2053 
2054 /*
2055  * Truncate a file given a file descriptor.
2056  */
2057 /* ARGSUSED */
2058 int
2059 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2060 {
2061 	struct sys_ftruncate_args /* {
2062 		syscallarg(int) fd;
2063 		syscallarg(int) pad;
2064 		syscallarg(off_t) length;
2065 	} */ *uap = v;
2066 	struct vattr vattr;
2067 	struct vnode *vp;
2068 	struct file *fp;
2069 	off_t len;
2070 	int error;
2071 
2072 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2073 		return (error);
2074 	len = SCARG(uap, length);
2075 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2076 		error = EINVAL;
2077 		goto bad;
2078 	}
2079 	vp = (struct vnode *)fp->f_data;
2080 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2081 	if (vp->v_type == VDIR)
2082 		error = EISDIR;
2083 	else if ((error = vn_writechk(vp)) == 0) {
2084 		VATTR_NULL(&vattr);
2085 		vattr.va_size = len;
2086 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2087 	}
2088 	VOP_UNLOCK(vp, 0, p);
2089 bad:
2090 	FRELE(fp);
2091 	return (error);
2092 }
2093 
2094 /*
2095  * Sync an open file.
2096  */
2097 /* ARGSUSED */
2098 int
2099 sys_fsync(struct proc *p, void *v, register_t *retval)
2100 {
2101 	struct sys_fsync_args /* {
2102 		syscallarg(int) fd;
2103 	} */ *uap = v;
2104 	struct vnode *vp;
2105 	struct file *fp;
2106 	int error;
2107 
2108 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2109 		return (error);
2110 	vp = (struct vnode *)fp->f_data;
2111 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2112 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2113 #ifdef FFS_SOFTUPDATES
2114 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2115 		error = softdep_fsync(vp);
2116 #endif
2117 
2118 	VOP_UNLOCK(vp, 0, p);
2119 	FRELE(fp);
2120 	return (error);
2121 }
2122 
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * Returns ENOTDIR/EISDIR when an existing target's type does not match
 * the source's, EINVAL when the source vnode is the target's parent
 * directory, and 0 without calling VOP_RENAME() when source and target
 * resolve to the same vnode.  Otherwise the result of VOP_RENAME().
 */
/* ARGSUSED */
int
sys_rename(struct proc *p, void *v, register_t *retval)
{
	struct sys_rename_args /* {
		syscallarg(const char *) from;
		syscallarg(const char *) to;
	} */ *uap = v;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;
	int flags;

	/* Look up the source; keep its parent and the start directory. */
	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    SCARG(uap, from), p);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;

	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
	/*
	 * rename("foo/", "bar/");  is  OK
	 */
	if (fvp->v_type == VDIR)
		flags |= STRIPSLASHES;

	/* Look up the target. */
	NDINIT(&tond, RENAME, flags,
	    UIO_USERSPACE, SCARG(uap, to), p);
	if ((error = namei(&tond)) != 0) {
		/* Target lookup failed: only the source side needs undoing. */
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	if (tvp != NULL) {
		/* An existing target must be of the same kind as the source. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number)
	 */
	/*
	 * -1 is an internal sentinel: it sends us down the abort path
	 * below and is turned into a 0 return at the end.
	 */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		if (tvp) {
			(void)uvm_vnp_uncache(tvp);
		}
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
	} else {
		/* Abort both operations and release everything looked up. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* Target-side start directory and pathname buffer. */
	vrele(tond.ni_startdir);
	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
out1:
	/* Source-side start directory and pathname buffer. */
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
	if (error == -1)
		return (0);
	return (error);
}
2209 
2210 /*
2211  * Make a directory file.
2212  */
2213 /* ARGSUSED */
2214 int
2215 sys_mkdir(struct proc *p, void *v, register_t *retval)
2216 {
2217 	struct sys_mkdir_args /* {
2218 		syscallarg(const char *) path;
2219 		syscallarg(mode_t) mode;
2220 	} */ *uap = v;
2221 	struct vnode *vp;
2222 	struct vattr vattr;
2223 	int error;
2224 	struct nameidata nd;
2225 
2226 	NDINIT(&nd, CREATE, LOCKPARENT | STRIPSLASHES,
2227 	    UIO_USERSPACE, SCARG(uap, path), p);
2228 	if ((error = namei(&nd)) != 0)
2229 		return (error);
2230 	vp = nd.ni_vp;
2231 	if (vp != NULL) {
2232 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2233 		if (nd.ni_dvp == vp)
2234 			vrele(nd.ni_dvp);
2235 		else
2236 			vput(nd.ni_dvp);
2237 		vrele(vp);
2238 		return (EEXIST);
2239 	}
2240 	VATTR_NULL(&vattr);
2241 	vattr.va_type = VDIR;
2242 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2243 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2244 	if (!error)
2245 		vput(nd.ni_vp);
2246 	return (error);
2247 }
2248 
2249 /*
2250  * Remove a directory file.
2251  */
2252 /* ARGSUSED */
2253 int
2254 sys_rmdir(struct proc *p, void *v, register_t *retval)
2255 {
2256 	struct sys_rmdir_args /* {
2257 		syscallarg(const char *) path;
2258 	} */ *uap = v;
2259 	struct vnode *vp;
2260 	int error;
2261 	struct nameidata nd;
2262 
2263 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2264 	    SCARG(uap, path), p);
2265 	if ((error = namei(&nd)) != 0)
2266 		return (error);
2267 	vp = nd.ni_vp;
2268 	if (vp->v_type != VDIR) {
2269 		error = ENOTDIR;
2270 		goto out;
2271 	}
2272 	/*
2273 	 * No rmdir "." please.
2274 	 */
2275 	if (nd.ni_dvp == vp) {
2276 		error = EBUSY;
2277 		goto out;
2278 	}
2279 	/*
2280 	 * The root of a mounted filesystem cannot be deleted.
2281 	 */
2282 	if (vp->v_flag & VROOT)
2283 		error = EBUSY;
2284 out:
2285 	if (!error) {
2286 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2287 	} else {
2288 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2289 		if (nd.ni_dvp == vp)
2290 			vrele(nd.ni_dvp);
2291 		else
2292 			vput(nd.ni_dvp);
2293 		vput(vp);
2294 	}
2295 	return (error);
2296 }
2297 
2298 /*
2299  * Read a block of directory entries in a file system independent format.
2300  */
2301 int
2302 sys_getdirentries(struct proc *p, void *v, register_t *retval)
2303 {
2304 	struct sys_getdirentries_args /* {
2305 		syscallarg(int) fd;
2306 		syscallarg(char *) buf;
2307 		syscallarg(int) count;
2308 		syscallarg(long *) basep;
2309 	} */ *uap = v;
2310 	struct vnode *vp;
2311 	struct file *fp;
2312 	struct uio auio;
2313 	struct iovec aiov;
2314 	long loff;
2315 	int error, eofflag;
2316 
2317 	if (SCARG(uap, count) < 0)
2318 		return EINVAL;
2319 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2320 		return (error);
2321 	if ((fp->f_flag & FREAD) == 0) {
2322 		error = EBADF;
2323 		goto bad;
2324 	}
2325 	vp = (struct vnode *)fp->f_data;
2326 	if (vp->v_type != VDIR) {
2327 		error = EINVAL;
2328 		goto bad;
2329 	}
2330 	aiov.iov_base = SCARG(uap, buf);
2331 	aiov.iov_len = SCARG(uap, count);
2332 	auio.uio_iov = &aiov;
2333 	auio.uio_iovcnt = 1;
2334 	auio.uio_rw = UIO_READ;
2335 	auio.uio_segflg = UIO_USERSPACE;
2336 	auio.uio_procp = p;
2337 	auio.uio_resid = SCARG(uap, count);
2338 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2339 	loff = auio.uio_offset = fp->f_offset;
2340 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0);
2341 	fp->f_offset = auio.uio_offset;
2342 	VOP_UNLOCK(vp, 0, p);
2343 	if (error)
2344 		goto bad;
2345 	error = copyout(&loff, SCARG(uap, basep),
2346 	    sizeof(long));
2347 	*retval = SCARG(uap, count) - auio.uio_resid;
2348 bad:
2349 	FRELE(fp);
2350 	return (error);
2351 }
2352 
2353 /*
2354  * Set the mode mask for creation of filesystem nodes.
2355  */
2356 int
2357 sys_umask(struct proc *p, void *v, register_t *retval)
2358 {
2359 	struct sys_umask_args /* {
2360 		syscallarg(mode_t) newmask;
2361 	} */ *uap = v;
2362 	struct filedesc *fdp;
2363 
2364 	fdp = p->p_fd;
2365 	*retval = fdp->fd_cmask;
2366 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2367 	return (0);
2368 }
2369 
2370 /*
2371  * Void all references to file by ripping underlying filesystem
2372  * away from vnode.
2373  */
2374 /* ARGSUSED */
2375 int
2376 sys_revoke(struct proc *p, void *v, register_t *retval)
2377 {
2378 	struct sys_revoke_args /* {
2379 		syscallarg(const char *) path;
2380 	} */ *uap = v;
2381 	struct vnode *vp;
2382 	struct vattr vattr;
2383 	int error;
2384 	struct nameidata nd;
2385 
2386 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2387 	if ((error = namei(&nd)) != 0)
2388 		return (error);
2389 	vp = nd.ni_vp;
2390 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2391 		goto out;
2392 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2393 	    (error = suser(p, 0)))
2394 		goto out;
2395 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
2396 		VOP_REVOKE(vp, REVOKEALL);
2397 out:
2398 	vrele(vp);
2399 	return (error);
2400 }
2401 
2402 /*
2403  * Convert a user file descriptor to a kernel file entry.
2404  *
2405  * On return *fpp is FREF:ed.
2406  */
2407 int
2408 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2409 {
2410 	struct file *fp;
2411 	struct vnode *vp;
2412 
2413 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2414 		return (EBADF);
2415 
2416 	if (fp->f_type != DTYPE_VNODE)
2417 		return (EINVAL);
2418 
2419 	vp = (struct vnode *)fp->f_data;
2420 	if (vp->v_type == VBAD)
2421 		return (EBADF);
2422 
2423 	FREF(fp);
2424 	*fpp = fp;
2425 
2426 	return (0);
2427 }
2428 
2429 /*
2430  * Positional read system call.
2431  */
2432 int
2433 sys_pread(struct proc *p, void *v, register_t *retval)
2434 {
2435 	struct sys_pread_args /* {
2436 		syscallarg(int) fd;
2437 		syscallarg(void *) buf;
2438 		syscallarg(size_t) nbyte;
2439 		syscallarg(int) pad;
2440 		syscallarg(off_t) offset;
2441 	} */ *uap = v;
2442 	struct filedesc *fdp = p->p_fd;
2443 	struct file *fp;
2444 	struct vnode *vp;
2445 	off_t offset;
2446 	int fd = SCARG(uap, fd);
2447 
2448 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2449 		return (EBADF);
2450 	if ((fp->f_flag & FREAD) == 0)
2451 		return (EBADF);
2452 
2453 	vp = (struct vnode *)fp->f_data;
2454 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2455 		return (ESPIPE);
2456 	}
2457 
2458 	offset = SCARG(uap, offset);
2459 
2460 	FREF(fp);
2461 
2462 	/* dofileread() will FRELE the descriptor for us */
2463 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2464 	    &offset, retval));
2465 }
2466 
2467 /*
2468  * Positional scatter read system call.
2469  */
2470 int
2471 sys_preadv(struct proc *p, void *v, register_t *retval)
2472 {
2473 	struct sys_preadv_args /* {
2474 		syscallarg(int) fd;
2475 		syscallarg(const struct iovec *) iovp;
2476 		syscallarg(int) iovcnt;
2477 		syscallarg(int) pad;
2478 		syscallarg(off_t) offset;
2479 	} */ *uap = v;
2480 	struct filedesc *fdp = p->p_fd;
2481 	struct file *fp;
2482 	struct vnode *vp;
2483 	off_t offset;
2484 	int fd = SCARG(uap, fd);
2485 
2486 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2487 		return (EBADF);
2488 	if ((fp->f_flag & FREAD) == 0)
2489 		return (EBADF);
2490 
2491 	vp = (struct vnode *)fp->f_data;
2492 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2493 		return (ESPIPE);
2494 	}
2495 
2496 	FREF(fp);
2497 
2498 	offset = SCARG(uap, offset);
2499 
2500 	/* dofilereadv() will FRELE the descriptor for us */
2501 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2502 	    &offset, retval));
2503 }
2504 
2505 /*
2506  * Positional write system call.
2507  */
2508 int
2509 sys_pwrite(struct proc *p, void *v, register_t *retval)
2510 {
2511 	struct sys_pwrite_args /* {
2512 		syscallarg(int) fd;
2513 		syscallarg(const void *) buf;
2514 		syscallarg(size_t) nbyte;
2515 		syscallarg(int) pad;
2516 		syscallarg(off_t) offset;
2517 	} */ *uap = v;
2518 	struct filedesc *fdp = p->p_fd;
2519 	struct file *fp;
2520 	struct vnode *vp;
2521 	off_t offset;
2522 	int fd = SCARG(uap, fd);
2523 
2524 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2525 		return (EBADF);
2526 	if ((fp->f_flag & FWRITE) == 0)
2527 		return (EBADF);
2528 
2529 	vp = (struct vnode *)fp->f_data;
2530 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2531 		return (ESPIPE);
2532 	}
2533 
2534 	FREF(fp);
2535 
2536 	offset = SCARG(uap, offset);
2537 
2538 	/* dofilewrite() will FRELE the descriptor for us */
2539 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2540 	    &offset, retval));
2541 }
2542 
2543 /*
2544  * Positional gather write system call.
2545  */
2546 int
2547 sys_pwritev(struct proc *p, void *v, register_t *retval)
2548 {
2549 	struct sys_pwritev_args /* {
2550 		syscallarg(int) fd;
2551 		syscallarg(const struct iovec *) iovp;
2552 		syscallarg(int) iovcnt;
2553 		syscallarg(int) pad;
2554 		syscallarg(off_t) offset;
2555 	} */ *uap = v;
2556 	struct filedesc *fdp = p->p_fd;
2557 	struct file *fp;
2558 	struct vnode *vp;
2559 	off_t offset;
2560 	int fd = SCARG(uap, fd);
2561 
2562 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2563 		return (EBADF);
2564 	if ((fp->f_flag & FWRITE) == 0)
2565 		return (EBADF);
2566 
2567 	vp = (struct vnode *)fp->f_data;
2568 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2569 		return (ESPIPE);
2570 	}
2571 
2572 	FREF(fp);
2573 
2574 	offset = SCARG(uap, offset);
2575 
2576 	/* dofilewritev() will FRELE the descriptor for us */
2577 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2578 	    &offset, retval));
2579 }
2580