xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.187 2012/07/11 23:07:19 guenther Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/uio.h>
51 #include <sys/malloc.h>
52 #include <sys/pool.h>
53 #include <sys/dirent.h>
54 #include <sys/dkio.h>
55 #include <sys/disklabel.h>
56 #include <sys/ktrace.h>
57 
58 #include <sys/syscallargs.h>
59 
60 #include <uvm/uvm_extern.h>
61 #include <sys/sysctl.h>
62 
63 extern int suid_clear;
64 int	usermount = 0;		/* sysctl: by default, users may not mount */
65 
66 static int change_dir(struct nameidata *, struct proc *);
67 
68 void checkdirs(struct vnode *);
69 
70 int copyout_statfs(struct statfs *, void *, struct proc *);
71 
72 int getdirentries_internal(struct proc *, int, char *, int, off_t *,
73     register_t *);
74 
75 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
76 int domknodat(struct proc *, int, const char *, mode_t, dev_t, register_t *);
77 int domkfifoat(struct proc *, int, const char *, mode_t, register_t *);
78 int dolinkat(struct proc *, int, const char *, int, const char *, int,
79     register_t *);
80 int dosymlinkat(struct proc *, const char *, int, const char *, register_t *);
81 int dounlinkat(struct proc *, int, const char *, int, register_t *);
82 int dofaccessat(struct proc *, int, const char *, int, int, register_t *);
83 int dofstatat(struct proc *, int, const char *, struct stat *, int,
84     register_t *);
85 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
86     register_t *);
87 int dofchmodat(struct proc *, int, const char *, mode_t, int, register_t *);
88 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int,
89     register_t *);
90 int dorenameat(struct proc *, int, const char *, int, const char *,
91     register_t *);
92 int domkdirat(struct proc *, int, const char *, mode_t, register_t *);
93 int doutimensat(struct proc *, int, const char *, struct timespec [2],
94     int, register_t *);
95 int dovutimens(struct proc *, struct vnode *, struct timespec [2],
96     register_t *);
97 int dofutimens(struct proc *, int, struct timespec [2], register_t *);
98 
99 /*
100  * Virtual File System System Calls
101  */
102 
103 /*
104  * Mount a file system.
105  */
106 /* ARGSUSED */
107 int
108 sys_mount(struct proc *p, void *v, register_t *retval)
109 {
110 	struct sys_mount_args /* {
111 		syscallarg(const char *) type;
112 		syscallarg(const char *) path;
113 		syscallarg(int) flags;
114 		syscallarg(void *) data;
115 	} */ *uap = v;
116 	struct vnode *vp;
117 	struct mount *mp;
118 	int error, mntflag = 0;
119 	char fstypename[MFSNAMELEN];
120 	char fspath[MNAMELEN];
121 	struct vattr va;
122 	struct nameidata nd;
123 	struct vfsconf *vfsp;
124 	int flags = SCARG(uap, flags);
125 
126 	if (usermount == 0 && (error = suser(p, 0)))
127 		return (error);
128 
129 	/*
130 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
131 	 */
132 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
133 	if (error)
134 		return(error);
135 
136 	/*
137 	 * Get vnode to be covered
138 	 */
139 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
140 	if ((error = namei(&nd)) != 0)
141 		return (error);
142 	vp = nd.ni_vp;
143 	if (flags & MNT_UPDATE) {
144 		if ((vp->v_flag & VROOT) == 0) {
145 			vput(vp);
146 			return (EINVAL);
147 		}
148 		mp = vp->v_mount;
149 		vfsp = mp->mnt_vfc;
150 		mntflag = mp->mnt_flag;
151 		/*
152 		 * We only allow the filesystem to be reloaded if it
153 		 * is currently mounted read-only.
154 		 */
155 		if ((flags & MNT_RELOAD) &&
156 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
157 			vput(vp);
158 			return (EOPNOTSUPP);	/* Needs translation */
159 		}
160 
161 		/*
162 		 * Only root, or the user that did the original mount is
163 		 * permitted to update it.
164 		 */
165 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
166 		    (error = suser(p, 0))) {
167 			vput(vp);
168 			return (error);
169 		}
170 		/*
171 		 * Do not allow NFS export by non-root users. Silently
172 		 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and
173 		 * inherit MNT_NOEXEC from the mount point.
174 		 */
175 		if (suser(p, 0) != 0) {
176 			if (flags & MNT_EXPORTED) {
177 				vput(vp);
178 				return (EPERM);
179 			}
180 			flags |= MNT_NOSUID | MNT_NODEV;
181 			if (mntflag & MNT_NOEXEC)
182 				flags |= MNT_NOEXEC;
183 		}
184 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
185 			vput(vp);
186 			return (error);
187 		}
188 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
189 		goto update;
190 	}
191 	/*
192 	 * If the user is not root, ensure that they own the directory
193 	 * onto which we are attempting to mount.
194 	 */
195 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
196 	    (va.va_uid != p->p_ucred->cr_uid &&
197 	    (error = suser(p, 0)))) {
198 		vput(vp);
199 		return (error);
200 	}
201 	/*
202 	 * Do not allow NFS export by non-root users. Silently
203 	 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and inherit
204 	 * MNT_NOEXEC from the mount point.
205 	 */
206 	if (suser(p, 0) != 0) {
207 		if (flags & MNT_EXPORTED) {
208 			vput(vp);
209 			return (EPERM);
210 		}
211 		flags |= MNT_NOSUID | MNT_NODEV;
212 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
213 			flags |= MNT_NOEXEC;
214 	}
215 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
216 		vput(vp);
217 		return (error);
218 	}
219 	if (vp->v_type != VDIR) {
220 		vput(vp);
221 		return (ENOTDIR);
222 	}
223 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
224 	if (error) {
225 		vput(vp);
226 		return (error);
227 	}
228 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
229 		if (!strcmp(vfsp->vfc_name, fstypename))
230 			break;
231 	}
232 
233 	if (vfsp == NULL) {
234 		vput(vp);
235 		return (EOPNOTSUPP);
236 	}
237 
238 	if (vp->v_mountedhere != NULL) {
239 		vput(vp);
240 		return (EBUSY);
241 	}
242 
243 	/*
244 	 * Allocate and initialize the file system.
245 	 */
246 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
247 		M_MOUNT, M_WAITOK|M_ZERO);
248 	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
249 	mp->mnt_op = vfsp->vfc_vfsops;
250 	mp->mnt_vfc = vfsp;
251 	mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
252 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
253 	mp->mnt_vnodecovered = vp;
254 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
255 update:
256 	/*
257 	 * Set the mount level flags.
258 	 */
259 	if (flags & MNT_RDONLY)
260 		mp->mnt_flag |= MNT_RDONLY;
261 	else if (mp->mnt_flag & MNT_RDONLY)
262 		mp->mnt_flag |= MNT_WANTRDWR;
263 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
264 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
265 	    MNT_FORCE);
266 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
267 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
268 	    MNT_NOATIME | MNT_FORCE);
269 	/*
270 	 * Mount the filesystem.
271 	 */
272 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
273 	if (!error) {
274 		mp->mnt_stat.f_ctime = time_second;
275 	}
276 	if (mp->mnt_flag & MNT_UPDATE) {
277 		vput(vp);
278 		if (mp->mnt_flag & MNT_WANTRDWR)
279 			mp->mnt_flag &= ~MNT_RDONLY;
280 		mp->mnt_flag &=~
281 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
282 		if (error)
283 			mp->mnt_flag = mntflag;
284 
285  		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
286  			if (mp->mnt_syncer == NULL)
287  				error = vfs_allocate_syncvnode(mp);
288  		} else {
289  			if (mp->mnt_syncer != NULL)
290  				vgone(mp->mnt_syncer);
291  			mp->mnt_syncer = NULL;
292  		}
293 
294 		vfs_unbusy(mp);
295 		return (error);
296 	}
297 
298 	vp->v_mountedhere = mp;
299 
300 	/*
301 	 * Put the new filesystem on the mount list after root.
302 	 */
303 	cache_purge(vp);
304 	if (!error) {
305 		vfsp->vfc_refcount++;
306 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
307 		checkdirs(vp);
308 		VOP_UNLOCK(vp, 0, p);
309  		if ((mp->mnt_flag & MNT_RDONLY) == 0)
310  			error = vfs_allocate_syncvnode(mp);
311 		vfs_unbusy(mp);
312 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
313 		if ((error = VFS_START(mp, 0, p)) != 0)
314 			vrele(vp);
315 	} else {
316 		mp->mnt_vnodecovered->v_mountedhere = NULL;
317 		vfs_unbusy(mp);
318 		free(mp, M_MOUNT);
319 		vput(vp);
320 	}
321 	return (error);
322 }
323 
324 /*
325  * Scan all active processes to see if any of them have a current
326  * or root directory onto which the new filesystem has just been
327  * mounted. If so, replace them with the new mount point.
328  */
329 void
330 checkdirs(struct vnode *olddp)
331 {
332 	struct filedesc *fdp;
333 	struct vnode *newdp, *vp;
334 	struct proc *p;
335 
336 	if (olddp->v_usecount == 1)
337 		return;
338 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
339 		panic("mount: lost mount");
340 again:
341 	LIST_FOREACH(p, &allproc, p_list) {
342 		fdp = p->p_fd;
343 		if (fdp->fd_cdir == olddp) {
344 			vp = fdp->fd_cdir;
345 			vref(newdp);
346 			fdp->fd_cdir = newdp;
347 			if (vrele(vp))
348 				goto again;
349 		}
350 		if (fdp->fd_rdir == olddp) {
351 			vp = fdp->fd_rdir;
352 			vref(newdp);
353 			fdp->fd_rdir = newdp;
354 			if (vrele(vp))
355 				goto again;
356 		}
357 	}
358 	if (rootvnode == olddp) {
359 		vrele(rootvnode);
360 		vref(newdp);
361 		rootvnode = newdp;
362 	}
363 	vput(newdp);
364 }
365 
366 /*
367  * Unmount a file system.
368  *
369  * Note: unmount takes a path to the vnode mounted on as argument,
370  * not special file (as before).
371  */
372 /* ARGSUSED */
373 int
374 sys_unmount(struct proc *p, void *v, register_t *retval)
375 {
376 	struct sys_unmount_args /* {
377 		syscallarg(const char *) path;
378 		syscallarg(int) flags;
379 	} */ *uap = v;
380 	struct vnode *vp;
381 	struct mount *mp;
382 	int error;
383 	struct nameidata nd;
384 
385 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
386 	    SCARG(uap, path), p);
387 	if ((error = namei(&nd)) != 0)
388 		return (error);
389 	vp = nd.ni_vp;
390 	mp = vp->v_mount;
391 
392 	/*
393 	 * Only root, or the user that did the original mount is
394 	 * permitted to unmount this filesystem.
395 	 */
396 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
397 	    (error = suser(p, 0))) {
398 		vput(vp);
399 		return (error);
400 	}
401 
402 	/*
403 	 * Don't allow unmounting the root file system.
404 	 */
405 	if (mp->mnt_flag & MNT_ROOTFS) {
406 		vput(vp);
407 		return (EINVAL);
408 	}
409 
410 	/*
411 	 * Must be the root of the filesystem
412 	 */
413 	if ((vp->v_flag & VROOT) == 0) {
414 		vput(vp);
415 		return (EINVAL);
416 	}
417 	vput(vp);
418 
419 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
420 		return (EBUSY);
421 
422 	return (dounmount(mp, SCARG(uap, flags), p, vp));
423 }
424 
425 /*
426  * Do the actual file system unmount.
427  */
428 int
429 dounmount(struct mount *mp, int flags, struct proc *p, struct vnode *olddp)
430 {
431 	struct vnode *coveredvp;
432 	int error;
433 	int hadsyncer = 0;
434 
435  	mp->mnt_flag &=~ MNT_ASYNC;
436  	cache_purgevfs(mp);	/* remove cache entries for this file sys */
437  	if (mp->mnt_syncer != NULL) {
438 		hadsyncer = 1;
439  		vgone(mp->mnt_syncer);
440 		mp->mnt_syncer = NULL;
441 	}
442 	if (((mp->mnt_flag & MNT_RDONLY) ||
443 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
444  	    (flags & MNT_FORCE))
445  		error = VFS_UNMOUNT(mp, flags, p);
446 
447  	if (error && error != EIO && !(flags & MNT_DOOMED)) {
448  		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
449  			(void) vfs_allocate_syncvnode(mp);
450 		vfs_unbusy(mp);
451 		return (error);
452 	}
453 
454 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
455 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
456 		coveredvp->v_mountedhere = NULL;
457  		vrele(coveredvp);
458  	}
459 
460 	mp->mnt_vfc->vfc_refcount--;
461 
462 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
463 		panic("unmount: dangling vnode");
464 
465 	vfs_unbusy(mp);
466 	free(mp, M_MOUNT);
467 
468 	return (0);
469 }
470 
471 /*
472  * Sync each mounted filesystem.
473  */
474 #ifdef DEBUG
475 int syncprt = 0;
476 struct ctldebug debug0 = { "syncprt", &syncprt };
477 #endif
478 
479 /* ARGSUSED */
480 int
481 sys_sync(struct proc *p, void *v, register_t *retval)
482 {
483 	struct mount *mp, *nmp;
484 	int asyncflag;
485 
486 	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
487 	    mp = nmp) {
488 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
489 			nmp = CIRCLEQ_PREV(mp, mnt_list);
490 			continue;
491 		}
492 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
493 			asyncflag = mp->mnt_flag & MNT_ASYNC;
494 			mp->mnt_flag &= ~MNT_ASYNC;
495 			uvm_vnp_sync(mp);
496 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
497 			if (asyncflag)
498 				mp->mnt_flag |= MNT_ASYNC;
499 		}
500 		nmp = CIRCLEQ_PREV(mp, mnt_list);
501 		vfs_unbusy(mp);
502 	}
503 
504 	return (0);
505 }
506 
507 /*
508  * Change filesystem quotas.
509  */
510 /* ARGSUSED */
511 int
512 sys_quotactl(struct proc *p, void *v, register_t *retval)
513 {
514 	struct sys_quotactl_args /* {
515 		syscallarg(const char *) path;
516 		syscallarg(int) cmd;
517 		syscallarg(int) uid;
518 		syscallarg(char *) arg;
519 	} */ *uap = v;
520 	struct mount *mp;
521 	int error;
522 	struct nameidata nd;
523 
524 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
525 	if ((error = namei(&nd)) != 0)
526 		return (error);
527 	mp = nd.ni_vp->v_mount;
528 	vrele(nd.ni_vp);
529 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
530 	    SCARG(uap, arg), p));
531 }
532 
533 int
534 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
535 {
536 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
537 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
538 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
539 	char *s, *d;
540 	int error;
541 
542 	/* Don't let non-root see filesystem id (for NFS security) */
543 	if (suser(p, 0)) {
544 		fsid_t fsid;
545 
546 		s = (char *)sp;
547 		d = (char *)uaddr;
548 
549 		memset(&fsid, 0, sizeof(fsid));
550 
551 		if ((error = copyout(s, d, co_sz1)) != 0)
552 			return (error);
553 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
554 			return (error);
555 		return (copyout(s + co_off2, d + co_off2, co_sz2));
556 	}
557 
558 	return (copyout(sp, uaddr, sizeof(*sp)));
559 }
560 
561 /*
562  * Get filesystem statistics.
563  */
564 /* ARGSUSED */
565 int
566 sys_statfs(struct proc *p, void *v, register_t *retval)
567 {
568 	struct sys_statfs_args /* {
569 		syscallarg(const char *) path;
570 		syscallarg(struct statfs *) buf;
571 	} */ *uap = v;
572 	struct mount *mp;
573 	struct statfs *sp;
574 	int error;
575 	struct nameidata nd;
576 
577 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
578 	if ((error = namei(&nd)) != 0)
579 		return (error);
580 	mp = nd.ni_vp->v_mount;
581 	sp = &mp->mnt_stat;
582 	vrele(nd.ni_vp);
583 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
584 		return (error);
585 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
586 
587 	return (copyout_statfs(sp, SCARG(uap, buf), p));
588 }
589 
590 /*
591  * Get filesystem statistics.
592  */
593 /* ARGSUSED */
594 int
595 sys_fstatfs(struct proc *p, void *v, register_t *retval)
596 {
597 	struct sys_fstatfs_args /* {
598 		syscallarg(int) fd;
599 		syscallarg(struct statfs *) buf;
600 	} */ *uap = v;
601 	struct file *fp;
602 	struct mount *mp;
603 	struct statfs *sp;
604 	int error;
605 
606 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
607 		return (error);
608 	mp = ((struct vnode *)fp->f_data)->v_mount;
609 	if (!mp) {
610 		FRELE(fp, p);
611 		return (ENOENT);
612 	}
613 	sp = &mp->mnt_stat;
614 	error = VFS_STATFS(mp, sp, p);
615 	FRELE(fp, p);
616 	if (error)
617 		return (error);
618 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
619 
620 	return (copyout_statfs(sp, SCARG(uap, buf), p));
621 }
622 
623 /*
624  * Get statistics on all filesystems.
625  */
626 int
627 sys_getfsstat(struct proc *p, void *v, register_t *retval)
628 {
629 	struct sys_getfsstat_args /* {
630 		syscallarg(struct statfs *) buf;
631 		syscallarg(size_t) bufsize;
632 		syscallarg(int) flags;
633 	} */ *uap = v;
634 	struct mount *mp, *nmp;
635 	struct statfs *sp;
636 	struct statfs *sfsp;
637 	size_t count, maxcount;
638 	int error, flags = SCARG(uap, flags);
639 
640 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
641 	sfsp = SCARG(uap, buf);
642 	count = 0;
643 
644 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
645 	    mp = nmp) {
646 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
647 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
648 			continue;
649 		}
650 		if (sfsp && count < maxcount) {
651 			sp = &mp->mnt_stat;
652 
653 			/* Refresh stats unless MNT_NOWAIT is specified */
654 			if (flags != MNT_NOWAIT &&
655 			    flags != MNT_LAZY &&
656 			    (flags == MNT_WAIT ||
657 			    flags == 0) &&
658 			    (error = VFS_STATFS(mp, sp, p))) {
659 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
660 				vfs_unbusy(mp);
661  				continue;
662 			}
663 
664 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
665 #if notyet
666 			if (mp->mnt_flag & MNT_SOFTDEP)
667 				sp->f_eflags = STATFS_SOFTUPD;
668 #endif
669 			error = (copyout_statfs(sp, sfsp, p));
670 			if (error) {
671 				vfs_unbusy(mp);
672 				return (error);
673 			}
674 			sfsp++;
675 		}
676 		count++;
677 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
678 		vfs_unbusy(mp);
679 	}
680 
681 	if (sfsp && count > maxcount)
682 		*retval = maxcount;
683 	else
684 		*retval = count;
685 
686 	return (0);
687 }
688 
689 /*
690  * Change current working directory to a given file descriptor.
691  */
692 /* ARGSUSED */
693 int
694 sys_fchdir(struct proc *p, void *v, register_t *retval)
695 {
696 	struct sys_fchdir_args /* {
697 		syscallarg(int) fd;
698 	} */ *uap = v;
699 	struct filedesc *fdp = p->p_fd;
700 	struct vnode *vp, *tdp;
701 	struct mount *mp;
702 	struct file *fp;
703 	int error;
704 
705 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
706 		return (EBADF);
707 	vp = (struct vnode *)fp->f_data;
708 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR)
709 		return (ENOTDIR);
710 	vref(vp);
711 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
712 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
713 
714 	while (!error && (mp = vp->v_mountedhere) != NULL) {
715 		if (vfs_busy(mp, VB_READ|VB_WAIT))
716 			continue;
717 		error = VFS_ROOT(mp, &tdp);
718 		vfs_unbusy(mp);
719 		if (error)
720 			break;
721 		vput(vp);
722 		vp = tdp;
723 	}
724 	if (error) {
725 		vput(vp);
726 		return (error);
727 	}
728 	VOP_UNLOCK(vp, 0, p);
729 	vrele(fdp->fd_cdir);
730 	fdp->fd_cdir = vp;
731 	return (0);
732 }
733 
734 /*
735  * Change current working directory (``.'').
736  */
737 /* ARGSUSED */
738 int
739 sys_chdir(struct proc *p, void *v, register_t *retval)
740 {
741 	struct sys_chdir_args /* {
742 		syscallarg(const char *) path;
743 	} */ *uap = v;
744 	struct filedesc *fdp = p->p_fd;
745 	int error;
746 	struct nameidata nd;
747 
748 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
749 	    SCARG(uap, path), p);
750 	if ((error = change_dir(&nd, p)) != 0)
751 		return (error);
752 	vrele(fdp->fd_cdir);
753 	fdp->fd_cdir = nd.ni_vp;
754 	return (0);
755 }
756 
757 /*
758  * Change notion of root (``/'') directory.
759  */
760 /* ARGSUSED */
761 int
762 sys_chroot(struct proc *p, void *v, register_t *retval)
763 {
764 	struct sys_chroot_args /* {
765 		syscallarg(const char *) path;
766 	} */ *uap = v;
767 	struct filedesc *fdp = p->p_fd;
768 	int error;
769 	struct nameidata nd;
770 
771 	if ((error = suser(p, 0)) != 0)
772 		return (error);
773 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
774 	    SCARG(uap, path), p);
775 	if ((error = change_dir(&nd, p)) != 0)
776 		return (error);
777 	if (fdp->fd_rdir != NULL) {
778 		/*
779 		 * A chroot() done inside a changed root environment does
780 		 * an automatic chdir to avoid the out-of-tree experience.
781 		 */
782 		vrele(fdp->fd_rdir);
783 		vrele(fdp->fd_cdir);
784 		vref(nd.ni_vp);
785 		fdp->fd_cdir = nd.ni_vp;
786 	}
787 	fdp->fd_rdir = nd.ni_vp;
788 	return (0);
789 }
790 
791 /*
792  * Common routine for chroot and chdir.
793  */
794 static int
795 change_dir(struct nameidata *ndp, struct proc *p)
796 {
797 	struct vnode *vp;
798 	int error;
799 
800 	if ((error = namei(ndp)) != 0)
801 		return (error);
802 	vp = ndp->ni_vp;
803 	if (vp->v_type != VDIR)
804 		error = ENOTDIR;
805 	else
806 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
807 	if (error)
808 		vput(vp);
809 	else
810 		VOP_UNLOCK(vp, 0, p);
811 	return (error);
812 }
813 
814 /*
815  * Check permissions, allocate an open file structure,
816  * and call the device open routine if any.
817  */
818 int
819 sys_open(struct proc *p, void *v, register_t *retval)
820 {
821 	struct sys_open_args /* {
822 		syscallarg(const char *) path;
823 		syscallarg(int) flags;
824 		syscallarg(mode_t) mode;
825 	} */ *uap = v;
826 
827 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
828 	    SCARG(uap, mode), retval));
829 }
830 
831 int
832 sys_openat(struct proc *p, void *v, register_t *retval)
833 {
834 	struct sys_openat_args /* {
835 		syscallarg(int) fd;
836 		syscallarg(const char *) path;
837 		syscallarg(int) flags;
838 		syscallarg(mode_t) mode;
839 	} */ *uap = v;
840 
841 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
842 	    SCARG(uap, flags), SCARG(uap, mode), retval));
843 }
844 
845 int
846 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
847     register_t *retval)
848 {
849 	struct filedesc *fdp = p->p_fd;
850 	struct file *fp;
851 	struct vnode *vp;
852 	struct vattr vattr;
853 	int flags, cmode;
854 	int type, indx, error, localtrunc = 0;
855 	struct flock lf;
856 	struct nameidata nd;
857 
858 	fdplock(fdp);
859 
860 	if ((error = falloc(p, &fp, &indx)) != 0)
861 		goto out;
862 	flags = FFLAGS(oflags);
863 	if (flags & O_CLOEXEC)
864 		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
865 
866 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
867 	NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
868 	p->p_dupfd = -1;			/* XXX check for fdopen */
869 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
870 		localtrunc = 1;
871 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
872 	}
873 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
874 		if (error == ENODEV &&
875 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
876 		    (error =
877 			dupfdopen(fdp, indx, p->p_dupfd, flags)) == 0) {
878 			closef(fp, p);
879 			*retval = indx;
880 			goto out;
881 		}
882 		if (error == ERESTART)
883 			error = EINTR;
884 		fdremove(fdp, indx);
885 		closef(fp, p);
886 		goto out;
887 	}
888 	p->p_dupfd = 0;
889 	vp = nd.ni_vp;
890 	fp->f_flag = flags & FMASK;
891 	fp->f_type = DTYPE_VNODE;
892 	fp->f_ops = &vnops;
893 	fp->f_data = vp;
894 	if (flags & (O_EXLOCK | O_SHLOCK)) {
895 		lf.l_whence = SEEK_SET;
896 		lf.l_start = 0;
897 		lf.l_len = 0;
898 		if (flags & O_EXLOCK)
899 			lf.l_type = F_WRLCK;
900 		else
901 			lf.l_type = F_RDLCK;
902 		type = F_FLOCK;
903 		if ((flags & FNONBLOCK) == 0)
904 			type |= F_WAIT;
905 		VOP_UNLOCK(vp, 0, p);
906 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
907 		if (error) {
908 			/* closef will vn_close the file for us. */
909 			fdremove(fdp, indx);
910 			closef(fp, p);
911 			goto out;
912 		}
913 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
914 		fp->f_flag |= FHASLOCK;
915 	}
916 	if (localtrunc) {
917 		if ((fp->f_flag & FWRITE) == 0)
918 			error = EACCES;
919 		else if (vp->v_mount->mnt_flag & MNT_RDONLY)
920 			error = EROFS;
921 		else if (vp->v_type == VDIR)
922 			error = EISDIR;
923 		else if ((error = vn_writechk(vp)) == 0) {
924 			VATTR_NULL(&vattr);
925 			vattr.va_size = 0;
926 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
927 		}
928 		if (error) {
929 			VOP_UNLOCK(vp, 0, p);
930 			/* closef will close the file for us. */
931 			fdremove(fdp, indx);
932 			closef(fp, p);
933 			goto out;
934 		}
935 	}
936 	VOP_UNLOCK(vp, 0, p);
937 	*retval = indx;
938 	FILE_SET_MATURE(fp, p);
939 out:
940 	fdpunlock(fdp);
941 	return (error);
942 }
943 
944 /*
945  * Get file handle system call
946  */
947 int
948 sys_getfh(struct proc *p, void *v, register_t *retval)
949 {
950 	struct sys_getfh_args /* {
951 		syscallarg(const char *) fname;
952 		syscallarg(fhandle_t *) fhp;
953 	} */ *uap = v;
954 	struct vnode *vp;
955 	fhandle_t fh;
956 	int error;
957 	struct nameidata nd;
958 
959 	/*
960 	 * Must be super user
961 	 */
962 	error = suser(p, 0);
963 	if (error)
964 		return (error);
965 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
966 	    SCARG(uap, fname), p);
967 	error = namei(&nd);
968 	if (error)
969 		return (error);
970 	vp = nd.ni_vp;
971 	bzero(&fh, sizeof(fh));
972 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
973 	error = VFS_VPTOFH(vp, &fh.fh_fid);
974 	vput(vp);
975 	if (error)
976 		return (error);
977 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
978 	return (error);
979 }
980 
981 /*
982  * Open a file given a file handle.
983  *
984  * Check permissions, allocate an open file structure,
985  * and call the device open routine if any.
986  */
987 int
988 sys_fhopen(struct proc *p, void *v, register_t *retval)
989 {
990 	struct sys_fhopen_args /* {
991 		syscallarg(const fhandle_t *) fhp;
992 		syscallarg(int) flags;
993 	} */ *uap = v;
994 	struct filedesc *fdp = p->p_fd;
995 	struct file *fp;
996 	struct vnode *vp = NULL;
997 	struct mount *mp;
998 	struct ucred *cred = p->p_ucred;
999 	int flags;
1000 	int type, indx, error=0;
1001 	struct flock lf;
1002 	struct vattr va;
1003 	fhandle_t fh;
1004 
1005 	/*
1006 	 * Must be super user
1007 	 */
1008 	if ((error = suser(p, 0)))
1009 		return (error);
1010 
1011 	flags = FFLAGS(SCARG(uap, flags));
1012 	if ((flags & (FREAD | FWRITE)) == 0)
1013 		return (EINVAL);
1014 	if ((flags & O_CREAT))
1015 		return (EINVAL);
1016 
1017 	fdplock(fdp);
1018 	if ((error = falloc(p, &fp, &indx)) != 0) {
1019 		fp = NULL;
1020 		goto bad;
1021 	}
1022 	if (flags & O_CLOEXEC)
1023 		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
1024 
1025 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1026 		goto bad;
1027 
1028 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1029 		error = ESTALE;
1030 		goto bad;
1031 	}
1032 
1033 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1034 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1035 		goto bad;
1036 	}
1037 
1038 	/* Now do an effective vn_open */
1039 
1040 	if (vp->v_type == VSOCK) {
1041 		error = EOPNOTSUPP;
1042 		goto bad;
1043 	}
1044 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1045 		error = ENOTDIR;
1046 		goto bad;
1047 	}
1048 	if (flags & FREAD) {
1049 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1050 			goto bad;
1051 	}
1052 	if (flags & (FWRITE | O_TRUNC)) {
1053 		if (vp->v_type == VDIR) {
1054 			error = EISDIR;
1055 			goto bad;
1056 		}
1057 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1058 		    (error = vn_writechk(vp)) != 0)
1059 			goto bad;
1060 	}
1061 	if (flags & O_TRUNC) {
1062 		VATTR_NULL(&va);
1063 		va.va_size = 0;
1064 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1065 			goto bad;
1066 	}
1067 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1068 		goto bad;
1069 	if (flags & FWRITE)
1070 		vp->v_writecount++;
1071 
1072 	/* done with modified vn_open, now finish what sys_open does. */
1073 
1074 	fp->f_flag = flags & FMASK;
1075 	fp->f_type = DTYPE_VNODE;
1076 	fp->f_ops = &vnops;
1077 	fp->f_data = vp;
1078 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1079 		lf.l_whence = SEEK_SET;
1080 		lf.l_start = 0;
1081 		lf.l_len = 0;
1082 		if (flags & O_EXLOCK)
1083 			lf.l_type = F_WRLCK;
1084 		else
1085 			lf.l_type = F_RDLCK;
1086 		type = F_FLOCK;
1087 		if ((flags & FNONBLOCK) == 0)
1088 			type |= F_WAIT;
1089 		VOP_UNLOCK(vp, 0, p);
1090 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1091 		if (error) {
1092 			vp = NULL;	/* closef will vn_close the file */
1093 			goto bad;
1094 		}
1095 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1096 		fp->f_flag |= FHASLOCK;
1097 	}
1098 	VOP_UNLOCK(vp, 0, p);
1099 	*retval = indx;
1100 	FILE_SET_MATURE(fp, p);
1101 
1102 	fdpunlock(fdp);
1103 	return (0);
1104 
1105 bad:
1106 	if (fp) {
1107 		fdremove(fdp, indx);
1108 		closef(fp, p);
1109 		if (vp != NULL)
1110 			vput(vp);
1111 	}
1112 	fdpunlock(fdp);
1113 	return (error);
1114 }
1115 
1116 /* ARGSUSED */
1117 int
1118 sys_fhstat(struct proc *p, void *v, register_t *retval)
1119 {
1120 	struct sys_fhstat_args /* {
1121 		syscallarg(const fhandle_t *) fhp;
1122 		syscallarg(struct stat *) sb;
1123 	} */ *uap = v;
1124 	struct stat sb;
1125 	int error;
1126 	fhandle_t fh;
1127 	struct mount *mp;
1128 	struct vnode *vp;
1129 
1130 	/*
1131 	 * Must be super user
1132 	 */
1133 	if ((error = suser(p, 0)))
1134 		return (error);
1135 
1136 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1137 		return (error);
1138 
1139 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1140 		return (ESTALE);
1141 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1142 		return (error);
1143 	error = vn_stat(vp, &sb, p);
1144 	vput(vp);
1145 	if (error)
1146 		return (error);
1147 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1148 	return (error);
1149 }
1150 
1151 /* ARGSUSED */
1152 int
1153 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1154 {
1155 	struct sys_fhstatfs_args /* {
1156 		syscallarg(const fhandle_t *) fhp;
1157 		syscallarg(struct statfs *) buf;
1158 	} */ *uap = v;
1159 	struct statfs *sp;
1160 	fhandle_t fh;
1161 	struct mount *mp;
1162 	struct vnode *vp;
1163 	int error;
1164 
1165 	/*
1166 	 * Must be super user
1167 	 */
1168 	if ((error = suser(p, 0)))
1169 		return (error);
1170 
1171 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1172 		return (error);
1173 
1174 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1175 		return (ESTALE);
1176 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1177 		return (error);
1178 	mp = vp->v_mount;
1179 	sp = &mp->mnt_stat;
1180 	vput(vp);
1181 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1182 		return (error);
1183 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1184 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1185 }
1186 
1187 /*
1188  * Create a special file.
1189  */
1190 /* ARGSUSED */
1191 int
1192 sys_mknod(struct proc *p, void *v, register_t *retval)
1193 {
1194 	struct sys_mknod_args /* {
1195 		syscallarg(const char *) path;
1196 		syscallarg(mode_t) mode;
1197 		syscallarg(int) dev;
1198 	} */ *uap = v;
1199 
1200 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1201 	    SCARG(uap, dev), retval));
1202 }
1203 
1204 int
1205 sys_mknodat(struct proc *p, void *v, register_t *retval)
1206 {
1207 	struct sys_mknodat_args /* {
1208 		syscallarg(int) fd;
1209 		syscallarg(const char *) path;
1210 		syscallarg(mode_t) mode;
1211 		syscallarg(dev_t) dev;
1212 	} */ *uap = v;
1213 
1214 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1215 	    SCARG(uap, mode), SCARG(uap, dev), retval));
1216 }
1217 
1218 int
1219 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev,
1220     register_t *retval)
1221 {
1222 	struct vnode *vp;
1223 	struct vattr vattr;
1224 	int error;
1225 	struct nameidata nd;
1226 
1227 	if ((error = suser(p, 0)) != 0)
1228 		return (error);
1229 	if (p->p_fd->fd_rdir)
1230 		return (EINVAL);
1231 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1232 	if ((error = namei(&nd)) != 0)
1233 		return (error);
1234 	vp = nd.ni_vp;
1235 	if (vp != NULL)
1236 		error = EEXIST;
1237 	else {
1238 		VATTR_NULL(&vattr);
1239 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1240 		vattr.va_rdev = dev;
1241 
1242 		switch (mode & S_IFMT) {
1243 		case S_IFMT:	/* used by badsect to flag bad sectors */
1244 			vattr.va_type = VBAD;
1245 			break;
1246 		case S_IFCHR:
1247 			vattr.va_type = VCHR;
1248 			break;
1249 		case S_IFBLK:
1250 			vattr.va_type = VBLK;
1251 			break;
1252 		default:
1253 			error = EINVAL;
1254 			break;
1255 		}
1256 	}
1257 	if (!error) {
1258 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1259 	} else {
1260 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1261 		if (nd.ni_dvp == vp)
1262 			vrele(nd.ni_dvp);
1263 		else
1264 			vput(nd.ni_dvp);
1265 		if (vp)
1266 			vrele(vp);
1267 	}
1268 	return (error);
1269 }
1270 
1271 /*
1272  * Create a named pipe.
1273  */
1274 /* ARGSUSED */
1275 int
1276 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1277 {
1278 	struct sys_mkfifo_args /* {
1279 		syscallarg(const char *) path;
1280 		syscallarg(mode_t) mode;
1281 	} */ *uap = v;
1282 
1283 	return (domkfifoat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1284 	    retval));
1285 }
1286 
1287 int
1288 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1289 {
1290 	struct sys_mkfifoat_args /* {
1291 		syscallarg(int) fd;
1292 		syscallarg(const char *) path;
1293 		syscallarg(mode_t) mode;
1294 	} */ *uap = v;
1295 
1296 	return (domkfifoat(p, SCARG(uap, fd), SCARG(uap, path),
1297 	    SCARG(uap, mode), retval));
1298 }
1299 
1300 int
1301 domkfifoat(struct proc *p, int fd, const char *path, mode_t mode, register_t *retval)
1302 {
1303 #ifndef FIFO
1304 	return (EOPNOTSUPP);
1305 #else
1306 	struct vattr vattr;
1307 	int error;
1308 	struct nameidata nd;
1309 
1310 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1311 	if ((error = namei(&nd)) != 0)
1312 		return (error);
1313 	if (nd.ni_vp != NULL) {
1314 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1315 		if (nd.ni_dvp == nd.ni_vp)
1316 			vrele(nd.ni_dvp);
1317 		else
1318 			vput(nd.ni_dvp);
1319 		vrele(nd.ni_vp);
1320 		return (EEXIST);
1321 	}
1322 	VATTR_NULL(&vattr);
1323 	vattr.va_type = VFIFO;
1324 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1325 	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
1326 #endif /* FIFO */
1327 }
1328 
1329 /*
1330  * Make a hard file link.
1331  */
1332 /* ARGSUSED */
1333 int
1334 sys_link(struct proc *p, void *v, register_t *retval)
1335 {
1336 	struct sys_link_args /* {
1337 		syscallarg(const char *) path;
1338 		syscallarg(const char *) link;
1339 	} */ *uap = v;
1340 
1341 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1342 	    SCARG(uap, link), AT_SYMLINK_FOLLOW, retval));
1343 }
1344 
1345 int
1346 sys_linkat(struct proc *p, void *v, register_t *retval)
1347 {
1348 	struct sys_linkat_args /* {
1349 		syscallarg(int) fd1;
1350 		syscallarg(const char *) path1;
1351 		syscallarg(int) fd2;
1352 		syscallarg(const char *) path2;
1353 		syscallarg(int) flag;
1354 	} */ *uap = v;
1355 
1356 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1357 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag), retval));
1358 }
1359 
1360 int
1361 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1362     const char *path2, int flag, register_t *retval)
1363 {
1364 	struct vnode *vp;
1365 	struct nameidata nd;
1366 	int error, follow;
1367 	int flags;
1368 
1369 	if (flag & ~AT_SYMLINK_FOLLOW)
1370 		return (EINVAL);
1371 
1372 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1373 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1374 	if ((error = namei(&nd)) != 0)
1375 		return (error);
1376 	vp = nd.ni_vp;
1377 
1378 	flags = LOCKPARENT;
1379 	if (vp->v_type == VDIR) {
1380 		flags |= STRIPSLASHES;
1381 	}
1382 
1383 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1384 	if ((error = namei(&nd)) != 0)
1385 		goto out;
1386 	if (nd.ni_vp) {
1387 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1388 		if (nd.ni_dvp == nd.ni_vp)
1389 			vrele(nd.ni_dvp);
1390 		else
1391 			vput(nd.ni_dvp);
1392 		vrele(nd.ni_vp);
1393 		error = EEXIST;
1394 		goto out;
1395 	}
1396 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1397 out:
1398 	vrele(vp);
1399 	return (error);
1400 }
1401 
1402 /*
1403  * Make a symbolic link.
1404  */
1405 /* ARGSUSED */
1406 int
1407 sys_symlink(struct proc *p, void *v, register_t *retval)
1408 {
1409 	struct sys_symlink_args /* {
1410 		syscallarg(const char *) path;
1411 		syscallarg(const char *) link;
1412 	} */ *uap = v;
1413 
1414 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
1415 	    retval));
1416 }
1417 
1418 int
1419 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1420 {
1421 	struct sys_symlinkat_args /* {
1422 		syscallarg(const char *) path;
1423 		syscallarg(int) fd;
1424 		syscallarg(const char *) link;
1425 	} */ *uap = v;
1426 
1427 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1428 	    SCARG(uap, link), retval));
1429 }
1430 
1431 int
1432 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link,
1433     register_t *retval)
1434 {
1435 	struct vattr vattr;
1436 	char *path;
1437 	int error;
1438 	struct nameidata nd;
1439 
1440 	path = pool_get(&namei_pool, PR_WAITOK);
1441 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1442 	if (error)
1443 		goto out;
1444 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1445 	if ((error = namei(&nd)) != 0)
1446 		goto out;
1447 	if (nd.ni_vp) {
1448 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1449 		if (nd.ni_dvp == nd.ni_vp)
1450 			vrele(nd.ni_dvp);
1451 		else
1452 			vput(nd.ni_dvp);
1453 		vrele(nd.ni_vp);
1454 		error = EEXIST;
1455 		goto out;
1456 	}
1457 	VATTR_NULL(&vattr);
1458 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1459 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1460 out:
1461 	pool_put(&namei_pool, path);
1462 	return (error);
1463 }
1464 
1465 /*
1466  * Delete a name from the filesystem.
1467  */
1468 /* ARGSUSED */
1469 int
1470 sys_unlink(struct proc *p, void *v, register_t *retval)
1471 {
1472 	struct sys_unlink_args /* {
1473 		syscallarg(const char *) path;
1474 	} */ *uap = v;
1475 
1476 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0, retval));
1477 }
1478 
1479 int
1480 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1481 {
1482 	struct sys_unlinkat_args /* {
1483 		syscallarg(int) fd;
1484 		syscallarg(const char *) path;
1485 		syscallarg(int) flag;
1486 	} */ *uap = v;
1487 
1488 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1489 	    SCARG(uap, flag), retval));
1490 }
1491 
1492 int
1493 dounlinkat(struct proc *p, int fd, const char *path, int flag,
1494     register_t *retval)
1495 {
1496 	struct vnode *vp;
1497 	int error;
1498 	struct nameidata nd;
1499 
1500 	if (flag & ~AT_REMOVEDIR)
1501 		return (EINVAL);
1502 
1503 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1504 	    fd, path, p);
1505 	if ((error = namei(&nd)) != 0)
1506 		return (error);
1507 	vp = nd.ni_vp;
1508 
1509 	if (flag & AT_REMOVEDIR) {
1510 		if (vp->v_type != VDIR) {
1511 			error = ENOTDIR;
1512 			goto out;
1513 		}
1514 		/*
1515 		 * No rmdir "." please.
1516 		 */
1517 		if (nd.ni_dvp == vp) {
1518 			error = EBUSY;
1519 			goto out;
1520 		}
1521 	}
1522 
1523 	/*
1524 	 * The root of a mounted filesystem cannot be deleted.
1525 	 */
1526 	if (vp->v_flag & VROOT)
1527 		error = EBUSY;
1528 out:
1529 	if (!error) {
1530 		if (flag & AT_REMOVEDIR) {
1531 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1532 		} else {
1533 			(void)uvm_vnp_uncache(vp);
1534 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1535 		}
1536 	} else {
1537 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1538 		if (nd.ni_dvp == vp)
1539 			vrele(nd.ni_dvp);
1540 		else
1541 			vput(nd.ni_dvp);
1542 		vput(vp);
1543 	}
1544 	return (error);
1545 }
1546 
1547 /*
1548  * Reposition read/write file offset.
1549  */
1550 int
1551 sys_lseek(struct proc *p, void *v, register_t *retval)
1552 {
1553 	struct sys_lseek_args /* {
1554 		syscallarg(int) fd;
1555 		syscallarg(int) pad;
1556 		syscallarg(off_t) offset;
1557 		syscallarg(int) whence;
1558 	} */ *uap = v;
1559 	struct ucred *cred = p->p_ucred;
1560 	struct filedesc *fdp = p->p_fd;
1561 	struct file *fp;
1562 	struct vattr vattr;
1563 	struct vnode *vp;
1564 	off_t offarg, newoff;
1565 	int error, special;
1566 
1567 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1568 		return (EBADF);
1569 	if (fp->f_type != DTYPE_VNODE)
1570 		return (ESPIPE);
1571 	vp = (struct vnode *)fp->f_data;
1572 	if (vp->v_type == VFIFO)
1573 		return (ESPIPE);
1574 	FREF(fp);
1575 	if (vp->v_type == VCHR)
1576 		special = 1;
1577 	else
1578 		special = 0;
1579 	offarg = SCARG(uap, offset);
1580 
1581 	switch (SCARG(uap, whence)) {
1582 	case SEEK_CUR:
1583 		newoff = fp->f_offset + offarg;
1584 		break;
1585 	case SEEK_END:
1586 		error = VOP_GETATTR(vp, &vattr, cred, p);
1587 		if (error)
1588 			goto bad;
1589 		newoff = offarg + (off_t)vattr.va_size;
1590 		break;
1591 	case SEEK_SET:
1592 		newoff = offarg;
1593 		break;
1594 	default:
1595 		error = EINVAL;
1596 		goto bad;
1597 	}
1598 	if (!special) {
1599 		if (newoff < 0) {
1600 			error = EINVAL;
1601 			goto bad;
1602 		}
1603 	}
1604 	*(off_t *)retval = fp->f_offset = newoff;
1605 	fp->f_seek++;
1606 	error = 0;
1607  bad:
1608 	FRELE(fp, p);
1609 	return (error);
1610 }
1611 
1612 /*
1613  * Check access permissions.
1614  */
1615 int
1616 sys_access(struct proc *p, void *v, register_t *retval)
1617 {
1618 	struct sys_access_args /* {
1619 		syscallarg(const char *) path;
1620 		syscallarg(int) flags;
1621 	} */ *uap = v;
1622 
1623 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1624 	    SCARG(uap, flags), 0, retval));
1625 }
1626 
1627 int
1628 sys_faccessat(struct proc *p, void *v, register_t *retval)
1629 {
1630 	struct sys_faccessat_args /* {
1631 		syscallarg(int) fd;
1632 		syscallarg(const char *) path;
1633 		syscallarg(int) amode;
1634 		syscallarg(int) flag;
1635 	} */ *uap = v;
1636 
1637 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1638 	    SCARG(uap, amode), SCARG(uap, flag), retval));
1639 }
1640 
1641 int
1642 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag,
1643     register_t *retval)
1644 {
1645 	struct vnode *vp;
1646 	int error;
1647 	struct nameidata nd;
1648 
1649 	if (amode & ~(R_OK | W_OK | X_OK))
1650 		return (EINVAL);
1651 	if (flag & ~AT_EACCESS)
1652 		return (EINVAL);
1653 
1654 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1655 	if ((error = namei(&nd)) != 0)
1656 		return (error);
1657 	vp = nd.ni_vp;
1658 
1659 	/* Flags == 0 means only check for existence. */
1660 	if (amode) {
1661 		struct ucred *cred = p->p_ucred;
1662 		int vflags = 0;
1663 
1664 		crhold(cred);
1665 
1666 		if (!(flag & AT_EACCESS)) {
1667 			cred = crcopy(cred);
1668 			cred->cr_uid = p->p_cred->p_ruid;
1669 			cred->cr_gid = p->p_cred->p_rgid;
1670 		}
1671 
1672 		if (amode & R_OK)
1673 			vflags |= VREAD;
1674 		if (amode & W_OK)
1675 			vflags |= VWRITE;
1676 		if (amode & X_OK)
1677 			vflags |= VEXEC;
1678 
1679 		error = VOP_ACCESS(vp, vflags, cred, p);
1680 		if (!error && (vflags & VWRITE))
1681 			error = vn_writechk(vp);
1682 
1683 		crfree(cred);
1684 	}
1685 	vput(vp);
1686 	return (error);
1687 }
1688 
1689 /*
1690  * Get file status; this version follows links.
1691  */
1692 /* ARGSUSED */
1693 int
1694 sys_stat(struct proc *p, void *v, register_t *retval)
1695 {
1696 	struct sys_stat_args /* {
1697 		syscallarg(const char *) path;
1698 		syscallarg(struct stat *) ub;
1699 	} */ *uap = v;
1700 
1701 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0,
1702 	    retval));
1703 }
1704 
1705 int
1706 sys_fstatat(struct proc *p, void *v, register_t *retval)
1707 {
1708 	struct sys_fstatat_args /* {
1709 		syscallarg(int) fd;
1710 		syscallarg(const char *) path;
1711 		syscallarg(struct stat *) buf;
1712 		syscallarg(int) flag;
1713 	} */ *uap = v;
1714 
1715 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
1716 	    SCARG(uap, buf), SCARG(uap, flag), retval));
1717 }
1718 
1719 int
1720 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf,
1721     int flag, register_t *retval)
1722 {
1723 	struct stat sb;
1724 	int error, follow;
1725 	struct nameidata nd;
1726 
1727 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1728 		return (EINVAL);
1729 
1730 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1731 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1732 	if ((error = namei(&nd)) != 0)
1733 		return (error);
1734 	error = vn_stat(nd.ni_vp, &sb, p);
1735 	vput(nd.ni_vp);
1736 	if (error)
1737 		return (error);
1738 	/* Don't let non-root see generation numbers (for NFS security) */
1739 	if (suser(p, 0))
1740 		sb.st_gen = 0;
1741 	error = copyout(&sb, buf, sizeof(sb));
1742 #ifdef KTRACE
1743 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
1744 		ktrstat(p, &sb);
1745 #endif
1746 	return (error);
1747 }
1748 
1749 /*
1750  * Get file status; this version does not follow links.
1751  */
1752 /* ARGSUSED */
1753 int
1754 sys_lstat(struct proc *p, void *v, register_t *retval)
1755 {
1756 	struct sys_lstat_args /* {
1757 		syscallarg(const char *) path;
1758 		syscallarg(struct stat *) ub;
1759 	} */ *uap = v;
1760 
1761 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
1762 	    AT_SYMLINK_NOFOLLOW, retval));
1763 }
1764 
1765 /*
1766  * Get configurable pathname variables.
1767  */
1768 /* ARGSUSED */
1769 int
1770 sys_pathconf(struct proc *p, void *v, register_t *retval)
1771 {
1772 	struct sys_pathconf_args /* {
1773 		syscallarg(const char *) path;
1774 		syscallarg(int) name;
1775 	} */ *uap = v;
1776 	int error;
1777 	struct nameidata nd;
1778 
1779 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1780 	    SCARG(uap, path), p);
1781 	if ((error = namei(&nd)) != 0)
1782 		return (error);
1783 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1784 	vput(nd.ni_vp);
1785 	return (error);
1786 }
1787 
1788 /*
1789  * Return target name of a symbolic link.
1790  */
1791 /* ARGSUSED */
1792 int
1793 sys_readlink(struct proc *p, void *v, register_t *retval)
1794 {
1795 	struct sys_readlink_args /* {
1796 		syscallarg(const char *) path;
1797 		syscallarg(char *) buf;
1798 		syscallarg(size_t) count;
1799 	} */ *uap = v;
1800 
1801 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
1802 	    SCARG(uap, count), retval));
1803 }
1804 
1805 int
1806 sys_readlinkat(struct proc *p, void *v, register_t *retval)
1807 {
1808 	struct sys_readlinkat_args /* {
1809 		syscallarg(int) fd;
1810 		syscallarg(const char *) path;
1811 		syscallarg(char *) buf;
1812 		syscallarg(size_t) count;
1813 	} */ *uap = v;
1814 
1815 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1816 	    SCARG(uap, buf), SCARG(uap, count), retval));
1817 }
1818 
1819 int
1820 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
1821     size_t count, register_t *retval)
1822 {
1823 	struct vnode *vp;
1824 	struct iovec aiov;
1825 	struct uio auio;
1826 	int error;
1827 	struct nameidata nd;
1828 
1829 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1830 	if ((error = namei(&nd)) != 0)
1831 		return (error);
1832 	vp = nd.ni_vp;
1833 	if (vp->v_type != VLNK)
1834 		error = EINVAL;
1835 	else {
1836 		aiov.iov_base = buf;
1837 		aiov.iov_len = count;
1838 		auio.uio_iov = &aiov;
1839 		auio.uio_iovcnt = 1;
1840 		auio.uio_offset = 0;
1841 		auio.uio_rw = UIO_READ;
1842 		auio.uio_segflg = UIO_USERSPACE;
1843 		auio.uio_procp = p;
1844 		auio.uio_resid = count;
1845 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1846 	}
1847 	vput(vp);
1848 	*retval = count - auio.uio_resid;
1849 	return (error);
1850 }
1851 
1852 /*
1853  * Change flags of a file given a path name.
1854  */
1855 /* ARGSUSED */
1856 int
1857 sys_chflags(struct proc *p, void *v, register_t *retval)
1858 {
1859 	struct sys_chflags_args /* {
1860 		syscallarg(const char *) path;
1861 		syscallarg(u_int) flags;
1862 	} */ *uap = v;
1863 	struct vnode *vp;
1864 	struct vattr vattr;
1865 	int error;
1866 	struct nameidata nd;
1867 	u_int flags = SCARG(uap, flags);
1868 
1869 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1870 	if ((error = namei(&nd)) != 0)
1871 		return (error);
1872 	vp = nd.ni_vp;
1873 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1874 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1875 		error = EROFS;
1876 	else if (flags == VNOVAL)
1877 		error = EINVAL;
1878 	else {
1879 		if (suser(p, 0)) {
1880 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
1881 				goto out;
1882 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1883 				error = EINVAL;
1884 				goto out;
1885 			}
1886 		}
1887 		VATTR_NULL(&vattr);
1888 		vattr.va_flags = flags;
1889 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1890 	}
1891 out:
1892 	vput(vp);
1893 	return (error);
1894 }
1895 
1896 /*
1897  * Change flags of a file given a file descriptor.
1898  */
1899 /* ARGSUSED */
1900 int
1901 sys_fchflags(struct proc *p, void *v, register_t *retval)
1902 {
1903 	struct sys_fchflags_args /* {
1904 		syscallarg(int) fd;
1905 		syscallarg(u_int) flags;
1906 	} */ *uap = v;
1907 	struct vattr vattr;
1908 	struct vnode *vp;
1909 	struct file *fp;
1910 	int error;
1911 	u_int flags = SCARG(uap, flags);
1912 
1913 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1914 		return (error);
1915 	vp = (struct vnode *)fp->f_data;
1916 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1917 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1918 		error = EROFS;
1919 	else if (flags == VNOVAL)
1920 		error = EINVAL;
1921 	else {
1922 		if (suser(p, 0)) {
1923 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
1924 			    != 0)
1925 				goto out;
1926 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1927 				error = EINVAL;
1928 				goto out;
1929 			}
1930 		}
1931 		VATTR_NULL(&vattr);
1932 		vattr.va_flags = flags;
1933 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1934 	}
1935 out:
1936 	VOP_UNLOCK(vp, 0, p);
1937 	FRELE(fp, p);
1938 	return (error);
1939 }
1940 
1941 /*
1942  * Change mode of a file given path name.
1943  */
1944 /* ARGSUSED */
1945 int
1946 sys_chmod(struct proc *p, void *v, register_t *retval)
1947 {
1948 	struct sys_chmod_args /* {
1949 		syscallarg(const char *) path;
1950 		syscallarg(mode_t) mode;
1951 	} */ *uap = v;
1952 
1953 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1954 	    0, retval));
1955 }
1956 
1957 int
1958 sys_fchmodat(struct proc *p, void *v, register_t *retval)
1959 {
1960 	struct sys_fchmodat_args /* {
1961 		syscallarg(int) fd;
1962 		syscallarg(const char *) path;
1963 		syscallarg(mode_t) mode;
1964 		syscallarg(int) flag;
1965 	} */ *uap = v;
1966 
1967 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
1968 	    SCARG(uap, mode), SCARG(uap, flag), retval));
1969 }
1970 
1971 int
1972 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag,
1973     register_t *retval)
1974 {
1975 	struct vnode *vp;
1976 	struct vattr vattr;
1977 	int error, follow;
1978 	struct nameidata nd;
1979 
1980 	if (mode & ~(S_IFMT | ALLPERMS))
1981 		return (EINVAL);
1982 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1983 		return (EINVAL);
1984 
1985 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1986 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
1987 	if ((error = namei(&nd)) != 0)
1988 		return (error);
1989 	vp = nd.ni_vp;
1990 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1991 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1992 		error = EROFS;
1993 	else {
1994 		VATTR_NULL(&vattr);
1995 		vattr.va_mode = mode & ALLPERMS;
1996 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1997 	}
1998 	vput(vp);
1999 	return (error);
2000 }
2001 
2002 /*
2003  * Change mode of a file given a file descriptor.
2004  */
2005 /* ARGSUSED */
2006 int
2007 sys_fchmod(struct proc *p, void *v, register_t *retval)
2008 {
2009 	struct sys_fchmod_args /* {
2010 		syscallarg(int) fd;
2011 		syscallarg(mode_t) mode;
2012 	} */ *uap = v;
2013 	struct vattr vattr;
2014 	struct vnode *vp;
2015 	struct file *fp;
2016 	int error;
2017 
2018 	if (SCARG(uap, mode) & ~(S_IFMT | ALLPERMS))
2019 		return (EINVAL);
2020 
2021 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2022 		return (error);
2023 	vp = (struct vnode *)fp->f_data;
2024 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2025 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2026 		error = EROFS;
2027 	else {
2028 		VATTR_NULL(&vattr);
2029 		vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
2030 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2031 	}
2032 	VOP_UNLOCK(vp, 0, p);
2033 	FRELE(fp, p);
2034 	return (error);
2035 }
2036 
2037 /*
2038  * Set ownership given a path name.
2039  */
2040 /* ARGSUSED */
2041 int
2042 sys_chown(struct proc *p, void *v, register_t *retval)
2043 {
2044 	struct sys_chown_args /* {
2045 		syscallarg(const char *) path;
2046 		syscallarg(uid_t) uid;
2047 		syscallarg(gid_t) gid;
2048 	} */ *uap = v;
2049 
2050 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2051 	    SCARG(uap, gid), 0, retval));
2052 }
2053 
2054 int
2055 sys_fchownat(struct proc *p, void *v, register_t *retval)
2056 {
2057 	struct sys_fchownat_args /* {
2058 		syscallarg(int) fd;
2059 		syscallarg(const char *) path;
2060 		syscallarg(uid_t) uid;
2061 		syscallarg(gid_t) gid;
2062 		syscallarg(int) flag;
2063 	} */ *uap = v;
2064 
2065 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2066 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag), retval));
2067 }
2068 
2069 int
2070 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2071     int flag, register_t *retval)
2072 {
2073 	struct vnode *vp;
2074 	struct vattr vattr;
2075 	int error, follow;
2076 	struct nameidata nd;
2077 	mode_t mode;
2078 
2079 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2080 		return (EINVAL);
2081 
2082 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2083 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2084 	if ((error = namei(&nd)) != 0)
2085 		return (error);
2086 	vp = nd.ni_vp;
2087 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2088 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2089 		error = EROFS;
2090 	else {
2091 		if ((uid != -1 || gid != -1) &&
2092 		    (suser(p, 0) || suid_clear)) {
2093 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2094 			if (error)
2095 				goto out;
2096 			mode = vattr.va_mode & ~(VSUID | VSGID);
2097 			if (mode == vattr.va_mode)
2098 				mode = VNOVAL;
2099 		}
2100 		else
2101 			mode = VNOVAL;
2102 		VATTR_NULL(&vattr);
2103 		vattr.va_uid = uid;
2104 		vattr.va_gid = gid;
2105 		vattr.va_mode = mode;
2106 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2107 	}
2108 out:
2109 	vput(vp);
2110 	return (error);
2111 }
2112 
2113 /*
2114  * Set ownership given a path name, without following links.
2115  */
2116 /* ARGSUSED */
2117 int
2118 sys_lchown(struct proc *p, void *v, register_t *retval)
2119 {
2120 	struct sys_lchown_args /* {
2121 		syscallarg(const char *) path;
2122 		syscallarg(uid_t) uid;
2123 		syscallarg(gid_t) gid;
2124 	} */ *uap = v;
2125 	struct vnode *vp;
2126 	struct vattr vattr;
2127 	int error;
2128 	struct nameidata nd;
2129 	mode_t mode;
2130 	uid_t uid = SCARG(uap, uid);
2131 	gid_t gid = SCARG(uap, gid);
2132 
2133 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2134 	if ((error = namei(&nd)) != 0)
2135 		return (error);
2136 	vp = nd.ni_vp;
2137 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2138 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2139 		error = EROFS;
2140 	else {
2141 		if ((uid != -1 || gid != -1) &&
2142 		    (suser(p, 0) || suid_clear)) {
2143 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2144 			if (error)
2145 				goto out;
2146 			mode = vattr.va_mode & ~(VSUID | VSGID);
2147 			if (mode == vattr.va_mode)
2148 				mode = VNOVAL;
2149 		}
2150 		else
2151 			mode = VNOVAL;
2152 		VATTR_NULL(&vattr);
2153 		vattr.va_uid = uid;
2154 		vattr.va_gid = gid;
2155 		vattr.va_mode = mode;
2156 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2157 	}
2158 out:
2159 	vput(vp);
2160 	return (error);
2161 }
2162 
2163 /*
2164  * Set ownership given a file descriptor.
2165  */
2166 /* ARGSUSED */
2167 int
2168 sys_fchown(struct proc *p, void *v, register_t *retval)
2169 {
2170 	struct sys_fchown_args /* {
2171 		syscallarg(int) fd;
2172 		syscallarg(uid_t) uid;
2173 		syscallarg(gid_t) gid;
2174 	} */ *uap = v;
2175 	struct vnode *vp;
2176 	struct vattr vattr;
2177 	int error;
2178 	struct file *fp;
2179 	mode_t mode;
2180 	uid_t uid = SCARG(uap, uid);
2181 	gid_t gid = SCARG(uap, gid);
2182 
2183 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2184 		return (error);
2185 	vp = (struct vnode *)fp->f_data;
2186 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2187 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2188 		error = EROFS;
2189 	else {
2190 		if ((uid != -1 || gid != -1) &&
2191 		    (suser(p, 0) || suid_clear)) {
2192 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2193 			if (error)
2194 				goto out;
2195 			mode = vattr.va_mode & ~(VSUID | VSGID);
2196 			if (mode == vattr.va_mode)
2197 				mode = VNOVAL;
2198 		} else
2199 			mode = VNOVAL;
2200 		VATTR_NULL(&vattr);
2201 		vattr.va_uid = uid;
2202 		vattr.va_gid = gid;
2203 		vattr.va_mode = mode;
2204 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2205 	}
2206 out:
2207 	VOP_UNLOCK(vp, 0, p);
2208 	FRELE(fp, p);
2209 	return (error);
2210 }
2211 
2212 /*
2213  * Set the access and modification times given a path name.
2214  */
2215 /* ARGSUSED */
2216 int
2217 sys_utimes(struct proc *p, void *v, register_t *retval)
2218 {
2219 	struct sys_utimes_args /* {
2220 		syscallarg(const char *) path;
2221 		syscallarg(const struct timeval *) tptr;
2222 	} */ *uap = v;
2223 
2224 	struct timespec ts[2];
2225 	struct timeval tv[2];
2226 	const struct timeval *tvp;
2227 	int error;
2228 
2229 	tvp = SCARG(uap, tptr);
2230 	if (tvp != NULL) {
2231 		error = copyin(tvp, tv, sizeof(tv));
2232 		if (error)
2233 			return (error);
2234 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2235 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2236 	} else
2237 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2238 
2239 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0, retval));
2240 }
2241 
2242 int
2243 sys_utimensat(struct proc *p, void *v, register_t *retval)
2244 {
2245 	struct sys_utimensat_args /* {
2246 		syscallarg(int) fd;
2247 		syscallarg(const char *) path;
2248 		syscallarg(const struct timespec *) times;
2249 		syscallarg(int) flag;
2250 	} */ *uap = v;
2251 
2252 	struct timespec ts[2];
2253 	const struct timespec *tsp;
2254 	int error;
2255 
2256 	tsp = SCARG(uap, times);
2257 	if (tsp != NULL) {
2258 		error = copyin(tsp, ts, sizeof(ts));
2259 		if (error)
2260 			return (error);
2261 	} else
2262 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2263 
2264 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2265 	    SCARG(uap, flag), retval));
2266 }
2267 
2268 int
2269 doutimensat(struct proc *p, int fd, const char *path,
2270     struct timespec ts[2], int flag, register_t *retval)
2271 {
2272 	struct vnode *vp;
2273 	int error, follow;
2274 	struct nameidata nd;
2275 
2276 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2277 		return (EINVAL);
2278 
2279 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2280 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2281 	if ((error = namei(&nd)) != 0)
2282 		return (error);
2283 	vp = nd.ni_vp;
2284 
2285 	return (dovutimens(p, vp, ts, retval));
2286 }
2287 
2288 int
2289 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2],
2290     register_t *retval)
2291 {
2292 	struct vattr vattr;
2293 	struct timespec now;
2294 	int error;
2295 
2296 #ifdef KTRACE
2297 	/* if they're both UTIME_NOW, then don't report either */
2298 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2299 	    KTRPOINT(p, KTR_STRUCT)) {
2300 		ktrabstimespec(p, &ts[0]);
2301 		ktrabstimespec(p, &ts[1]);
2302 	}
2303 #endif
2304 
2305 	VATTR_NULL(&vattr);
2306 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2307 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2308 			vattr.va_vaflags |= VA_UTIMES_NULL;
2309 
2310 		getnanotime(&now);
2311 		if (ts[0].tv_nsec == UTIME_NOW)
2312 			ts[0] = now;
2313 		if (ts[1].tv_nsec == UTIME_NOW)
2314 			ts[1] = now;
2315 	}
2316 
2317 	/*
2318 	 * XXX: Ideally the filesystem code would check tv_nsec ==
2319 	 * UTIME_OMIT instead of tv_sec == VNOVAL, but until then we
2320 	 * need to fudge tv_sec if it happens to equal VNOVAL.
2321 	 */
2322 	if (ts[0].tv_nsec == UTIME_OMIT)
2323 		ts[0].tv_sec = VNOVAL;
2324 	else if (ts[0].tv_sec == VNOVAL)
2325 		ts[0].tv_sec = VNOVAL - 1;
2326 
2327 	if (ts[1].tv_nsec == UTIME_OMIT)
2328 		ts[1].tv_sec = VNOVAL;
2329 	else if (ts[1].tv_sec == VNOVAL)
2330 		ts[1].tv_sec = VNOVAL - 1;
2331 
2332 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2333 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2334 		error = EROFS;
2335 	else {
2336 		vattr.va_atime = ts[0];
2337 		vattr.va_mtime = ts[1];
2338 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2339 	}
2340 	vput(vp);
2341 	return (error);
2342 }
2343 
2344 /*
2345  * Set the access and modification times given a file descriptor.
2346  */
2347 /* ARGSUSED */
2348 int
2349 sys_futimes(struct proc *p, void *v, register_t *retval)
2350 {
2351 	struct sys_futimes_args /* {
2352 		syscallarg(int) fd;
2353 		syscallarg(const struct timeval *) tptr;
2354 	} */ *uap = v;
2355 	struct timeval tv[2];
2356 	struct timespec ts[2];
2357 	const struct timeval *tvp;
2358 	int error;
2359 
2360 	tvp = SCARG(uap, tptr);
2361 	if (tvp != NULL) {
2362 		error = copyin(tvp, tv, sizeof(tv));
2363 		if (error)
2364 			return (error);
2365 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2366 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2367 	} else
2368 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2369 
2370 	return (dofutimens(p, SCARG(uap, fd), ts, retval));
2371 }
2372 
2373 int
2374 sys_futimens(struct proc *p, void *v, register_t *retval)
2375 {
2376 	struct sys_futimens_args /* {
2377 		syscallarg(int) fd;
2378 		syscallarg(const struct timespec *) times;
2379 	} */ *uap = v;
2380 	struct timespec ts[2];
2381 	const struct timespec *tsp;
2382 	int error;
2383 
2384 	tsp = SCARG(uap, times);
2385 	if (tsp != NULL) {
2386 		error = copyin(tsp, ts, sizeof(ts));
2387 		if (error)
2388 			return (error);
2389 	} else
2390 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2391 
2392 	return (dofutimens(p, SCARG(uap, fd), ts, retval));
2393 }
2394 
2395 int
2396 dofutimens(struct proc *p, int fd, struct timespec ts[2], register_t *retval)
2397 {
2398 	struct file *fp;
2399 	struct vnode *vp;
2400 	int error;
2401 
2402 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2403 		return (error);
2404 	vp = (struct vnode *)fp->f_data;
2405 	vref(vp);
2406 	FRELE(fp, p);
2407 
2408 	return (dovutimens(p, vp, ts, retval));
2409 }
2410 
2411 /*
2412  * Truncate a file given its path name.
2413  */
2414 /* ARGSUSED */
2415 int
2416 sys_truncate(struct proc *p, void *v, register_t *retval)
2417 {
2418 	struct sys_truncate_args /* {
2419 		syscallarg(const char *) path;
2420 		syscallarg(int) pad;
2421 		syscallarg(off_t) length;
2422 	} */ *uap = v;
2423 	struct vnode *vp;
2424 	struct vattr vattr;
2425 	int error;
2426 	struct nameidata nd;
2427 
2428 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2429 	if ((error = namei(&nd)) != 0)
2430 		return (error);
2431 	vp = nd.ni_vp;
2432 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2433 	if (vp->v_type == VDIR)
2434 		error = EISDIR;
2435 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2436 	    (error = vn_writechk(vp)) == 0) {
2437 		VATTR_NULL(&vattr);
2438 		vattr.va_size = SCARG(uap, length);
2439 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2440 	}
2441 	vput(vp);
2442 	return (error);
2443 }
2444 
2445 /*
2446  * Truncate a file given a file descriptor.
2447  */
2448 /* ARGSUSED */
2449 int
2450 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2451 {
2452 	struct sys_ftruncate_args /* {
2453 		syscallarg(int) fd;
2454 		syscallarg(int) pad;
2455 		syscallarg(off_t) length;
2456 	} */ *uap = v;
2457 	struct vattr vattr;
2458 	struct vnode *vp;
2459 	struct file *fp;
2460 	off_t len;
2461 	int error;
2462 
2463 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2464 		return (error);
2465 	len = SCARG(uap, length);
2466 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2467 		error = EINVAL;
2468 		goto bad;
2469 	}
2470 	vp = (struct vnode *)fp->f_data;
2471 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2472 	if (vp->v_type == VDIR)
2473 		error = EISDIR;
2474 	else if ((error = vn_writechk(vp)) == 0) {
2475 		VATTR_NULL(&vattr);
2476 		vattr.va_size = len;
2477 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2478 	}
2479 	VOP_UNLOCK(vp, 0, p);
2480 bad:
2481 	FRELE(fp, p);
2482 	return (error);
2483 }
2484 
2485 /*
2486  * Sync an open file.
2487  */
2488 /* ARGSUSED */
2489 int
2490 sys_fsync(struct proc *p, void *v, register_t *retval)
2491 {
2492 	struct sys_fsync_args /* {
2493 		syscallarg(int) fd;
2494 	} */ *uap = v;
2495 	struct vnode *vp;
2496 	struct file *fp;
2497 	int error;
2498 
2499 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2500 		return (error);
2501 	vp = (struct vnode *)fp->f_data;
2502 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2503 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2504 #ifdef FFS_SOFTUPDATES
2505 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2506 		error = softdep_fsync(vp);
2507 #endif
2508 
2509 	VOP_UNLOCK(vp, 0, p);
2510 	FRELE(fp, p);
2511 	return (error);
2512 }
2513 
2514 /*
2515  * Rename files.  Source and destination must either both be directories,
2516  * or both not be directories.  If target is a directory, it must be empty.
2517  */
2518 /* ARGSUSED */
2519 int
2520 sys_rename(struct proc *p, void *v, register_t *retval)
2521 {
2522 	struct sys_rename_args /* {
2523 		syscallarg(const char *) from;
2524 		syscallarg(const char *) to;
2525 	} */ *uap = v;
2526 
2527 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2528 	    SCARG(uap, to), retval));
2529 }
2530 
2531 int
2532 sys_renameat(struct proc *p, void *v, register_t *retval)
2533 {
2534 	struct sys_renameat_args /* {
2535 		syscallarg(int) fromfd;
2536 		syscallarg(const char *) from;
2537 		syscallarg(int) tofd;
2538 		syscallarg(const char *) to;
2539 	} */ *uap = v;
2540 
2541 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2542 	    SCARG(uap, tofd), SCARG(uap, to), retval));
2543 }
2544 
2545 int
2546 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2547     const char *to, register_t *retval)
2548 {
2549 	struct vnode *tvp, *fvp, *tdvp;
2550 	struct nameidata fromnd, tond;
2551 	int error;
2552 	int flags;
2553 
2554 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2555 	    fromfd, from, p);
2556 	if ((error = namei(&fromnd)) != 0)
2557 		return (error);
2558 	fvp = fromnd.ni_vp;
2559 
2560 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2561 	/*
2562 	 * rename("foo/", "bar/");  is  OK
2563 	 */
2564 	if (fvp->v_type == VDIR)
2565 		flags |= STRIPSLASHES;
2566 
2567 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2568 	if ((error = namei(&tond)) != 0) {
2569 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2570 		vrele(fromnd.ni_dvp);
2571 		vrele(fvp);
2572 		goto out1;
2573 	}
2574 	tdvp = tond.ni_dvp;
2575 	tvp = tond.ni_vp;
2576 	if (tvp != NULL) {
2577 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2578 			error = ENOTDIR;
2579 			goto out;
2580 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2581 			error = EISDIR;
2582 			goto out;
2583 		}
2584 	}
2585 	if (fvp == tdvp)
2586 		error = EINVAL;
2587 	/*
2588 	 * If source is the same as the destination (that is the
2589 	 * same inode number)
2590 	 */
2591 	if (fvp == tvp)
2592 		error = -1;
2593 out:
2594 	if (!error) {
2595 		if (tvp) {
2596 			(void)uvm_vnp_uncache(tvp);
2597 		}
2598 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2599 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2600 	} else {
2601 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2602 		if (tdvp == tvp)
2603 			vrele(tdvp);
2604 		else
2605 			vput(tdvp);
2606 		if (tvp)
2607 			vput(tvp);
2608 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2609 		vrele(fromnd.ni_dvp);
2610 		vrele(fvp);
2611 	}
2612 	vrele(tond.ni_startdir);
2613 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2614 out1:
2615 	if (fromnd.ni_startdir)
2616 		vrele(fromnd.ni_startdir);
2617 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2618 	if (error == -1)
2619 		return (0);
2620 	return (error);
2621 }
2622 
2623 /*
2624  * Make a directory file.
2625  */
2626 /* ARGSUSED */
2627 int
2628 sys_mkdir(struct proc *p, void *v, register_t *retval)
2629 {
2630 	struct sys_mkdir_args /* {
2631 		syscallarg(const char *) path;
2632 		syscallarg(mode_t) mode;
2633 	} */ *uap = v;
2634 
2635 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2636 	    retval));
2637 }
2638 
2639 int
2640 sys_mkdirat(struct proc *p, void *v, register_t *retval)
2641 {
2642 	struct sys_mkdirat_args /* {
2643 		syscallarg(int) fd;
2644 		syscallarg(const char *) path;
2645 		syscallarg(mode_t) mode;
2646 	} */ *uap = v;
2647 
2648 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
2649 	    SCARG(uap, mode), retval));
2650 }
2651 
2652 int
2653 domkdirat(struct proc *p, int fd, const char *path, mode_t mode,
2654     register_t *retval)
2655 {
2656 	struct vnode *vp;
2657 	struct vattr vattr;
2658 	int error;
2659 	struct nameidata nd;
2660 
2661 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
2662 	    fd, path, p);
2663 	if ((error = namei(&nd)) != 0)
2664 		return (error);
2665 	vp = nd.ni_vp;
2666 	if (vp != NULL) {
2667 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2668 		if (nd.ni_dvp == vp)
2669 			vrele(nd.ni_dvp);
2670 		else
2671 			vput(nd.ni_dvp);
2672 		vrele(vp);
2673 		return (EEXIST);
2674 	}
2675 	VATTR_NULL(&vattr);
2676 	vattr.va_type = VDIR;
2677 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2678 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2679 	if (!error)
2680 		vput(nd.ni_vp);
2681 	return (error);
2682 }
2683 
2684 /*
2685  * Remove a directory file.
2686  */
2687 /* ARGSUSED */
2688 int
2689 sys_rmdir(struct proc *p, void *v, register_t *retval)
2690 {
2691 	struct sys_rmdir_args /* {
2692 		syscallarg(const char *) path;
2693 	} */ *uap = v;
2694 
2695 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR,
2696 	    retval));
2697 }
2698 
2699 /*
2700  * Read a block of directory entries in a file system independent format.
2701  */
2702 int
2703 getdirentries_internal(struct proc *p, int fd, char *buf, int count,
2704     off_t *basep, register_t *retval)
2705 {
2706 	struct vnode *vp;
2707 	struct file *fp;
2708 	struct uio auio;
2709 	struct iovec aiov;
2710 	int error, eofflag;
2711 
2712 	if (count < 0)
2713 		return EINVAL;
2714 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2715 		return (error);
2716 	if ((fp->f_flag & FREAD) == 0) {
2717 		error = EBADF;
2718 		goto bad;
2719 	}
2720 	if (fp->f_offset < 0) {
2721 		error = EINVAL;
2722 		goto bad;
2723 	}
2724 	vp = (struct vnode *)fp->f_data;
2725 	if (vp->v_type != VDIR) {
2726 		error = EINVAL;
2727 		goto bad;
2728 	}
2729 	aiov.iov_base = buf;
2730 	aiov.iov_len = count;
2731 	auio.uio_iov = &aiov;
2732 	auio.uio_iovcnt = 1;
2733 	auio.uio_rw = UIO_READ;
2734 	auio.uio_segflg = UIO_USERSPACE;
2735 	auio.uio_procp = p;
2736 	auio.uio_resid = count;
2737 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2738 	*basep = auio.uio_offset = fp->f_offset;
2739 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0);
2740 	fp->f_offset = auio.uio_offset;
2741 	VOP_UNLOCK(vp, 0, p);
2742 	if (error)
2743 		goto bad;
2744 	*retval = count - auio.uio_resid;
2745 bad:
2746 	FRELE(fp, p);
2747 	return (error);
2748 }
2749 
2750 int
2751 sys_getdirentries(struct proc *p, void *v, register_t *retval)
2752 {
2753 	struct sys_getdirentries_args /* {
2754 		syscallarg(int) fd;
2755 		syscallarg(char *) buf;
2756 		syscallarg(int) count;
2757 		syscallarg(off_t *) basep;
2758 	} */ *uap = v;
2759 	int error;
2760 	off_t off;
2761 
2762 	error = getdirentries_internal(p, SCARG(uap, fd), SCARG(uap, buf),
2763 	    SCARG(uap, count), &off, retval);
2764 	if (!error)
2765 		error = copyout(&off, SCARG(uap, basep), sizeof(off_t));
2766 	return error;
2767 }
2768 
#ifdef COMPAT_O48
/*
 * Compatibility getdirentries(): identical to sys_getdirentries()
 * except that the starting offset is narrowed to a long before being
 * copied out to the user's basep.
 */
int
compat_o48_sys_getdirentries(struct proc *p, void *v, register_t *retval)
{
	struct compat_o48_sys_getdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(int) count;
		syscallarg(long *) basep;
	} */ *uap = v;
	off_t startoff;
	long loff;
	int error;

	error = getdirentries_internal(p, SCARG(uap, fd), SCARG(uap, buf),
	    SCARG(uap, count), &startoff, retval);
	if (error != 0)
		return (error);

	/* the old interface reports the base as a long */
	loff = (long)startoff;
	return (copyout(&loff, SCARG(uap, basep), sizeof(loff)));
}
#endif
2791 
2792 /*
2793  * Set the mode mask for creation of filesystem nodes.
2794  */
2795 int
2796 sys_umask(struct proc *p, void *v, register_t *retval)
2797 {
2798 	struct sys_umask_args /* {
2799 		syscallarg(mode_t) newmask;
2800 	} */ *uap = v;
2801 	struct filedesc *fdp;
2802 
2803 	fdp = p->p_fd;
2804 	*retval = fdp->fd_cmask;
2805 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2806 	return (0);
2807 }
2808 
2809 /*
2810  * Void all references to file by ripping underlying filesystem
2811  * away from vnode.
2812  */
2813 /* ARGSUSED */
2814 int
2815 sys_revoke(struct proc *p, void *v, register_t *retval)
2816 {
2817 	struct sys_revoke_args /* {
2818 		syscallarg(const char *) path;
2819 	} */ *uap = v;
2820 	struct vnode *vp;
2821 	struct vattr vattr;
2822 	int error;
2823 	struct nameidata nd;
2824 
2825 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2826 	if ((error = namei(&nd)) != 0)
2827 		return (error);
2828 	vp = nd.ni_vp;
2829 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2830 		goto out;
2831 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2832 	    (error = suser(p, 0)))
2833 		goto out;
2834 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
2835 		VOP_REVOKE(vp, REVOKEALL);
2836 out:
2837 	vrele(vp);
2838 	return (error);
2839 }
2840 
2841 /*
2842  * Convert a user file descriptor to a kernel file entry.
2843  *
2844  * On return *fpp is FREF:ed.
2845  */
2846 int
2847 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2848 {
2849 	struct file *fp;
2850 	struct vnode *vp;
2851 
2852 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2853 		return (EBADF);
2854 
2855 	if (fp->f_type != DTYPE_VNODE)
2856 		return (EINVAL);
2857 
2858 	vp = (struct vnode *)fp->f_data;
2859 	if (vp->v_type == VBAD)
2860 		return (EBADF);
2861 
2862 	FREF(fp);
2863 	*fpp = fp;
2864 
2865 	return (0);
2866 }
2867 
2868 /*
2869  * Positional read system call.
2870  */
2871 int
2872 sys_pread(struct proc *p, void *v, register_t *retval)
2873 {
2874 	struct sys_pread_args /* {
2875 		syscallarg(int) fd;
2876 		syscallarg(void *) buf;
2877 		syscallarg(size_t) nbyte;
2878 		syscallarg(int) pad;
2879 		syscallarg(off_t) offset;
2880 	} */ *uap = v;
2881 	struct iovec iov;
2882 	struct filedesc *fdp = p->p_fd;
2883 	struct file *fp;
2884 	struct vnode *vp;
2885 	off_t offset;
2886 	int fd = SCARG(uap, fd);
2887 
2888 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2889 		return (EBADF);
2890 	if ((fp->f_flag & FREAD) == 0)
2891 		return (EBADF);
2892 
2893 	vp = (struct vnode *)fp->f_data;
2894 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2895 	    (vp->v_flag & VISTTY)) {
2896 		return (ESPIPE);
2897 	}
2898 
2899 	iov.iov_base = SCARG(uap, buf);
2900 	iov.iov_len = SCARG(uap, nbyte);
2901 
2902 	offset = SCARG(uap, offset);
2903 	if (offset < 0 && vp->v_type != VCHR)
2904 		return (EINVAL);
2905 
2906 	FREF(fp);
2907 
2908 	/* dofilereadv() will FRELE the descriptor for us */
2909 	return (dofilereadv(p, fd, fp, &iov, 1, 0, &offset, retval));
2910 }
2911 
2912 /*
2913  * Positional scatter read system call.
2914  */
2915 int
2916 sys_preadv(struct proc *p, void *v, register_t *retval)
2917 {
2918 	struct sys_preadv_args /* {
2919 		syscallarg(int) fd;
2920 		syscallarg(const struct iovec *) iovp;
2921 		syscallarg(int) iovcnt;
2922 		syscallarg(int) pad;
2923 		syscallarg(off_t) offset;
2924 	} */ *uap = v;
2925 	struct filedesc *fdp = p->p_fd;
2926 	struct file *fp;
2927 	struct vnode *vp;
2928 	off_t offset;
2929 	int fd = SCARG(uap, fd);
2930 
2931 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2932 		return (EBADF);
2933 	if ((fp->f_flag & FREAD) == 0)
2934 		return (EBADF);
2935 
2936 	vp = (struct vnode *)fp->f_data;
2937 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2938 	    (vp->v_flag & VISTTY)) {
2939 		return (ESPIPE);
2940 	}
2941 
2942 	offset = SCARG(uap, offset);
2943 	if (offset < 0 && vp->v_type != VCHR)
2944 		return (EINVAL);
2945 
2946 	FREF(fp);
2947 
2948 	/* dofilereadv() will FRELE the descriptor for us */
2949 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
2950 	    &offset, retval));
2951 }
2952 
2953 /*
2954  * Positional write system call.
2955  */
2956 int
2957 sys_pwrite(struct proc *p, void *v, register_t *retval)
2958 {
2959 	struct sys_pwrite_args /* {
2960 		syscallarg(int) fd;
2961 		syscallarg(const void *) buf;
2962 		syscallarg(size_t) nbyte;
2963 		syscallarg(int) pad;
2964 		syscallarg(off_t) offset;
2965 	} */ *uap = v;
2966 	struct iovec iov;
2967 	struct filedesc *fdp = p->p_fd;
2968 	struct file *fp;
2969 	struct vnode *vp;
2970 	off_t offset;
2971 	int fd = SCARG(uap, fd);
2972 
2973 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2974 		return (EBADF);
2975 	if ((fp->f_flag & FWRITE) == 0)
2976 		return (EBADF);
2977 
2978 	vp = (struct vnode *)fp->f_data;
2979 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2980 	    (vp->v_flag & VISTTY)) {
2981 		return (ESPIPE);
2982 	}
2983 
2984 	iov.iov_base = (void *)SCARG(uap, buf);
2985 	iov.iov_len = SCARG(uap, nbyte);
2986 
2987 	offset = SCARG(uap, offset);
2988 	if (offset < 0 && vp->v_type != VCHR)
2989 		return (EINVAL);
2990 
2991 	FREF(fp);
2992 
2993 	/* dofilewrite() will FRELE the descriptor for us */
2994 	return (dofilewritev(p, fd, fp, &iov, 1, 0, &offset, retval));
2995 }
2996 
2997 /*
2998  * Positional gather write system call.
2999  */
3000 int
3001 sys_pwritev(struct proc *p, void *v, register_t *retval)
3002 {
3003 	struct sys_pwritev_args /* {
3004 		syscallarg(int) fd;
3005 		syscallarg(const struct iovec *) iovp;
3006 		syscallarg(int) iovcnt;
3007 		syscallarg(int) pad;
3008 		syscallarg(off_t) offset;
3009 	} */ *uap = v;
3010 	struct filedesc *fdp = p->p_fd;
3011 	struct file *fp;
3012 	struct vnode *vp;
3013 	off_t offset;
3014 	int fd = SCARG(uap, fd);
3015 
3016 	if ((fp = fd_getfile(fdp, fd)) == NULL)
3017 		return (EBADF);
3018 	if ((fp->f_flag & FWRITE) == 0)
3019 		return (EBADF);
3020 
3021 	vp = (struct vnode *)fp->f_data;
3022 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
3023 	    (vp->v_flag & VISTTY)) {
3024 		return (ESPIPE);
3025 	}
3026 
3027 	offset = SCARG(uap, offset);
3028 	if (offset < 0 && vp->v_type != VCHR)
3029 		return (EINVAL);
3030 
3031 	FREF(fp);
3032 
3033 	/* dofilewritev() will FRELE the descriptor for us */
3034 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
3035 	    1, &offset, retval));
3036 }
3037