xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.208 2014/07/12 18:43:32 tedu Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <sys/vnode.h>
49 #include <sys/mount.h>
50 #include <sys/proc.h>
51 #include <sys/uio.h>
52 #include <sys/malloc.h>
53 #include <sys/pool.h>
54 #include <sys/dirent.h>
55 #include <sys/dkio.h>
56 #include <sys/disklabel.h>
57 #include <sys/ktrace.h>
58 
59 #include <sys/syscallargs.h>
60 
extern int suid_clear;
/* Consulted by sys_mount(): while 0, mounting requires superuser. */
int	usermount = 0;		/* sysctl: by default, users may not mount */

/* Lookup + directory/search-permission check shared by chdir and chroot. */
static int change_dir(struct nameidata *, struct proc *);

/* Redirects cwd/root references from a covered vnode to the new mount. */
void checkdirs(struct vnode *);

/* Copies a struct statfs to userland, zeroing f_fsid for non-root callers. */
int copyout_statfs(struct statfs *, void *, struct proc *);

/*
 * Internal do*() backends.  doopenat() and domknodat() are called below by
 * both the classic system call (passing AT_FDCWD) and its *at() variant;
 * the remaining ones are presumably used the same way further down the
 * file -- confirm against their definitions.
 */
int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
int domknodat(struct proc *, int, const char *, mode_t, dev_t);
int domkfifoat(struct proc *, int, const char *, mode_t);
int dolinkat(struct proc *, int, const char *, int, const char *, int);
int dosymlinkat(struct proc *, const char *, int, const char *);
int dounlinkat(struct proc *, int, const char *, int);
int dofaccessat(struct proc *, int, const char *, int, int);
int dofstatat(struct proc *, int, const char *, struct stat *, int);
int doreadlinkat(struct proc *, int, const char *, char *, size_t,
    register_t *);
int dofchmodat(struct proc *, int, const char *, mode_t, int);
int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
int dorenameat(struct proc *, int, const char *, int, const char *);
int domkdirat(struct proc *, int, const char *, mode_t);
int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
int dofutimens(struct proc *, int, struct timespec [2]);
87 
88 /*
89  * Virtual File System System Calls
90  */
91 
/*
 * Mount a file system.
 *
 * Serves two cases: creating a new mount on the directory named by
 * `path', and (with MNT_UPDATE) changing the flags of the existing mount
 * whose root vnode is `path'.  The file system specific work is delegated
 * to VFS_MOUNT(), which receives the caller's opaque `data' pointer.
 *
 * Returns 0 on success, otherwise an errno value.
 */
/* ARGSUSED */
int
sys_mount(struct proc *p, void *v, register_t *retval)
{
	struct sys_mount_args /* {
		syscallarg(const char *) type;
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(void *) data;
	} */ *uap = v;
	struct vnode *vp;
	struct mount *mp;
	int error, mntflag = 0;
	char fstypename[MFSNAMELEN];
	char fspath[MNAMELEN];
	struct vattr va;
	struct nameidata nd;
	struct vfsconf *vfsp;
	int flags = SCARG(uap, flags);

	/* Unless the usermount sysctl is set, only the superuser may mount. */
	if (usermount == 0 && (error = suser(p, 0)))
		return (error);

	/*
	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
	 */
	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
	if (error)
		return(error);

	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (flags & MNT_UPDATE) {
		/* An update must name the root vnode of a mounted file system. */
		if ((vp->v_flag & VROOT) == 0) {
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		vfsp = mp->mnt_vfc;
		/* Saved so the flags can be restored if VFS_MOUNT() fails. */
		mntflag = mp->mnt_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}

		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
		    (error = suser(p, 0))) {
			vput(vp);
			return (error);
		}
		/*
		 * Do not allow NFS export by non-root users. Silently
		 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and
		 * inherit MNT_NOEXEC from the mount point.
		 */
		if (suser(p, 0) != 0) {
			if (flags & MNT_EXPORTED) {
				vput(vp);
				return (EPERM);
			}
			flags |= MNT_NOSUID | MNT_NODEV;
			if (mntflag & MNT_NOEXEC)
				flags |= MNT_NOEXEC;
		}
		/* Fail rather than sleep if the mount is already busy. */
		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
			vput(vp);
			return (error);
		}
		/* Mark the mount so the post-VFS_MOUNT() code takes the update path. */
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
	    (va.va_uid != p->p_ucred->cr_uid &&
	    (error = suser(p, 0)))) {
		vput(vp);
		return (error);
	}
	/*
	 * Do not allow NFS export by non-root users. Silently
	 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and inherit
	 * MNT_NOEXEC from the mount point.
	 */
	if (suser(p, 0) != 0) {
		if (flags & MNT_EXPORTED) {
			vput(vp);
			return (EPERM);
		}
		flags |= MNT_NOSUID | MNT_NODEV;
		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
			flags |= MNT_NOEXEC;
	}
	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		vput(vp);
		return (ENOTDIR);
	}
	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
	if (error) {
		vput(vp);
		return (error);
	}
	/* Look the requested type up in the list of configured file systems. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	}

	if (vfsp == NULL) {
		vput(vp);
		return (EOPNOTSUPP);
	}

	/* Something is already mounted on this directory. */
	if (vp->v_mountedhere != NULL) {
		vput(vp);
		return (EBUSY);
	}

	/*
	 * Allocate and initialize the file system.
	 */
	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
		M_MOUNT, M_WAITOK|M_ZERO);
	/* The new mount is busied here and unbusied on every exit path below. */
	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
update:
	/*
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		/* Read-only mount asked to go read-write: resolved after VFS_MOUNT(). */
		mp->mnt_flag |= MNT_WANTRDWR;
	/* Replace the caller-settable flags wholesale with the requested ones. */
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
	    MNT_FORCE);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
	    MNT_NOATIME | MNT_FORCE);
	/*
	 * Mount the filesystem.
	 */
	error = VFS_MOUNT(mp, fspath, SCARG(uap, data), &nd, p);
	if (!error) {
		mp->mnt_stat.f_ctime = time_second;
	}
	if (mp->mnt_flag & MNT_UPDATE) {
		vput(vp);
		/* Complete a requested read-only -> read-write transition. */
		if (mp->mnt_flag & MNT_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~
		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
		/* A failed update leaves the mount with its original flags. */
		if (error)
			mp->mnt_flag = mntflag;

		/*
		 * Read-write mounts get a syncer vnode if they lack one;
		 * read-only mounts have theirs removed.
		 * NOTE(review): a vfs_allocate_syncvnode() result replaces
		 * any error from VFS_MOUNT() here.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			if (mp->mnt_syncer == NULL)
				error = vfs_allocate_syncvnode(mp);
		} else {
			if (mp->mnt_syncer != NULL)
				vgone(mp->mnt_syncer);
			mp->mnt_syncer = NULL;
		}

		vfs_unbusy(mp);
		return (error);
	}

	/* Set before the error check; undone in the failure branch below. */
	vp->v_mountedhere = mp;

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	if (!error) {
		vfsp->vfc_refcount++;
		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
		checkdirs(vp);
		VOP_UNLOCK(vp, 0, p);
		/*
		 * NOTE(review): the value stored in error here is
		 * overwritten by the VFS_START() call below.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
		if ((error = VFS_START(mp, 0, p)) != 0)
			vrele(vp);
	} else {
		/* VFS_MOUNT() failed: detach and free the half-built mount. */
		mp->mnt_vnodecovered->v_mountedhere = NULL;
		vfs_unbusy(mp);
		free(mp, M_MOUNT, 0);
		vput(vp);
	}
	return (error);
}
312 
313 /*
314  * Scan all active processes to see if any of them have a current
315  * or root directory onto which the new filesystem has just been
316  * mounted. If so, replace them with the new mount point, keeping
317  * track of how many were replaced.  That's the number of references
318  * the old vnode had that we've replaced, so finish by vrele()'ing
319  * it that many times.  This puts off any possible sleeping until
320  * we've finished walking the allproc list.
321  */
322 void
323 checkdirs(struct vnode *olddp)
324 {
325 	struct filedesc *fdp;
326 	struct vnode *newdp;
327 	struct proc *p;
328 	u_int  free_count = 0;
329 
330 	if (olddp->v_usecount == 1)
331 		return;
332 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
333 		panic("mount: lost mount");
334 	LIST_FOREACH(p, &allproc, p_list) {
335 		fdp = p->p_fd;
336 		if (fdp->fd_cdir == olddp) {
337 			free_count++;
338 			vref(newdp);
339 			fdp->fd_cdir = newdp;
340 		}
341 		if (fdp->fd_rdir == olddp) {
342 			free_count++;
343 			vref(newdp);
344 			fdp->fd_rdir = newdp;
345 		}
346 	}
347 	if (rootvnode == olddp) {
348 		free_count++;
349 		vref(newdp);
350 		rootvnode = newdp;
351 	}
352 	while (free_count-- > 0)
353 		vrele(olddp);
354 	vput(newdp);
355 }
356 
357 /*
358  * Unmount a file system.
359  *
360  * Note: unmount takes a path to the vnode mounted on as argument,
361  * not special file (as before).
362  */
363 /* ARGSUSED */
364 int
365 sys_unmount(struct proc *p, void *v, register_t *retval)
366 {
367 	struct sys_unmount_args /* {
368 		syscallarg(const char *) path;
369 		syscallarg(int) flags;
370 	} */ *uap = v;
371 	struct vnode *vp;
372 	struct mount *mp;
373 	int error;
374 	struct nameidata nd;
375 
376 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
377 	    SCARG(uap, path), p);
378 	if ((error = namei(&nd)) != 0)
379 		return (error);
380 	vp = nd.ni_vp;
381 	mp = vp->v_mount;
382 
383 	/*
384 	 * Only root, or the user that did the original mount is
385 	 * permitted to unmount this filesystem.
386 	 */
387 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
388 	    (error = suser(p, 0))) {
389 		vput(vp);
390 		return (error);
391 	}
392 
393 	/*
394 	 * Don't allow unmounting the root file system.
395 	 */
396 	if (mp->mnt_flag & MNT_ROOTFS) {
397 		vput(vp);
398 		return (EINVAL);
399 	}
400 
401 	/*
402 	 * Must be the root of the filesystem
403 	 */
404 	if ((vp->v_flag & VROOT) == 0) {
405 		vput(vp);
406 		return (EINVAL);
407 	}
408 	vput(vp);
409 
410 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
411 		return (EBUSY);
412 
413 	return (dounmount(mp, SCARG(uap, flags), p, vp));
414 }
415 
416 /*
417  * Do the actual file system unmount.
418  */
419 int
420 dounmount(struct mount *mp, int flags, struct proc *p, struct vnode *olddp)
421 {
422 	struct vnode *coveredvp;
423 	int error;
424 	int hadsyncer = 0;
425 
426  	mp->mnt_flag &=~ MNT_ASYNC;
427  	cache_purgevfs(mp);	/* remove cache entries for this file sys */
428  	if (mp->mnt_syncer != NULL) {
429 		hadsyncer = 1;
430  		vgone(mp->mnt_syncer);
431 		mp->mnt_syncer = NULL;
432 	}
433 	if (((mp->mnt_flag & MNT_RDONLY) ||
434 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
435  	    (flags & MNT_FORCE))
436  		error = VFS_UNMOUNT(mp, flags, p);
437 
438  	if (error && error != EIO && !(flags & MNT_DOOMED)) {
439  		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
440  			(void) vfs_allocate_syncvnode(mp);
441 		vfs_unbusy(mp);
442 		return (error);
443 	}
444 
445 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
446 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
447 		coveredvp->v_mountedhere = NULL;
448  		vrele(coveredvp);
449  	}
450 
451 	mp->mnt_vfc->vfc_refcount--;
452 
453 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
454 		panic("unmount: dangling vnode");
455 
456 	vfs_unbusy(mp);
457 	free(mp, M_MOUNT, 0);
458 
459 	return (0);
460 }
461 
462 /*
463  * Sync each mounted filesystem.
464  */
465 #ifdef DEBUG
466 int syncprt = 0;
467 struct ctldebug debug0 = { "syncprt", &syncprt };
468 #endif
469 
470 /* ARGSUSED */
471 int
472 sys_sync(struct proc *p, void *v, register_t *retval)
473 {
474 	struct mount *mp, *nmp;
475 	int asyncflag;
476 
477 	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
478 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
479 			continue;
480 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
481 			asyncflag = mp->mnt_flag & MNT_ASYNC;
482 			mp->mnt_flag &= ~MNT_ASYNC;
483 			uvm_vnp_sync(mp);
484 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
485 			if (asyncflag)
486 				mp->mnt_flag |= MNT_ASYNC;
487 		}
488 		vfs_unbusy(mp);
489 	}
490 
491 	return (0);
492 }
493 
494 /*
495  * Change filesystem quotas.
496  */
497 /* ARGSUSED */
498 int
499 sys_quotactl(struct proc *p, void *v, register_t *retval)
500 {
501 	struct sys_quotactl_args /* {
502 		syscallarg(const char *) path;
503 		syscallarg(int) cmd;
504 		syscallarg(int) uid;
505 		syscallarg(char *) arg;
506 	} */ *uap = v;
507 	struct mount *mp;
508 	int error;
509 	struct nameidata nd;
510 
511 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
512 	if ((error = namei(&nd)) != 0)
513 		return (error);
514 	mp = nd.ni_vp->v_mount;
515 	vrele(nd.ni_vp);
516 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
517 	    SCARG(uap, arg), p));
518 }
519 
520 int
521 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
522 {
523 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
524 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
525 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
526 	char *s, *d;
527 	int error;
528 
529 	/* Don't let non-root see filesystem id (for NFS security) */
530 	if (suser(p, 0)) {
531 		fsid_t fsid;
532 
533 		s = (char *)sp;
534 		d = (char *)uaddr;
535 
536 		memset(&fsid, 0, sizeof(fsid));
537 
538 		if ((error = copyout(s, d, co_sz1)) != 0)
539 			return (error);
540 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
541 			return (error);
542 		return (copyout(s + co_off2, d + co_off2, co_sz2));
543 	}
544 
545 	return (copyout(sp, uaddr, sizeof(*sp)));
546 }
547 
548 /*
549  * Get filesystem statistics.
550  */
551 /* ARGSUSED */
552 int
553 sys_statfs(struct proc *p, void *v, register_t *retval)
554 {
555 	struct sys_statfs_args /* {
556 		syscallarg(const char *) path;
557 		syscallarg(struct statfs *) buf;
558 	} */ *uap = v;
559 	struct mount *mp;
560 	struct statfs *sp;
561 	int error;
562 	struct nameidata nd;
563 
564 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
565 	if ((error = namei(&nd)) != 0)
566 		return (error);
567 	mp = nd.ni_vp->v_mount;
568 	sp = &mp->mnt_stat;
569 	vrele(nd.ni_vp);
570 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
571 		return (error);
572 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
573 
574 	return (copyout_statfs(sp, SCARG(uap, buf), p));
575 }
576 
577 /*
578  * Get filesystem statistics.
579  */
580 /* ARGSUSED */
581 int
582 sys_fstatfs(struct proc *p, void *v, register_t *retval)
583 {
584 	struct sys_fstatfs_args /* {
585 		syscallarg(int) fd;
586 		syscallarg(struct statfs *) buf;
587 	} */ *uap = v;
588 	struct file *fp;
589 	struct mount *mp;
590 	struct statfs *sp;
591 	int error;
592 
593 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
594 		return (error);
595 	mp = ((struct vnode *)fp->f_data)->v_mount;
596 	if (!mp) {
597 		FRELE(fp, p);
598 		return (ENOENT);
599 	}
600 	sp = &mp->mnt_stat;
601 	error = VFS_STATFS(mp, sp, p);
602 	FRELE(fp, p);
603 	if (error)
604 		return (error);
605 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
606 
607 	return (copyout_statfs(sp, SCARG(uap, buf), p));
608 }
609 
610 /*
611  * Get statistics on all filesystems.
612  */
613 int
614 sys_getfsstat(struct proc *p, void *v, register_t *retval)
615 {
616 	struct sys_getfsstat_args /* {
617 		syscallarg(struct statfs *) buf;
618 		syscallarg(size_t) bufsize;
619 		syscallarg(int) flags;
620 	} */ *uap = v;
621 	struct mount *mp, *nmp;
622 	struct statfs *sp;
623 	struct statfs *sfsp;
624 	size_t count, maxcount;
625 	int error, flags = SCARG(uap, flags);
626 
627 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
628 	sfsp = SCARG(uap, buf);
629 	count = 0;
630 
631 	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, nmp) {
632 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
633 			continue;
634 		if (sfsp && count < maxcount) {
635 			sp = &mp->mnt_stat;
636 
637 			/* Refresh stats unless MNT_NOWAIT is specified */
638 			if (flags != MNT_NOWAIT &&
639 			    flags != MNT_LAZY &&
640 			    (flags == MNT_WAIT ||
641 			    flags == 0) &&
642 			    (error = VFS_STATFS(mp, sp, p))) {
643 				vfs_unbusy(mp);
644  				continue;
645 			}
646 
647 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
648 #if notyet
649 			if (mp->mnt_flag & MNT_SOFTDEP)
650 				sp->f_eflags = STATFS_SOFTUPD;
651 #endif
652 			error = (copyout_statfs(sp, sfsp, p));
653 			if (error) {
654 				vfs_unbusy(mp);
655 				return (error);
656 			}
657 			sfsp++;
658 		}
659 		count++;
660 		vfs_unbusy(mp);
661 	}
662 
663 	if (sfsp && count > maxcount)
664 		*retval = maxcount;
665 	else
666 		*retval = count;
667 
668 	return (0);
669 }
670 
671 /*
672  * Change current working directory to a given file descriptor.
673  */
674 /* ARGSUSED */
675 int
676 sys_fchdir(struct proc *p, void *v, register_t *retval)
677 {
678 	struct sys_fchdir_args /* {
679 		syscallarg(int) fd;
680 	} */ *uap = v;
681 	struct filedesc *fdp = p->p_fd;
682 	struct vnode *vp, *tdp, *old_cdir;
683 	struct mount *mp;
684 	struct file *fp;
685 	int error;
686 
687 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
688 		return (EBADF);
689 	vp = (struct vnode *)fp->f_data;
690 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR)
691 		return (ENOTDIR);
692 	vref(vp);
693 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
694 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
695 
696 	while (!error && (mp = vp->v_mountedhere) != NULL) {
697 		if (vfs_busy(mp, VB_READ|VB_WAIT))
698 			continue;
699 		error = VFS_ROOT(mp, &tdp);
700 		vfs_unbusy(mp);
701 		if (error)
702 			break;
703 		vput(vp);
704 		vp = tdp;
705 	}
706 	if (error) {
707 		vput(vp);
708 		return (error);
709 	}
710 	VOP_UNLOCK(vp, 0, p);
711 	old_cdir = fdp->fd_cdir;
712 	fdp->fd_cdir = vp;
713 	vrele(old_cdir);
714 	return (0);
715 }
716 
717 /*
718  * Change current working directory (``.'').
719  */
720 /* ARGSUSED */
721 int
722 sys_chdir(struct proc *p, void *v, register_t *retval)
723 {
724 	struct sys_chdir_args /* {
725 		syscallarg(const char *) path;
726 	} */ *uap = v;
727 	struct filedesc *fdp = p->p_fd;
728 	struct vnode *old_cdir;
729 	int error;
730 	struct nameidata nd;
731 
732 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
733 	    SCARG(uap, path), p);
734 	if ((error = change_dir(&nd, p)) != 0)
735 		return (error);
736 	old_cdir = fdp->fd_cdir;
737 	fdp->fd_cdir = nd.ni_vp;
738 	vrele(old_cdir);
739 	return (0);
740 }
741 
742 /*
743  * Change notion of root (``/'') directory.
744  */
745 /* ARGSUSED */
746 int
747 sys_chroot(struct proc *p, void *v, register_t *retval)
748 {
749 	struct sys_chroot_args /* {
750 		syscallarg(const char *) path;
751 	} */ *uap = v;
752 	struct filedesc *fdp = p->p_fd;
753 	struct vnode *old_cdir, *old_rdir;
754 	int error;
755 	struct nameidata nd;
756 
757 	if ((error = suser(p, 0)) != 0)
758 		return (error);
759 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
760 	    SCARG(uap, path), p);
761 	if ((error = change_dir(&nd, p)) != 0)
762 		return (error);
763 	if (fdp->fd_rdir != NULL) {
764 		/*
765 		 * A chroot() done inside a changed root environment does
766 		 * an automatic chdir to avoid the out-of-tree experience.
767 		 */
768 		vref(nd.ni_vp);
769 		old_rdir = fdp->fd_rdir;
770 		old_cdir = fdp->fd_cdir;
771 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
772 		vrele(old_rdir);
773 		vrele(old_cdir);
774 	} else
775 		fdp->fd_rdir = nd.ni_vp;
776 	return (0);
777 }
778 
779 /*
780  * Common routine for chroot and chdir.
781  */
782 static int
783 change_dir(struct nameidata *ndp, struct proc *p)
784 {
785 	struct vnode *vp;
786 	int error;
787 
788 	if ((error = namei(ndp)) != 0)
789 		return (error);
790 	vp = ndp->ni_vp;
791 	if (vp->v_type != VDIR)
792 		error = ENOTDIR;
793 	else
794 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
795 	if (error)
796 		vput(vp);
797 	else
798 		VOP_UNLOCK(vp, 0, p);
799 	return (error);
800 }
801 
802 /*
803  * Check permissions, allocate an open file structure,
804  * and call the device open routine if any.
805  */
806 int
807 sys_open(struct proc *p, void *v, register_t *retval)
808 {
809 	struct sys_open_args /* {
810 		syscallarg(const char *) path;
811 		syscallarg(int) flags;
812 		syscallarg(mode_t) mode;
813 	} */ *uap = v;
814 
815 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
816 	    SCARG(uap, mode), retval));
817 }
818 
819 int
820 sys_openat(struct proc *p, void *v, register_t *retval)
821 {
822 	struct sys_openat_args /* {
823 		syscallarg(int) fd;
824 		syscallarg(const char *) path;
825 		syscallarg(int) flags;
826 		syscallarg(mode_t) mode;
827 	} */ *uap = v;
828 
829 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
830 	    SCARG(uap, flags), SCARG(uap, mode), retval));
831 }
832 
/*
 * Common backend of sys_open() and sys_openat().
 *
 * Allocates a descriptor and file structure, opens the vnode named by
 * `path' (looked up relative to `fd') through vn_open(), optionally
 * takes the flock requested by O_EXLOCK/O_SHLOCK, and optionally
 * truncates the file.  On success the new descriptor index is stored in
 * *retval and 0 is returned; otherwise an errno value is returned and
 * the descriptor slot is released.
 *
 * The descriptor table lock is held for the whole operation.
 */
int
doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int flags, cmode;
	int type, indx, error, localtrunc = 0;
	struct flock lf;
	struct nameidata nd;

	fdplock(fdp);

	if ((error = falloc(p, &fp, &indx)) != 0)
		goto out;
	flags = FFLAGS(oflags);
	if (flags & O_CLOEXEC)
		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;

	/* Creation mode: apply the process umask and strip S_ISTXT. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * When a lock is requested together with O_TRUNC, hide O_TRUNC
	 * from vn_open() and truncate below, after the lock is held.
	 */
	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
		localtrunc = 1;
		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
	}
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/*
		 * ENODEV with p_dupfd set to a descriptor means the open
		 * should be satisfied by duplicating that descriptor.
		 */
		if (error == ENODEV &&
		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			dupfdopen(fdp, indx, p->p_dupfd, flags)) == 0) {
			closef(fp, p);
			*retval = indx;
			goto out;
		}
		/* An interrupted open is reported as EINTR, not restarted. */
		if (error == ERESTART)
			error = EINTR;
		fdremove(fdp, indx);
		closef(fp, p);
		goto out;
	}
	p->p_dupfd = 0;
	vp = nd.ni_vp;
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock: exclusive for O_EXLOCK, shared otherwise. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped around the advisory lock request. */
		VOP_UNLOCK(vp, 0, p);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			/* closef will vn_close the file for us. */
			fdremove(fdp, indx);
			closef(fp, p);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		fp->f_iflags |= FIF_HASLOCK;
	}
	if (localtrunc) {
		/* Deferred O_TRUNC: perform the checks and the truncate here. */
		if ((fp->f_flag & FWRITE) == 0)
			error = EACCES;
		else if (vp->v_mount->mnt_flag & MNT_RDONLY)
			error = EROFS;
		else if (vp->v_type == VDIR)
			error = EISDIR;
		else if ((error = vn_writechk(vp)) == 0) {
			VATTR_NULL(&vattr);
			vattr.va_size = 0;
			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
		}
		if (error) {
			VOP_UNLOCK(vp, 0, p);
			/* closef will close the file for us. */
			fdremove(fdp, indx);
			closef(fp, p);
			goto out;
		}
	}
	VOP_UNLOCK(vp, 0, p);
	*retval = indx;
	FILE_SET_MATURE(fp, p);
out:
	fdpunlock(fdp);
	return (error);
}
931 
932 /*
933  * Get file handle system call
934  */
935 int
936 sys_getfh(struct proc *p, void *v, register_t *retval)
937 {
938 	struct sys_getfh_args /* {
939 		syscallarg(const char *) fname;
940 		syscallarg(fhandle_t *) fhp;
941 	} */ *uap = v;
942 	struct vnode *vp;
943 	fhandle_t fh;
944 	int error;
945 	struct nameidata nd;
946 
947 	/*
948 	 * Must be super user
949 	 */
950 	error = suser(p, 0);
951 	if (error)
952 		return (error);
953 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
954 	    SCARG(uap, fname), p);
955 	error = namei(&nd);
956 	if (error)
957 		return (error);
958 	vp = nd.ni_vp;
959 	memset(&fh, 0, sizeof(fh));
960 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
961 	error = VFS_VPTOFH(vp, &fh.fh_fid);
962 	vput(vp);
963 	if (error)
964 		return (error);
965 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
966 	return (error);
967 }
968 
/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * Superuser only.  The flags must request reading and/or writing and
 * must not contain O_CREAT.  The handle is copied in from `fhp',
 * resolved to a mount with vfs_getvfs() and to a vnode with
 * VFS_FHTOVP(); the rest mirrors what vn_open() and the open system
 * call do for a path.  On success the new descriptor index is stored
 * in *retval.
 */
int
sys_fhopen(struct proc *p, void *v, register_t *retval)
{
	struct sys_fhopen_args /* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(int) flags;
	} */ *uap = v;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp = NULL;
	struct mount *mp;
	struct ucred *cred = p->p_ucred;
	int flags;
	int type, indx, error=0;
	struct flock lf;
	struct vattr va;
	fhandle_t fh;

	/*
	 * Must be super user
	 */
	if ((error = suser(p, 0)))
		return (error);

	flags = FFLAGS(SCARG(uap, flags));
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((flags & O_CREAT))
		return (EINVAL);

	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0) {
		/* Tells the bad: path there is no file/descriptor to undo. */
		fp = NULL;
		goto bad;
	}
	if (flags & O_CLOEXEC)
		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;

	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
		goto bad;

	/* No mount with this filesystem id: the handle is stale. */
	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto bad;
	}

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
		vp = NULL;	/* most likely unnecessary sanity for bad: */
		goto bad;
	}

	/* Now do an effective vn_open */

	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	if (flags & FREAD) {
		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
			goto bad;
	}
	if (flags & (FWRITE | O_TRUNC)) {
		/* Directories cannot be opened for writing or truncated. */
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
		    (error = vn_writechk(vp)) != 0)
			goto bad;
	}
	if (flags & O_TRUNC) {
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
		goto bad;
	if (flags & FWRITE)
		vp->v_writecount++;

	/* done with modified vn_open, now finish what sys_open does. */

	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock: exclusive for O_EXLOCK, shared otherwise. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped around the advisory lock request. */
		VOP_UNLOCK(vp, 0, p);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			vp = NULL;	/* closef will vn_close the file */
			goto bad;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		fp->f_iflags |= FIF_HASLOCK;
	}
	VOP_UNLOCK(vp, 0, p);
	*retval = indx;
	FILE_SET_MATURE(fp, p);

	fdpunlock(fdp);
	return (0);

bad:
	/*
	 * Common failure exit.  fp is NULL only when falloc() failed;
	 * otherwise the descriptor slot and file structure are released.
	 * vp is non-NULL only while this function still owns the vnode,
	 * in which case it is released with vput().
	 */
	if (fp) {
		fdremove(fdp, indx);
		closef(fp, p);
		if (vp != NULL)
			vput(vp);
	}
	fdpunlock(fdp);
	return (error);
}
1103 
1104 /* ARGSUSED */
1105 int
1106 sys_fhstat(struct proc *p, void *v, register_t *retval)
1107 {
1108 	struct sys_fhstat_args /* {
1109 		syscallarg(const fhandle_t *) fhp;
1110 		syscallarg(struct stat *) sb;
1111 	} */ *uap = v;
1112 	struct stat sb;
1113 	int error;
1114 	fhandle_t fh;
1115 	struct mount *mp;
1116 	struct vnode *vp;
1117 
1118 	/*
1119 	 * Must be super user
1120 	 */
1121 	if ((error = suser(p, 0)))
1122 		return (error);
1123 
1124 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1125 		return (error);
1126 
1127 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1128 		return (ESTALE);
1129 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1130 		return (error);
1131 	error = vn_stat(vp, &sb, p);
1132 	vput(vp);
1133 	if (error)
1134 		return (error);
1135 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1136 	return (error);
1137 }
1138 
1139 /* ARGSUSED */
1140 int
1141 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1142 {
1143 	struct sys_fhstatfs_args /* {
1144 		syscallarg(const fhandle_t *) fhp;
1145 		syscallarg(struct statfs *) buf;
1146 	} */ *uap = v;
1147 	struct statfs *sp;
1148 	fhandle_t fh;
1149 	struct mount *mp;
1150 	struct vnode *vp;
1151 	int error;
1152 
1153 	/*
1154 	 * Must be super user
1155 	 */
1156 	if ((error = suser(p, 0)))
1157 		return (error);
1158 
1159 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1160 		return (error);
1161 
1162 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1163 		return (ESTALE);
1164 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1165 		return (error);
1166 	mp = vp->v_mount;
1167 	sp = &mp->mnt_stat;
1168 	vput(vp);
1169 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1170 		return (error);
1171 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1172 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1173 }
1174 
1175 /*
1176  * Create a special file.
1177  */
1178 /* ARGSUSED */
1179 int
1180 sys_mknod(struct proc *p, void *v, register_t *retval)
1181 {
1182 	struct sys_mknod_args /* {
1183 		syscallarg(const char *) path;
1184 		syscallarg(mode_t) mode;
1185 		syscallarg(int) dev;
1186 	} */ *uap = v;
1187 
1188 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1189 	    SCARG(uap, dev)));
1190 }
1191 
1192 int
1193 sys_mknodat(struct proc *p, void *v, register_t *retval)
1194 {
1195 	struct sys_mknodat_args /* {
1196 		syscallarg(int) fd;
1197 		syscallarg(const char *) path;
1198 		syscallarg(mode_t) mode;
1199 		syscallarg(dev_t) dev;
1200 	} */ *uap = v;
1201 
1202 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1203 	    SCARG(uap, mode), SCARG(uap, dev)));
1204 }
1205 
1206 int
1207 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1208 {
1209 	struct vnode *vp;
1210 	struct vattr vattr;
1211 	int error;
1212 	struct nameidata nd;
1213 
1214 	if ((error = suser(p, 0)) != 0)
1215 		return (error);
1216 	if (p->p_fd->fd_rdir)
1217 		return (EINVAL);
1218 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1219 	if ((error = namei(&nd)) != 0)
1220 		return (error);
1221 	vp = nd.ni_vp;
1222 	if (vp != NULL)
1223 		error = EEXIST;
1224 	else {
1225 		VATTR_NULL(&vattr);
1226 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1227 		vattr.va_rdev = dev;
1228 
1229 		switch (mode & S_IFMT) {
1230 		case S_IFMT:	/* used by badsect to flag bad sectors */
1231 			vattr.va_type = VBAD;
1232 			break;
1233 		case S_IFCHR:
1234 			vattr.va_type = VCHR;
1235 			break;
1236 		case S_IFBLK:
1237 			vattr.va_type = VBLK;
1238 			break;
1239 		default:
1240 			error = EINVAL;
1241 			break;
1242 		}
1243 	}
1244 	if (!error) {
1245 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1246 	} else {
1247 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1248 		if (nd.ni_dvp == vp)
1249 			vrele(nd.ni_dvp);
1250 		else
1251 			vput(nd.ni_dvp);
1252 		if (vp)
1253 			vrele(vp);
1254 	}
1255 	return (error);
1256 }
1257 
1258 /*
1259  * Create a named pipe.
1260  */
1261 /* ARGSUSED */
1262 int
1263 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1264 {
1265 	struct sys_mkfifo_args /* {
1266 		syscallarg(const char *) path;
1267 		syscallarg(mode_t) mode;
1268 	} */ *uap = v;
1269 
1270 	return (domkfifoat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
1271 }
1272 
1273 int
1274 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1275 {
1276 	struct sys_mkfifoat_args /* {
1277 		syscallarg(int) fd;
1278 		syscallarg(const char *) path;
1279 		syscallarg(mode_t) mode;
1280 	} */ *uap = v;
1281 
1282 	return (domkfifoat(p, SCARG(uap, fd), SCARG(uap, path),
1283 	    SCARG(uap, mode)));
1284 }
1285 
1286 int
1287 domkfifoat(struct proc *p, int fd, const char *path, mode_t mode)
1288 {
1289 #ifndef FIFO
1290 	return (EOPNOTSUPP);
1291 #else
1292 	struct vattr vattr;
1293 	int error;
1294 	struct nameidata nd;
1295 
1296 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1297 	if ((error = namei(&nd)) != 0)
1298 		return (error);
1299 	if (nd.ni_vp != NULL) {
1300 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1301 		if (nd.ni_dvp == nd.ni_vp)
1302 			vrele(nd.ni_dvp);
1303 		else
1304 			vput(nd.ni_dvp);
1305 		vrele(nd.ni_vp);
1306 		return (EEXIST);
1307 	}
1308 	VATTR_NULL(&vattr);
1309 	vattr.va_type = VFIFO;
1310 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1311 	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
1312 #endif /* FIFO */
1313 }
1314 
1315 /*
1316  * Make a hard file link.
1317  */
1318 /* ARGSUSED */
1319 int
1320 sys_link(struct proc *p, void *v, register_t *retval)
1321 {
1322 	struct sys_link_args /* {
1323 		syscallarg(const char *) path;
1324 		syscallarg(const char *) link;
1325 	} */ *uap = v;
1326 
1327 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1328 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1329 }
1330 
1331 int
1332 sys_linkat(struct proc *p, void *v, register_t *retval)
1333 {
1334 	struct sys_linkat_args /* {
1335 		syscallarg(int) fd1;
1336 		syscallarg(const char *) path1;
1337 		syscallarg(int) fd2;
1338 		syscallarg(const char *) path2;
1339 		syscallarg(int) flag;
1340 	} */ *uap = v;
1341 
1342 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1343 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1344 }
1345 
1346 int
1347 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1348     const char *path2, int flag)
1349 {
1350 	struct vnode *vp;
1351 	struct nameidata nd;
1352 	int error, follow;
1353 	int flags;
1354 
1355 	if (flag & ~AT_SYMLINK_FOLLOW)
1356 		return (EINVAL);
1357 
1358 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1359 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1360 	if ((error = namei(&nd)) != 0)
1361 		return (error);
1362 	vp = nd.ni_vp;
1363 
1364 	flags = LOCKPARENT;
1365 	if (vp->v_type == VDIR) {
1366 		flags |= STRIPSLASHES;
1367 	}
1368 
1369 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1370 	if ((error = namei(&nd)) != 0)
1371 		goto out;
1372 	if (nd.ni_vp) {
1373 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1374 		if (nd.ni_dvp == nd.ni_vp)
1375 			vrele(nd.ni_dvp);
1376 		else
1377 			vput(nd.ni_dvp);
1378 		vrele(nd.ni_vp);
1379 		error = EEXIST;
1380 		goto out;
1381 	}
1382 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1383 out:
1384 	vrele(vp);
1385 	return (error);
1386 }
1387 
1388 /*
1389  * Make a symbolic link.
1390  */
1391 /* ARGSUSED */
1392 int
1393 sys_symlink(struct proc *p, void *v, register_t *retval)
1394 {
1395 	struct sys_symlink_args /* {
1396 		syscallarg(const char *) path;
1397 		syscallarg(const char *) link;
1398 	} */ *uap = v;
1399 
1400 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1401 }
1402 
1403 int
1404 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1405 {
1406 	struct sys_symlinkat_args /* {
1407 		syscallarg(const char *) path;
1408 		syscallarg(int) fd;
1409 		syscallarg(const char *) link;
1410 	} */ *uap = v;
1411 
1412 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1413 	    SCARG(uap, link)));
1414 }
1415 
1416 int
1417 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1418 {
1419 	struct vattr vattr;
1420 	char *path;
1421 	int error;
1422 	struct nameidata nd;
1423 
1424 	path = pool_get(&namei_pool, PR_WAITOK);
1425 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1426 	if (error)
1427 		goto out;
1428 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1429 	if ((error = namei(&nd)) != 0)
1430 		goto out;
1431 	if (nd.ni_vp) {
1432 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1433 		if (nd.ni_dvp == nd.ni_vp)
1434 			vrele(nd.ni_dvp);
1435 		else
1436 			vput(nd.ni_dvp);
1437 		vrele(nd.ni_vp);
1438 		error = EEXIST;
1439 		goto out;
1440 	}
1441 	VATTR_NULL(&vattr);
1442 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1443 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1444 out:
1445 	pool_put(&namei_pool, path);
1446 	return (error);
1447 }
1448 
1449 /*
1450  * Delete a name from the filesystem.
1451  */
1452 /* ARGSUSED */
1453 int
1454 sys_unlink(struct proc *p, void *v, register_t *retval)
1455 {
1456 	struct sys_unlink_args /* {
1457 		syscallarg(const char *) path;
1458 	} */ *uap = v;
1459 
1460 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1461 }
1462 
1463 int
1464 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1465 {
1466 	struct sys_unlinkat_args /* {
1467 		syscallarg(int) fd;
1468 		syscallarg(const char *) path;
1469 		syscallarg(int) flag;
1470 	} */ *uap = v;
1471 
1472 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1473 	    SCARG(uap, flag)));
1474 }
1475 
1476 int
1477 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1478 {
1479 	struct vnode *vp;
1480 	int error;
1481 	struct nameidata nd;
1482 
1483 	if (flag & ~AT_REMOVEDIR)
1484 		return (EINVAL);
1485 
1486 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1487 	    fd, path, p);
1488 	if ((error = namei(&nd)) != 0)
1489 		return (error);
1490 	vp = nd.ni_vp;
1491 
1492 	if (flag & AT_REMOVEDIR) {
1493 		if (vp->v_type != VDIR) {
1494 			error = ENOTDIR;
1495 			goto out;
1496 		}
1497 		/*
1498 		 * No rmdir "." please.
1499 		 */
1500 		if (nd.ni_dvp == vp) {
1501 			error = EBUSY;
1502 			goto out;
1503 		}
1504 	}
1505 
1506 	/*
1507 	 * The root of a mounted filesystem cannot be deleted.
1508 	 */
1509 	if (vp->v_flag & VROOT)
1510 		error = EBUSY;
1511 out:
1512 	if (!error) {
1513 		if (flag & AT_REMOVEDIR) {
1514 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1515 		} else {
1516 			(void)uvm_vnp_uncache(vp);
1517 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1518 		}
1519 	} else {
1520 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1521 		if (nd.ni_dvp == vp)
1522 			vrele(nd.ni_dvp);
1523 		else
1524 			vput(nd.ni_dvp);
1525 		vput(vp);
1526 	}
1527 	return (error);
1528 }
1529 
1530 /*
1531  * Reposition read/write file offset.
1532  */
1533 int
1534 sys_lseek(struct proc *p, void *v, register_t *retval)
1535 {
1536 	struct sys_lseek_args /* {
1537 		syscallarg(int) fd;
1538 		syscallarg(int) pad;
1539 		syscallarg(off_t) offset;
1540 		syscallarg(int) whence;
1541 	} */ *uap = v;
1542 	struct ucred *cred = p->p_ucred;
1543 	struct filedesc *fdp = p->p_fd;
1544 	struct file *fp;
1545 	struct vattr vattr;
1546 	struct vnode *vp;
1547 	off_t offarg, newoff;
1548 	int error, special;
1549 
1550 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1551 		return (EBADF);
1552 	if (fp->f_type != DTYPE_VNODE)
1553 		return (ESPIPE);
1554 	vp = (struct vnode *)fp->f_data;
1555 	if (vp->v_type == VFIFO)
1556 		return (ESPIPE);
1557 	FREF(fp);
1558 	if (vp->v_type == VCHR)
1559 		special = 1;
1560 	else
1561 		special = 0;
1562 	offarg = SCARG(uap, offset);
1563 
1564 	switch (SCARG(uap, whence)) {
1565 	case SEEK_CUR:
1566 		newoff = fp->f_offset + offarg;
1567 		break;
1568 	case SEEK_END:
1569 		error = VOP_GETATTR(vp, &vattr, cred, p);
1570 		if (error)
1571 			goto bad;
1572 		newoff = offarg + (off_t)vattr.va_size;
1573 		break;
1574 	case SEEK_SET:
1575 		newoff = offarg;
1576 		break;
1577 	default:
1578 		error = EINVAL;
1579 		goto bad;
1580 	}
1581 	if (!special) {
1582 		if (newoff < 0) {
1583 			error = EINVAL;
1584 			goto bad;
1585 		}
1586 	}
1587 	*(off_t *)retval = fp->f_offset = newoff;
1588 	fp->f_seek++;
1589 	error = 0;
1590  bad:
1591 	FRELE(fp, p);
1592 	return (error);
1593 }
1594 
1595 /*
1596  * Check access permissions.
1597  */
1598 int
1599 sys_access(struct proc *p, void *v, register_t *retval)
1600 {
1601 	struct sys_access_args /* {
1602 		syscallarg(const char *) path;
1603 		syscallarg(int) flags;
1604 	} */ *uap = v;
1605 
1606 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1607 	    SCARG(uap, flags), 0));
1608 }
1609 
1610 int
1611 sys_faccessat(struct proc *p, void *v, register_t *retval)
1612 {
1613 	struct sys_faccessat_args /* {
1614 		syscallarg(int) fd;
1615 		syscallarg(const char *) path;
1616 		syscallarg(int) amode;
1617 		syscallarg(int) flag;
1618 	} */ *uap = v;
1619 
1620 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1621 	    SCARG(uap, amode), SCARG(uap, flag)));
1622 }
1623 
1624 int
1625 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1626 {
1627 	struct vnode *vp;
1628 	struct ucred *newcred, *oldcred;
1629 	struct nameidata nd;
1630 	int error;
1631 
1632 	if (amode & ~(R_OK | W_OK | X_OK))
1633 		return (EINVAL);
1634 	if (flag & ~AT_EACCESS)
1635 		return (EINVAL);
1636 
1637 	newcred = NULL;
1638 	oldcred = p->p_ucred;
1639 
1640 	/*
1641 	 * If access as real ids was requested and they really differ,
1642 	 * give the thread new creds with them reset
1643 	 */
1644 	if ((flag & AT_EACCESS) == 0 &&
1645 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1646 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1647 		p->p_ucred = newcred = crdup(oldcred);
1648 		newcred->cr_uid = newcred->cr_ruid;
1649 		newcred->cr_gid = newcred->cr_rgid;
1650 	}
1651 
1652 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1653 	if ((error = namei(&nd)) != 0)
1654 		goto out;
1655 	vp = nd.ni_vp;
1656 
1657 	/* Flags == 0 means only check for existence. */
1658 	if (amode) {
1659 		int vflags = 0;
1660 
1661 		if (amode & R_OK)
1662 			vflags |= VREAD;
1663 		if (amode & W_OK)
1664 			vflags |= VWRITE;
1665 		if (amode & X_OK)
1666 			vflags |= VEXEC;
1667 
1668 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
1669 		if (!error && (vflags & VWRITE))
1670 			error = vn_writechk(vp);
1671 	}
1672 	vput(vp);
1673 out:
1674 	if (newcred != NULL) {
1675 		p->p_ucred = oldcred;
1676 		crfree(newcred);
1677 	}
1678 	return (error);
1679 }
1680 
1681 /*
1682  * Get file status; this version follows links.
1683  */
1684 /* ARGSUSED */
1685 int
1686 sys_stat(struct proc *p, void *v, register_t *retval)
1687 {
1688 	struct sys_stat_args /* {
1689 		syscallarg(const char *) path;
1690 		syscallarg(struct stat *) ub;
1691 	} */ *uap = v;
1692 
1693 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
1694 }
1695 
1696 int
1697 sys_fstatat(struct proc *p, void *v, register_t *retval)
1698 {
1699 	struct sys_fstatat_args /* {
1700 		syscallarg(int) fd;
1701 		syscallarg(const char *) path;
1702 		syscallarg(struct stat *) buf;
1703 		syscallarg(int) flag;
1704 	} */ *uap = v;
1705 
1706 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
1707 	    SCARG(uap, buf), SCARG(uap, flag)));
1708 }
1709 
1710 int
1711 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
1712 {
1713 	struct stat sb;
1714 	int error, follow;
1715 	struct nameidata nd;
1716 
1717 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1718 		return (EINVAL);
1719 
1720 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1721 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1722 	if ((error = namei(&nd)) != 0)
1723 		return (error);
1724 	error = vn_stat(nd.ni_vp, &sb, p);
1725 	vput(nd.ni_vp);
1726 	if (error)
1727 		return (error);
1728 	/* Don't let non-root see generation numbers (for NFS security) */
1729 	if (suser(p, 0))
1730 		sb.st_gen = 0;
1731 	error = copyout(&sb, buf, sizeof(sb));
1732 #ifdef KTRACE
1733 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
1734 		ktrstat(p, &sb);
1735 #endif
1736 	return (error);
1737 }
1738 
1739 /*
1740  * Get file status; this version does not follow links.
1741  */
1742 /* ARGSUSED */
1743 int
1744 sys_lstat(struct proc *p, void *v, register_t *retval)
1745 {
1746 	struct sys_lstat_args /* {
1747 		syscallarg(const char *) path;
1748 		syscallarg(struct stat *) ub;
1749 	} */ *uap = v;
1750 
1751 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
1752 	    AT_SYMLINK_NOFOLLOW));
1753 }
1754 
1755 /*
1756  * Get configurable pathname variables.
1757  */
1758 /* ARGSUSED */
1759 int
1760 sys_pathconf(struct proc *p, void *v, register_t *retval)
1761 {
1762 	struct sys_pathconf_args /* {
1763 		syscallarg(const char *) path;
1764 		syscallarg(int) name;
1765 	} */ *uap = v;
1766 	int error;
1767 	struct nameidata nd;
1768 
1769 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1770 	    SCARG(uap, path), p);
1771 	if ((error = namei(&nd)) != 0)
1772 		return (error);
1773 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1774 	vput(nd.ni_vp);
1775 	return (error);
1776 }
1777 
1778 /*
1779  * Return target name of a symbolic link.
1780  */
1781 /* ARGSUSED */
1782 int
1783 sys_readlink(struct proc *p, void *v, register_t *retval)
1784 {
1785 	struct sys_readlink_args /* {
1786 		syscallarg(const char *) path;
1787 		syscallarg(char *) buf;
1788 		syscallarg(size_t) count;
1789 	} */ *uap = v;
1790 
1791 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
1792 	    SCARG(uap, count), retval));
1793 }
1794 
1795 int
1796 sys_readlinkat(struct proc *p, void *v, register_t *retval)
1797 {
1798 	struct sys_readlinkat_args /* {
1799 		syscallarg(int) fd;
1800 		syscallarg(const char *) path;
1801 		syscallarg(char *) buf;
1802 		syscallarg(size_t) count;
1803 	} */ *uap = v;
1804 
1805 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1806 	    SCARG(uap, buf), SCARG(uap, count), retval));
1807 }
1808 
1809 int
1810 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
1811     size_t count, register_t *retval)
1812 {
1813 	struct vnode *vp;
1814 	struct iovec aiov;
1815 	struct uio auio;
1816 	int error;
1817 	struct nameidata nd;
1818 
1819 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1820 	if ((error = namei(&nd)) != 0)
1821 		return (error);
1822 	vp = nd.ni_vp;
1823 	if (vp->v_type != VLNK)
1824 		error = EINVAL;
1825 	else {
1826 		aiov.iov_base = buf;
1827 		aiov.iov_len = count;
1828 		auio.uio_iov = &aiov;
1829 		auio.uio_iovcnt = 1;
1830 		auio.uio_offset = 0;
1831 		auio.uio_rw = UIO_READ;
1832 		auio.uio_segflg = UIO_USERSPACE;
1833 		auio.uio_procp = p;
1834 		auio.uio_resid = count;
1835 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1836 		*retval = count - auio.uio_resid;
1837 	}
1838 	vput(vp);
1839 	return (error);
1840 }
1841 
1842 /*
1843  * Change flags of a file given a path name.
1844  */
1845 /* ARGSUSED */
1846 int
1847 sys_chflags(struct proc *p, void *v, register_t *retval)
1848 {
1849 	struct sys_chflags_args /* {
1850 		syscallarg(const char *) path;
1851 		syscallarg(u_int) flags;
1852 	} */ *uap = v;
1853 	struct vnode *vp;
1854 	struct vattr vattr;
1855 	int error;
1856 	struct nameidata nd;
1857 	u_int flags = SCARG(uap, flags);
1858 
1859 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1860 	if ((error = namei(&nd)) != 0)
1861 		return (error);
1862 	vp = nd.ni_vp;
1863 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1864 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1865 		error = EROFS;
1866 	else if (flags == VNOVAL)
1867 		error = EINVAL;
1868 	else {
1869 		if (suser(p, 0)) {
1870 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
1871 				goto out;
1872 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1873 				error = EINVAL;
1874 				goto out;
1875 			}
1876 		}
1877 		VATTR_NULL(&vattr);
1878 		vattr.va_flags = flags;
1879 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1880 	}
1881 out:
1882 	vput(vp);
1883 	return (error);
1884 }
1885 
1886 /*
1887  * Change flags of a file given a file descriptor.
1888  */
1889 /* ARGSUSED */
1890 int
1891 sys_fchflags(struct proc *p, void *v, register_t *retval)
1892 {
1893 	struct sys_fchflags_args /* {
1894 		syscallarg(int) fd;
1895 		syscallarg(u_int) flags;
1896 	} */ *uap = v;
1897 	struct vattr vattr;
1898 	struct vnode *vp;
1899 	struct file *fp;
1900 	int error;
1901 	u_int flags = SCARG(uap, flags);
1902 
1903 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1904 		return (error);
1905 	vp = (struct vnode *)fp->f_data;
1906 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1907 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1908 		error = EROFS;
1909 	else if (flags == VNOVAL)
1910 		error = EINVAL;
1911 	else {
1912 		if (suser(p, 0)) {
1913 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
1914 			    != 0)
1915 				goto out;
1916 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1917 				error = EINVAL;
1918 				goto out;
1919 			}
1920 		}
1921 		VATTR_NULL(&vattr);
1922 		vattr.va_flags = flags;
1923 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1924 	}
1925 out:
1926 	VOP_UNLOCK(vp, 0, p);
1927 	FRELE(fp, p);
1928 	return (error);
1929 }
1930 
1931 /*
1932  * Change mode of a file given path name.
1933  */
1934 /* ARGSUSED */
1935 int
1936 sys_chmod(struct proc *p, void *v, register_t *retval)
1937 {
1938 	struct sys_chmod_args /* {
1939 		syscallarg(const char *) path;
1940 		syscallarg(mode_t) mode;
1941 	} */ *uap = v;
1942 
1943 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
1944 }
1945 
1946 int
1947 sys_fchmodat(struct proc *p, void *v, register_t *retval)
1948 {
1949 	struct sys_fchmodat_args /* {
1950 		syscallarg(int) fd;
1951 		syscallarg(const char *) path;
1952 		syscallarg(mode_t) mode;
1953 		syscallarg(int) flag;
1954 	} */ *uap = v;
1955 
1956 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
1957 	    SCARG(uap, mode), SCARG(uap, flag)));
1958 }
1959 
1960 int
1961 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
1962 {
1963 	struct vnode *vp;
1964 	struct vattr vattr;
1965 	int error, follow;
1966 	struct nameidata nd;
1967 
1968 	if (mode & ~(S_IFMT | ALLPERMS))
1969 		return (EINVAL);
1970 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1971 		return (EINVAL);
1972 
1973 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1974 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
1975 	if ((error = namei(&nd)) != 0)
1976 		return (error);
1977 	vp = nd.ni_vp;
1978 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1979 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1980 		error = EROFS;
1981 	else {
1982 		VATTR_NULL(&vattr);
1983 		vattr.va_mode = mode & ALLPERMS;
1984 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1985 	}
1986 	vput(vp);
1987 	return (error);
1988 }
1989 
1990 /*
1991  * Change mode of a file given a file descriptor.
1992  */
1993 /* ARGSUSED */
1994 int
1995 sys_fchmod(struct proc *p, void *v, register_t *retval)
1996 {
1997 	struct sys_fchmod_args /* {
1998 		syscallarg(int) fd;
1999 		syscallarg(mode_t) mode;
2000 	} */ *uap = v;
2001 	struct vattr vattr;
2002 	struct vnode *vp;
2003 	struct file *fp;
2004 	int error;
2005 
2006 	if (SCARG(uap, mode) & ~(S_IFMT | ALLPERMS))
2007 		return (EINVAL);
2008 
2009 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2010 		return (error);
2011 	vp = (struct vnode *)fp->f_data;
2012 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2013 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2014 		error = EROFS;
2015 	else {
2016 		VATTR_NULL(&vattr);
2017 		vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
2018 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2019 	}
2020 	VOP_UNLOCK(vp, 0, p);
2021 	FRELE(fp, p);
2022 	return (error);
2023 }
2024 
2025 /*
2026  * Set ownership given a path name.
2027  */
2028 /* ARGSUSED */
2029 int
2030 sys_chown(struct proc *p, void *v, register_t *retval)
2031 {
2032 	struct sys_chown_args /* {
2033 		syscallarg(const char *) path;
2034 		syscallarg(uid_t) uid;
2035 		syscallarg(gid_t) gid;
2036 	} */ *uap = v;
2037 
2038 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2039 	    SCARG(uap, gid), 0));
2040 }
2041 
2042 int
2043 sys_fchownat(struct proc *p, void *v, register_t *retval)
2044 {
2045 	struct sys_fchownat_args /* {
2046 		syscallarg(int) fd;
2047 		syscallarg(const char *) path;
2048 		syscallarg(uid_t) uid;
2049 		syscallarg(gid_t) gid;
2050 		syscallarg(int) flag;
2051 	} */ *uap = v;
2052 
2053 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2054 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2055 }
2056 
2057 int
2058 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2059     int flag)
2060 {
2061 	struct vnode *vp;
2062 	struct vattr vattr;
2063 	int error, follow;
2064 	struct nameidata nd;
2065 	mode_t mode;
2066 
2067 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2068 		return (EINVAL);
2069 
2070 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2071 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2072 	if ((error = namei(&nd)) != 0)
2073 		return (error);
2074 	vp = nd.ni_vp;
2075 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2076 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2077 		error = EROFS;
2078 	else {
2079 		if ((uid != -1 || gid != -1) &&
2080 		    (suser(p, 0) || suid_clear)) {
2081 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2082 			if (error)
2083 				goto out;
2084 			mode = vattr.va_mode & ~(VSUID | VSGID);
2085 			if (mode == vattr.va_mode)
2086 				mode = VNOVAL;
2087 		}
2088 		else
2089 			mode = VNOVAL;
2090 		VATTR_NULL(&vattr);
2091 		vattr.va_uid = uid;
2092 		vattr.va_gid = gid;
2093 		vattr.va_mode = mode;
2094 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2095 	}
2096 out:
2097 	vput(vp);
2098 	return (error);
2099 }
2100 
2101 /*
2102  * Set ownership given a path name, without following links.
2103  */
2104 /* ARGSUSED */
2105 int
2106 sys_lchown(struct proc *p, void *v, register_t *retval)
2107 {
2108 	struct sys_lchown_args /* {
2109 		syscallarg(const char *) path;
2110 		syscallarg(uid_t) uid;
2111 		syscallarg(gid_t) gid;
2112 	} */ *uap = v;
2113 	struct vnode *vp;
2114 	struct vattr vattr;
2115 	int error;
2116 	struct nameidata nd;
2117 	mode_t mode;
2118 	uid_t uid = SCARG(uap, uid);
2119 	gid_t gid = SCARG(uap, gid);
2120 
2121 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2122 	if ((error = namei(&nd)) != 0)
2123 		return (error);
2124 	vp = nd.ni_vp;
2125 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2126 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2127 		error = EROFS;
2128 	else {
2129 		if ((uid != -1 || gid != -1) &&
2130 		    (suser(p, 0) || suid_clear)) {
2131 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2132 			if (error)
2133 				goto out;
2134 			mode = vattr.va_mode & ~(VSUID | VSGID);
2135 			if (mode == vattr.va_mode)
2136 				mode = VNOVAL;
2137 		}
2138 		else
2139 			mode = VNOVAL;
2140 		VATTR_NULL(&vattr);
2141 		vattr.va_uid = uid;
2142 		vattr.va_gid = gid;
2143 		vattr.va_mode = mode;
2144 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2145 	}
2146 out:
2147 	vput(vp);
2148 	return (error);
2149 }
2150 
2151 /*
2152  * Set ownership given a file descriptor.
2153  */
2154 /* ARGSUSED */
2155 int
2156 sys_fchown(struct proc *p, void *v, register_t *retval)
2157 {
2158 	struct sys_fchown_args /* {
2159 		syscallarg(int) fd;
2160 		syscallarg(uid_t) uid;
2161 		syscallarg(gid_t) gid;
2162 	} */ *uap = v;
2163 	struct vnode *vp;
2164 	struct vattr vattr;
2165 	int error;
2166 	struct file *fp;
2167 	mode_t mode;
2168 	uid_t uid = SCARG(uap, uid);
2169 	gid_t gid = SCARG(uap, gid);
2170 
2171 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2172 		return (error);
2173 	vp = (struct vnode *)fp->f_data;
2174 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2175 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2176 		error = EROFS;
2177 	else {
2178 		if ((uid != -1 || gid != -1) &&
2179 		    (suser(p, 0) || suid_clear)) {
2180 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2181 			if (error)
2182 				goto out;
2183 			mode = vattr.va_mode & ~(VSUID | VSGID);
2184 			if (mode == vattr.va_mode)
2185 				mode = VNOVAL;
2186 		} else
2187 			mode = VNOVAL;
2188 		VATTR_NULL(&vattr);
2189 		vattr.va_uid = uid;
2190 		vattr.va_gid = gid;
2191 		vattr.va_mode = mode;
2192 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2193 	}
2194 out:
2195 	VOP_UNLOCK(vp, 0, p);
2196 	FRELE(fp, p);
2197 	return (error);
2198 }
2199 
2200 /*
2201  * Set the access and modification times given a path name.
2202  */
2203 /* ARGSUSED */
2204 int
2205 sys_utimes(struct proc *p, void *v, register_t *retval)
2206 {
2207 	struct sys_utimes_args /* {
2208 		syscallarg(const char *) path;
2209 		syscallarg(const struct timeval *) tptr;
2210 	} */ *uap = v;
2211 
2212 	struct timespec ts[2];
2213 	struct timeval tv[2];
2214 	const struct timeval *tvp;
2215 	int error;
2216 
2217 	tvp = SCARG(uap, tptr);
2218 	if (tvp != NULL) {
2219 		error = copyin(tvp, tv, sizeof(tv));
2220 		if (error)
2221 			return (error);
2222 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2223 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2224 	} else
2225 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2226 
2227 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2228 }
2229 
2230 int
2231 sys_utimensat(struct proc *p, void *v, register_t *retval)
2232 {
2233 	struct sys_utimensat_args /* {
2234 		syscallarg(int) fd;
2235 		syscallarg(const char *) path;
2236 		syscallarg(const struct timespec *) times;
2237 		syscallarg(int) flag;
2238 	} */ *uap = v;
2239 
2240 	struct timespec ts[2];
2241 	const struct timespec *tsp;
2242 	int error;
2243 
2244 	tsp = SCARG(uap, times);
2245 	if (tsp != NULL) {
2246 		error = copyin(tsp, ts, sizeof(ts));
2247 		if (error)
2248 			return (error);
2249 	} else
2250 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2251 
2252 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2253 	    SCARG(uap, flag)));
2254 }
2255 
2256 int
2257 doutimensat(struct proc *p, int fd, const char *path,
2258     struct timespec ts[2], int flag)
2259 {
2260 	struct vnode *vp;
2261 	int error, follow;
2262 	struct nameidata nd;
2263 
2264 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2265 		return (EINVAL);
2266 
2267 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2268 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2269 	if ((error = namei(&nd)) != 0)
2270 		return (error);
2271 	vp = nd.ni_vp;
2272 
2273 	return (dovutimens(p, vp, ts));
2274 }
2275 
2276 int
2277 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2278 {
2279 	struct vattr vattr;
2280 	struct timespec now;
2281 	int error;
2282 
2283 #ifdef KTRACE
2284 	/* if they're both UTIME_NOW, then don't report either */
2285 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2286 	    KTRPOINT(p, KTR_STRUCT)) {
2287 		ktrabstimespec(p, &ts[0]);
2288 		ktrabstimespec(p, &ts[1]);
2289 	}
2290 #endif
2291 
2292 	VATTR_NULL(&vattr);
2293 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2294 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2295 			vattr.va_vaflags |= VA_UTIMES_NULL;
2296 
2297 		getnanotime(&now);
2298 		if (ts[0].tv_nsec == UTIME_NOW)
2299 			ts[0] = now;
2300 		if (ts[1].tv_nsec == UTIME_NOW)
2301 			ts[1] = now;
2302 	}
2303 
2304 	/*
2305 	 * XXX: Ideally the filesystem code would check tv_nsec ==
2306 	 * UTIME_OMIT instead of tv_sec == VNOVAL, but until then we
2307 	 * need to fudge tv_sec if it happens to equal VNOVAL.
2308 	 */
2309 	if (ts[0].tv_nsec == UTIME_OMIT)
2310 		ts[0].tv_sec = VNOVAL;
2311 	else if (ts[0].tv_sec == VNOVAL)
2312 		ts[0].tv_sec = VNOVAL - 1;
2313 
2314 	if (ts[1].tv_nsec == UTIME_OMIT)
2315 		ts[1].tv_sec = VNOVAL;
2316 	else if (ts[1].tv_sec == VNOVAL)
2317 		ts[1].tv_sec = VNOVAL - 1;
2318 
2319 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2320 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2321 		error = EROFS;
2322 	else {
2323 		vattr.va_atime = ts[0];
2324 		vattr.va_mtime = ts[1];
2325 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2326 	}
2327 	vput(vp);
2328 	return (error);
2329 }
2330 
2331 /*
2332  * Set the access and modification times given a file descriptor.
2333  */
2334 /* ARGSUSED */
2335 int
2336 sys_futimes(struct proc *p, void *v, register_t *retval)
2337 {
2338 	struct sys_futimes_args /* {
2339 		syscallarg(int) fd;
2340 		syscallarg(const struct timeval *) tptr;
2341 	} */ *uap = v;
2342 	struct timeval tv[2];
2343 	struct timespec ts[2];
2344 	const struct timeval *tvp;
2345 	int error;
2346 
2347 	tvp = SCARG(uap, tptr);
2348 	if (tvp != NULL) {
2349 		error = copyin(tvp, tv, sizeof(tv));
2350 		if (error)
2351 			return (error);
2352 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2353 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2354 	} else
2355 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2356 
2357 	return (dofutimens(p, SCARG(uap, fd), ts));
2358 }
2359 
2360 int
2361 sys_futimens(struct proc *p, void *v, register_t *retval)
2362 {
2363 	struct sys_futimens_args /* {
2364 		syscallarg(int) fd;
2365 		syscallarg(const struct timespec *) times;
2366 	} */ *uap = v;
2367 	struct timespec ts[2];
2368 	const struct timespec *tsp;
2369 	int error;
2370 
2371 	tsp = SCARG(uap, times);
2372 	if (tsp != NULL) {
2373 		error = copyin(tsp, ts, sizeof(ts));
2374 		if (error)
2375 			return (error);
2376 	} else
2377 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2378 
2379 	return (dofutimens(p, SCARG(uap, fd), ts));
2380 }
2381 
2382 int
2383 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2384 {
2385 	struct file *fp;
2386 	struct vnode *vp;
2387 	int error;
2388 
2389 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2390 		return (error);
2391 	vp = (struct vnode *)fp->f_data;
2392 	vref(vp);
2393 	FRELE(fp, p);
2394 
2395 	return (dovutimens(p, vp, ts));
2396 }
2397 
2398 /*
2399  * Truncate a file given its path name.
2400  */
2401 /* ARGSUSED */
2402 int
2403 sys_truncate(struct proc *p, void *v, register_t *retval)
2404 {
2405 	struct sys_truncate_args /* {
2406 		syscallarg(const char *) path;
2407 		syscallarg(int) pad;
2408 		syscallarg(off_t) length;
2409 	} */ *uap = v;
2410 	struct vnode *vp;
2411 	struct vattr vattr;
2412 	int error;
2413 	struct nameidata nd;
2414 
2415 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2416 	if ((error = namei(&nd)) != 0)
2417 		return (error);
2418 	vp = nd.ni_vp;
2419 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2420 	if (vp->v_type == VDIR)
2421 		error = EISDIR;
2422 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2423 	    (error = vn_writechk(vp)) == 0) {
2424 		VATTR_NULL(&vattr);
2425 		vattr.va_size = SCARG(uap, length);
2426 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2427 	}
2428 	vput(vp);
2429 	return (error);
2430 }
2431 
2432 /*
2433  * Truncate a file given a file descriptor.
2434  */
2435 /* ARGSUSED */
2436 int
2437 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2438 {
2439 	struct sys_ftruncate_args /* {
2440 		syscallarg(int) fd;
2441 		syscallarg(int) pad;
2442 		syscallarg(off_t) length;
2443 	} */ *uap = v;
2444 	struct vattr vattr;
2445 	struct vnode *vp;
2446 	struct file *fp;
2447 	off_t len;
2448 	int error;
2449 
2450 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2451 		return (error);
2452 	len = SCARG(uap, length);
2453 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2454 		error = EINVAL;
2455 		goto bad;
2456 	}
2457 	vp = (struct vnode *)fp->f_data;
2458 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2459 	if (vp->v_type == VDIR)
2460 		error = EISDIR;
2461 	else if ((error = vn_writechk(vp)) == 0) {
2462 		VATTR_NULL(&vattr);
2463 		vattr.va_size = len;
2464 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2465 	}
2466 	VOP_UNLOCK(vp, 0, p);
2467 bad:
2468 	FRELE(fp, p);
2469 	return (error);
2470 }
2471 
2472 /*
2473  * Sync an open file.
2474  */
2475 /* ARGSUSED */
2476 int
2477 sys_fsync(struct proc *p, void *v, register_t *retval)
2478 {
2479 	struct sys_fsync_args /* {
2480 		syscallarg(int) fd;
2481 	} */ *uap = v;
2482 	struct vnode *vp;
2483 	struct file *fp;
2484 	int error;
2485 
2486 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2487 		return (error);
2488 	vp = (struct vnode *)fp->f_data;
2489 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2490 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2491 #ifdef FFS_SOFTUPDATES
2492 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2493 		error = softdep_fsync(vp);
2494 #endif
2495 
2496 	VOP_UNLOCK(vp, 0, p);
2497 	FRELE(fp, p);
2498 	return (error);
2499 }
2500 
2501 /*
2502  * Rename files.  Source and destination must either both be directories,
2503  * or both not be directories.  If target is a directory, it must be empty.
2504  */
2505 /* ARGSUSED */
2506 int
2507 sys_rename(struct proc *p, void *v, register_t *retval)
2508 {
2509 	struct sys_rename_args /* {
2510 		syscallarg(const char *) from;
2511 		syscallarg(const char *) to;
2512 	} */ *uap = v;
2513 
2514 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2515 	    SCARG(uap, to)));
2516 }
2517 
2518 int
2519 sys_renameat(struct proc *p, void *v, register_t *retval)
2520 {
2521 	struct sys_renameat_args /* {
2522 		syscallarg(int) fromfd;
2523 		syscallarg(const char *) from;
2524 		syscallarg(int) tofd;
2525 		syscallarg(const char *) to;
2526 	} */ *uap = v;
2527 
2528 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2529 	    SCARG(uap, tofd), SCARG(uap, to)));
2530 }
2531 
2532 int
2533 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2534     const char *to)
2535 {
2536 	struct vnode *tvp, *fvp, *tdvp;
2537 	struct nameidata fromnd, tond;
2538 	int error;
2539 	int flags;
2540 
2541 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2542 	    fromfd, from, p);
2543 	if ((error = namei(&fromnd)) != 0)
2544 		return (error);
2545 	fvp = fromnd.ni_vp;
2546 
2547 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2548 	/*
2549 	 * rename("foo/", "bar/");  is  OK
2550 	 */
2551 	if (fvp->v_type == VDIR)
2552 		flags |= STRIPSLASHES;
2553 
2554 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2555 	if ((error = namei(&tond)) != 0) {
2556 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2557 		vrele(fromnd.ni_dvp);
2558 		vrele(fvp);
2559 		goto out1;
2560 	}
2561 	tdvp = tond.ni_dvp;
2562 	tvp = tond.ni_vp;
2563 	if (tvp != NULL) {
2564 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2565 			error = ENOTDIR;
2566 			goto out;
2567 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2568 			error = EISDIR;
2569 			goto out;
2570 		}
2571 	}
2572 	if (fvp == tdvp)
2573 		error = EINVAL;
2574 	/*
2575 	 * If source is the same as the destination (that is the
2576 	 * same inode number)
2577 	 */
2578 	if (fvp == tvp)
2579 		error = -1;
2580 out:
2581 	if (!error) {
2582 		if (tvp) {
2583 			(void)uvm_vnp_uncache(tvp);
2584 		}
2585 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2586 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2587 	} else {
2588 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2589 		if (tdvp == tvp)
2590 			vrele(tdvp);
2591 		else
2592 			vput(tdvp);
2593 		if (tvp)
2594 			vput(tvp);
2595 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2596 		vrele(fromnd.ni_dvp);
2597 		vrele(fvp);
2598 	}
2599 	vrele(tond.ni_startdir);
2600 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2601 out1:
2602 	if (fromnd.ni_startdir)
2603 		vrele(fromnd.ni_startdir);
2604 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2605 	if (error == -1)
2606 		return (0);
2607 	return (error);
2608 }
2609 
2610 /*
2611  * Make a directory file.
2612  */
2613 /* ARGSUSED */
2614 int
2615 sys_mkdir(struct proc *p, void *v, register_t *retval)
2616 {
2617 	struct sys_mkdir_args /* {
2618 		syscallarg(const char *) path;
2619 		syscallarg(mode_t) mode;
2620 	} */ *uap = v;
2621 
2622 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
2623 }
2624 
2625 int
2626 sys_mkdirat(struct proc *p, void *v, register_t *retval)
2627 {
2628 	struct sys_mkdirat_args /* {
2629 		syscallarg(int) fd;
2630 		syscallarg(const char *) path;
2631 		syscallarg(mode_t) mode;
2632 	} */ *uap = v;
2633 
2634 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
2635 	    SCARG(uap, mode)));
2636 }
2637 
2638 int
2639 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
2640 {
2641 	struct vnode *vp;
2642 	struct vattr vattr;
2643 	int error;
2644 	struct nameidata nd;
2645 
2646 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
2647 	    fd, path, p);
2648 	if ((error = namei(&nd)) != 0)
2649 		return (error);
2650 	vp = nd.ni_vp;
2651 	if (vp != NULL) {
2652 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2653 		if (nd.ni_dvp == vp)
2654 			vrele(nd.ni_dvp);
2655 		else
2656 			vput(nd.ni_dvp);
2657 		vrele(vp);
2658 		return (EEXIST);
2659 	}
2660 	VATTR_NULL(&vattr);
2661 	vattr.va_type = VDIR;
2662 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2663 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2664 	if (!error)
2665 		vput(nd.ni_vp);
2666 	return (error);
2667 }
2668 
2669 /*
2670  * Remove a directory file.
2671  */
2672 /* ARGSUSED */
2673 int
2674 sys_rmdir(struct proc *p, void *v, register_t *retval)
2675 {
2676 	struct sys_rmdir_args /* {
2677 		syscallarg(const char *) path;
2678 	} */ *uap = v;
2679 
2680 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
2681 }
2682 
2683 /*
2684  * Read a block of directory entries in a file system independent format.
2685  */
2686 int
2687 sys_getdents(struct proc *p, void *v, register_t *retval)
2688 {
2689 	struct sys_getdents_args /* {
2690 		syscallarg(int) fd;
2691 		syscallarg(void *) buf;
2692 		syscallarg(size_t) buflen;
2693 	} */ *uap = v;
2694 	struct vnode *vp;
2695 	struct file *fp;
2696 	struct uio auio;
2697 	struct iovec aiov;
2698 	size_t buflen;
2699 	int error, eofflag;
2700 
2701 	buflen = SCARG(uap, buflen);
2702 
2703 	if (buflen > INT_MAX)
2704 		return EINVAL;
2705 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2706 		return (error);
2707 	if ((fp->f_flag & FREAD) == 0) {
2708 		error = EBADF;
2709 		goto bad;
2710 	}
2711 	if (fp->f_offset < 0) {
2712 		error = EINVAL;
2713 		goto bad;
2714 	}
2715 	vp = fp->f_data;
2716 	if (vp->v_type != VDIR) {
2717 		error = EINVAL;
2718 		goto bad;
2719 	}
2720 	aiov.iov_base = SCARG(uap, buf);
2721 	aiov.iov_len = buflen;
2722 	auio.uio_iov = &aiov;
2723 	auio.uio_iovcnt = 1;
2724 	auio.uio_rw = UIO_READ;
2725 	auio.uio_segflg = UIO_USERSPACE;
2726 	auio.uio_procp = p;
2727 	auio.uio_resid = buflen;
2728 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2729 	auio.uio_offset = fp->f_offset;
2730 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
2731 	fp->f_offset = auio.uio_offset;
2732 	VOP_UNLOCK(vp, 0, p);
2733 	if (error)
2734 		goto bad;
2735 	*retval = buflen - auio.uio_resid;
2736 bad:
2737 	FRELE(fp, p);
2738 	return (error);
2739 }
2740 
2741 /*
2742  * Set the mode mask for creation of filesystem nodes.
2743  */
2744 int
2745 sys_umask(struct proc *p, void *v, register_t *retval)
2746 {
2747 	struct sys_umask_args /* {
2748 		syscallarg(mode_t) newmask;
2749 	} */ *uap = v;
2750 	struct filedesc *fdp;
2751 
2752 	fdp = p->p_fd;
2753 	*retval = fdp->fd_cmask;
2754 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2755 	return (0);
2756 }
2757 
2758 /*
2759  * Void all references to file by ripping underlying filesystem
2760  * away from vnode.
2761  */
2762 /* ARGSUSED */
2763 int
2764 sys_revoke(struct proc *p, void *v, register_t *retval)
2765 {
2766 	struct sys_revoke_args /* {
2767 		syscallarg(const char *) path;
2768 	} */ *uap = v;
2769 	struct vnode *vp;
2770 	struct vattr vattr;
2771 	int error;
2772 	struct nameidata nd;
2773 
2774 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2775 	if ((error = namei(&nd)) != 0)
2776 		return (error);
2777 	vp = nd.ni_vp;
2778 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2779 		goto out;
2780 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2781 	    (error = suser(p, 0)))
2782 		goto out;
2783 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
2784 		VOP_REVOKE(vp, REVOKEALL);
2785 out:
2786 	vrele(vp);
2787 	return (error);
2788 }
2789 
2790 /*
2791  * Convert a user file descriptor to a kernel file entry.
2792  *
2793  * On return *fpp is FREF:ed.
2794  */
2795 int
2796 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2797 {
2798 	struct file *fp;
2799 	struct vnode *vp;
2800 
2801 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2802 		return (EBADF);
2803 
2804 	if (fp->f_type != DTYPE_VNODE)
2805 		return (EINVAL);
2806 
2807 	vp = (struct vnode *)fp->f_data;
2808 	if (vp->v_type == VBAD)
2809 		return (EBADF);
2810 
2811 	FREF(fp);
2812 	*fpp = fp;
2813 
2814 	return (0);
2815 }
2816 
2817 /*
2818  * Positional read system call.
2819  */
2820 int
2821 sys_pread(struct proc *p, void *v, register_t *retval)
2822 {
2823 	struct sys_pread_args /* {
2824 		syscallarg(int) fd;
2825 		syscallarg(void *) buf;
2826 		syscallarg(size_t) nbyte;
2827 		syscallarg(int) pad;
2828 		syscallarg(off_t) offset;
2829 	} */ *uap = v;
2830 	struct iovec iov;
2831 	struct filedesc *fdp = p->p_fd;
2832 	struct file *fp;
2833 	struct vnode *vp;
2834 	off_t offset;
2835 	int fd = SCARG(uap, fd);
2836 
2837 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2838 		return (EBADF);
2839 	if ((fp->f_flag & FREAD) == 0)
2840 		return (EBADF);
2841 
2842 	vp = (struct vnode *)fp->f_data;
2843 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2844 	    (vp->v_flag & VISTTY)) {
2845 		return (ESPIPE);
2846 	}
2847 
2848 	iov.iov_base = SCARG(uap, buf);
2849 	iov.iov_len = SCARG(uap, nbyte);
2850 
2851 	offset = SCARG(uap, offset);
2852 	if (offset < 0 && vp->v_type != VCHR)
2853 		return (EINVAL);
2854 
2855 	FREF(fp);
2856 
2857 	/* dofilereadv() will FRELE the descriptor for us */
2858 	return (dofilereadv(p, fd, fp, &iov, 1, 0, &offset, retval));
2859 }
2860 
2861 /*
2862  * Positional scatter read system call.
2863  */
2864 int
2865 sys_preadv(struct proc *p, void *v, register_t *retval)
2866 {
2867 	struct sys_preadv_args /* {
2868 		syscallarg(int) fd;
2869 		syscallarg(const struct iovec *) iovp;
2870 		syscallarg(int) iovcnt;
2871 		syscallarg(int) pad;
2872 		syscallarg(off_t) offset;
2873 	} */ *uap = v;
2874 	struct filedesc *fdp = p->p_fd;
2875 	struct file *fp;
2876 	struct vnode *vp;
2877 	off_t offset;
2878 	int fd = SCARG(uap, fd);
2879 
2880 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2881 		return (EBADF);
2882 	if ((fp->f_flag & FREAD) == 0)
2883 		return (EBADF);
2884 
2885 	vp = (struct vnode *)fp->f_data;
2886 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2887 	    (vp->v_flag & VISTTY)) {
2888 		return (ESPIPE);
2889 	}
2890 
2891 	offset = SCARG(uap, offset);
2892 	if (offset < 0 && vp->v_type != VCHR)
2893 		return (EINVAL);
2894 
2895 	FREF(fp);
2896 
2897 	/* dofilereadv() will FRELE the descriptor for us */
2898 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
2899 	    &offset, retval));
2900 }
2901 
2902 /*
2903  * Positional write system call.
2904  */
2905 int
2906 sys_pwrite(struct proc *p, void *v, register_t *retval)
2907 {
2908 	struct sys_pwrite_args /* {
2909 		syscallarg(int) fd;
2910 		syscallarg(const void *) buf;
2911 		syscallarg(size_t) nbyte;
2912 		syscallarg(int) pad;
2913 		syscallarg(off_t) offset;
2914 	} */ *uap = v;
2915 	struct iovec iov;
2916 	struct filedesc *fdp = p->p_fd;
2917 	struct file *fp;
2918 	struct vnode *vp;
2919 	off_t offset;
2920 	int fd = SCARG(uap, fd);
2921 
2922 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2923 		return (EBADF);
2924 	if ((fp->f_flag & FWRITE) == 0)
2925 		return (EBADF);
2926 
2927 	vp = (struct vnode *)fp->f_data;
2928 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2929 	    (vp->v_flag & VISTTY)) {
2930 		return (ESPIPE);
2931 	}
2932 
2933 	iov.iov_base = (void *)SCARG(uap, buf);
2934 	iov.iov_len = SCARG(uap, nbyte);
2935 
2936 	offset = SCARG(uap, offset);
2937 	if (offset < 0 && vp->v_type != VCHR)
2938 		return (EINVAL);
2939 
2940 	FREF(fp);
2941 
2942 	/* dofilewritev() will FRELE the descriptor for us */
2943 	return (dofilewritev(p, fd, fp, &iov, 1, 0, &offset, retval));
2944 }
2945 
2946 /*
2947  * Positional gather write system call.
2948  */
2949 int
2950 sys_pwritev(struct proc *p, void *v, register_t *retval)
2951 {
2952 	struct sys_pwritev_args /* {
2953 		syscallarg(int) fd;
2954 		syscallarg(const struct iovec *) iovp;
2955 		syscallarg(int) iovcnt;
2956 		syscallarg(int) pad;
2957 		syscallarg(off_t) offset;
2958 	} */ *uap = v;
2959 	struct filedesc *fdp = p->p_fd;
2960 	struct file *fp;
2961 	struct vnode *vp;
2962 	off_t offset;
2963 	int fd = SCARG(uap, fd);
2964 
2965 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2966 		return (EBADF);
2967 	if ((fp->f_flag & FWRITE) == 0)
2968 		return (EBADF);
2969 
2970 	vp = (struct vnode *)fp->f_data;
2971 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2972 	    (vp->v_flag & VISTTY)) {
2973 		return (ESPIPE);
2974 	}
2975 
2976 	offset = SCARG(uap, offset);
2977 	if (offset < 0 && vp->v_type != VCHR)
2978 		return (EINVAL);
2979 
2980 	FREF(fp);
2981 
2982 	/* dofilewritev() will FRELE the descriptor for us */
2983 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2984 	    1, &offset, retval));
2985 }
2986 
2987