xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.342 2020/01/30 15:36:11 visa Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
66 extern int suid_clear;
67 
68 static int change_dir(struct nameidata *, struct proc *);
69 
70 void checkdirs(struct vnode *);
71 
72 int copyout_statfs(struct statfs *, void *, struct proc *);
73 
74 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
75 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
76 int dolinkat(struct proc *, int, const char *, int, const char *, int);
77 int dosymlinkat(struct proc *, const char *, int, const char *);
78 int dounlinkat(struct proc *, int, const char *, int);
79 int dofaccessat(struct proc *, int, const char *, int, int);
80 int dofstatat(struct proc *, int, const char *, struct stat *, int);
81 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
82     register_t *);
83 int dochflagsat(struct proc *, int, const char *, u_int, int);
84 int dovchflags(struct proc *, struct vnode *, u_int);
85 int dofchmodat(struct proc *, int, const char *, mode_t, int);
86 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
87 int dorenameat(struct proc *, int, const char *, int, const char *);
88 int domkdirat(struct proc *, int, const char *, mode_t);
89 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
90 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
91 int dofutimens(struct proc *, int, struct timespec [2]);
92 int dounmount_leaf(struct mount *, int, struct proc *);
93 
94 /*
95  * Virtual File System System Calls
96  */
97 
98 /*
99  * Mount a file system.
100  */
101 int
102 sys_mount(struct proc *p, void *v, register_t *retval)
103 {
104 	struct sys_mount_args /* {
105 		syscallarg(const char *) type;
106 		syscallarg(const char *) path;
107 		syscallarg(int) flags;
108 		syscallarg(void *) data;
109 	} */ *uap = v;
110 	struct vnode *vp;
111 	struct mount *mp;
112 	int error, mntflag = 0;
113 	char fstypename[MFSNAMELEN];
114 	char fspath[MNAMELEN];
115 	struct nameidata nd;
116 	struct vfsconf *vfsp;
117 	int flags = SCARG(uap, flags);
118 	void *args = NULL;
119 
120 	if ((error = suser(p)))
121 		return (error);
122 
123 	/*
124 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
125 	 */
126 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
127 	if (error)
128 		return(error);
129 
130 	/*
131 	 * Get vnode to be covered
132 	 */
133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
134 	if ((error = namei(&nd)) != 0)
135 		goto fail;
136 	vp = nd.ni_vp;
137 	if (flags & MNT_UPDATE) {
138 		if ((vp->v_flag & VROOT) == 0) {
139 			vput(vp);
140 			error = EINVAL;
141 			goto fail;
142 		}
143 		mp = vp->v_mount;
144 		vfsp = mp->mnt_vfc;
145 
146 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
147 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
148 		if (error) {
149 			vput(vp);
150 			goto fail;
151 		}
152 
153 		mntflag = mp->mnt_flag;
154 		/*
155 		 * We only allow the filesystem to be reloaded if it
156 		 * is currently mounted read-only.
157 		 */
158 		if ((flags & MNT_RELOAD) &&
159 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 			vput(vp);
161 			error = EOPNOTSUPP;	/* Needs translation */
162 			goto fail;
163 		}
164 
165 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
166 			vput(vp);
167 			goto fail;
168 		}
169 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
170 		goto update;
171 	}
172 	/*
173 	 * Do not allow disabling of permission checks unless exec and access to
174 	 * device files is disabled too.
175 	 */
176 	if ((flags & MNT_NOPERM) &&
177 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
178 		vput(vp);
179 		error = EPERM;
180 		goto fail;
181 	}
182 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
183 		vput(vp);
184 		goto fail;
185 	}
186 	if (vp->v_type != VDIR) {
187 		vput(vp);
188 		goto fail;
189 	}
190 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
191 	if (error) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	vfsp = vfs_byname(fstypename);
196 	if (vfsp == NULL) {
197 		vput(vp);
198 		error = EOPNOTSUPP;
199 		goto fail;
200 	}
201 
202 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
203 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
204 	if (error) {
205 		vput(vp);
206 		goto fail;
207 	}
208 
209 	if (vp->v_mountedhere != NULL) {
210 		vput(vp);
211 		error = EBUSY;
212 		goto fail;
213 	}
214 
215 	/*
216 	 * Allocate and initialize the file system.
217 	 */
218 	mp = vfs_mount_alloc(vp, vfsp);
219 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
220 
221 update:
222 	/* Ensure that the parent mountpoint does not get unmounted. */
223 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
224 	if (error) {
225 		if (mp->mnt_flag & MNT_UPDATE) {
226 			mp->mnt_flag = mntflag;
227 			vfs_unbusy(mp);
228 		} else {
229 			vfs_unbusy(mp);
230 			vfs_mount_free(mp);
231 		}
232 		vput(vp);
233 		goto fail;
234 	}
235 
236 	/*
237 	 * Set the mount level flags.
238 	 */
239 	if (flags & MNT_RDONLY)
240 		mp->mnt_flag |= MNT_RDONLY;
241 	else if (mp->mnt_flag & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_WANTRDWR;
243 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
244 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
245 	    MNT_NOPERM | MNT_FORCE);
246 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
247 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
248 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
249 	/*
250 	 * Mount the filesystem.
251 	 */
252 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
253 	if (!error) {
254 		mp->mnt_stat.f_ctime = time_second;
255 	}
256 	if (mp->mnt_flag & MNT_UPDATE) {
257 		vfs_unbusy(vp->v_mount);
258 		vput(vp);
259 		if (mp->mnt_flag & MNT_WANTRDWR)
260 			mp->mnt_flag &= ~MNT_RDONLY;
261 		mp->mnt_flag &= ~MNT_OP_FLAGS;
262 		if (error)
263 			mp->mnt_flag = mntflag;
264 
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
266 			if (mp->mnt_syncer == NULL)
267 				error = vfs_allocate_syncvnode(mp);
268 		} else {
269 			if (mp->mnt_syncer != NULL)
270 				vgone(mp->mnt_syncer);
271 			mp->mnt_syncer = NULL;
272 		}
273 
274 		vfs_unbusy(mp);
275 		goto fail;
276 	}
277 
278 	mp->mnt_flag &= ~MNT_OP_FLAGS;
279 	vp->v_mountedhere = mp;
280 
281 	/*
282 	 * Put the new filesystem on the mount list after root.
283 	 */
284 	cache_purge(vp);
285 	if (!error) {
286 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
287 		checkdirs(vp);
288 		vfs_unbusy(vp->v_mount);
289 		VOP_UNLOCK(vp);
290 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
291 			error = vfs_allocate_syncvnode(mp);
292 		vfs_unbusy(mp);
293 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
294 		if ((error = VFS_START(mp, 0, p)) != 0)
295 			vrele(vp);
296 	} else {
297 		mp->mnt_vnodecovered->v_mountedhere = NULL;
298 		vfs_unbusy(mp);
299 		vfs_mount_free(mp);
300 		vfs_unbusy(vp->v_mount);
301 		vput(vp);
302 	}
303 fail:
304 	if (args)
305 		free(args, M_TEMP, vfsp->vfc_datasize);
306 	return (error);
307 }
308 
309 /*
310  * Scan all active processes to see if any of them have a current
311  * or root directory onto which the new filesystem has just been
312  * mounted. If so, replace them with the new mount point, keeping
313  * track of how many were replaced.  That's the number of references
314  * the old vnode had that we've replaced, so finish by vrele()'ing
315  * it that many times.  This puts off any possible sleeping until
316  * we've finished walking the allprocess list.
317  */
318 void
319 checkdirs(struct vnode *olddp)
320 {
321 	struct filedesc *fdp;
322 	struct vnode *newdp;
323 	struct process *pr;
324 	u_int  free_count = 0;
325 
326 	if (olddp->v_usecount == 1)
327 		return;
328 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
329 		panic("mount: lost mount");
330 	LIST_FOREACH(pr, &allprocess, ps_list) {
331 		fdp = pr->ps_fd;
332 		if (fdp->fd_cdir == olddp) {
333 			free_count++;
334 			vref(newdp);
335 			fdp->fd_cdir = newdp;
336 		}
337 		if (fdp->fd_rdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_rdir = newdp;
341 		}
342 		pr->ps_uvpcwd = NULL;
343 	}
344 	if (rootvnode == olddp) {
345 		free_count++;
346 		vref(newdp);
347 		rootvnode = newdp;
348 	}
349 	while (free_count-- > 0)
350 		vrele(olddp);
351 	vput(newdp);
352 }
353 
354 /*
355  * Unmount a file system.
356  *
357  * Note: unmount takes a path to the vnode mounted on as argument,
358  * not special file (as before).
359  */
360 int
361 sys_unmount(struct proc *p, void *v, register_t *retval)
362 {
363 	struct sys_unmount_args /* {
364 		syscallarg(const char *) path;
365 		syscallarg(int) flags;
366 	} */ *uap = v;
367 	struct vnode *vp;
368 	struct mount *mp;
369 	int error;
370 	struct nameidata nd;
371 
372 	if ((error = suser(p)) != 0)
373 		return (error);
374 
375 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
376 	    SCARG(uap, path), p);
377 	if ((error = namei(&nd)) != 0)
378 		return (error);
379 	vp = nd.ni_vp;
380 	mp = vp->v_mount;
381 
382 	/*
383 	 * Don't allow unmounting the root file system.
384 	 */
385 	if (mp->mnt_flag & MNT_ROOTFS) {
386 		vput(vp);
387 		return (EINVAL);
388 	}
389 
390 	/*
391 	 * Must be the root of the filesystem
392 	 */
393 	if ((vp->v_flag & VROOT) == 0) {
394 		vput(vp);
395 		return (EINVAL);
396 	}
397 	vput(vp);
398 
399 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
400 		return (EBUSY);
401 
402 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
403 }
404 
405 /*
406  * Do the actual file system unmount.
407  */
408 int
409 dounmount(struct mount *mp, int flags, struct proc *p)
410 {
411 	SLIST_HEAD(, mount) mplist;
412 	struct mount *nmp;
413 	int error;
414 
415 	SLIST_INIT(&mplist);
416 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
417 
418 	/*
419 	 * Collect nested mount points. This takes advantage of the mount list
420 	 * being ordered - nested mount points come after their parent.
421 	 */
422 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
423 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
424 			if (mp->mnt_vnodecovered == NULLVP ||
425 			    mp->mnt_vnodecovered->v_mount != nmp)
426 				continue;
427 
428 			if ((flags & MNT_FORCE) == 0) {
429 				error = EBUSY;
430 				goto err;
431 			}
432 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
433 			if (error) {
434 				if ((flags & MNT_DOOMED)) {
435 					/*
436 					 * If the mount point was busy due to
437 					 * being unmounted, it has been removed
438 					 * from the mount list already.
439 					 * Restart the iteration from the last
440 					 * collected busy entry.
441 					 */
442 					mp = SLIST_FIRST(&mplist);
443 					break;
444 				}
445 				goto err;
446 			}
447 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
448 			break;
449 		}
450 	}
451 
452 	/*
453 	 * Nested mount points cannot appear during this loop as mounting
454 	 * requires a read lock for the parent mount point.
455 	 */
456 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
457 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
458 		error = dounmount_leaf(mp, flags, p);
459 		if (error)
460 			goto err;
461 	}
462 	return (0);
463 
464 err:
465 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
466 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
467 		vfs_unbusy(mp);
468 	}
469 	return (error);
470 }
471 
472 int
473 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
474 {
475 	struct vnode *coveredvp;
476 	struct vnode *vp, *nvp;
477 	int error;
478 	int hadsyncer = 0;
479 
480 	mp->mnt_flag &=~ MNT_ASYNC;
481 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
482 	if (mp->mnt_syncer != NULL) {
483 		hadsyncer = 1;
484 		vgone(mp->mnt_syncer);
485 		mp->mnt_syncer = NULL;
486 	}
487 
488 	/*
489 	 * Before calling file system unmount, make sure
490 	 * all unveils to vnodes in here are dropped.
491 	 */
492 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
493 		unveil_removevnode(vp);
494 	}
495 
496 	if (((mp->mnt_flag & MNT_RDONLY) ||
497 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
498 	    (flags & MNT_FORCE))
499 		error = VFS_UNMOUNT(mp, flags, p);
500 
501 	if (error && !(flags & MNT_DOOMED)) {
502 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
503 			(void) vfs_allocate_syncvnode(mp);
504 		vfs_unbusy(mp);
505 		return (error);
506 	}
507 
508 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
509 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
510 		coveredvp->v_mountedhere = NULL;
511 		vrele(coveredvp);
512 	}
513 
514 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
515 		panic("unmount: dangling vnode");
516 
517 	vfs_unbusy(mp);
518 	vfs_mount_free(mp);
519 
520 	return (0);
521 }
522 
523 /*
524  * Sync each mounted filesystem.
525  */
526 #ifdef DEBUG
527 int syncprt = 0;
528 struct ctldebug debug0 = { "syncprt", &syncprt };
529 #endif
530 
531 int
532 sys_sync(struct proc *p, void *v, register_t *retval)
533 {
534 	struct mount *mp;
535 	int asyncflag;
536 
537 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
538 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
539 			continue;
540 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
541 			asyncflag = mp->mnt_flag & MNT_ASYNC;
542 			mp->mnt_flag &= ~MNT_ASYNC;
543 			uvm_vnp_sync(mp);
544 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
545 			if (asyncflag)
546 				mp->mnt_flag |= MNT_ASYNC;
547 		}
548 		vfs_unbusy(mp);
549 	}
550 
551 	return (0);
552 }
553 
554 /*
555  * Change filesystem quotas.
556  */
557 int
558 sys_quotactl(struct proc *p, void *v, register_t *retval)
559 {
560 	struct sys_quotactl_args /* {
561 		syscallarg(const char *) path;
562 		syscallarg(int) cmd;
563 		syscallarg(int) uid;
564 		syscallarg(char *) arg;
565 	} */ *uap = v;
566 	struct mount *mp;
567 	int error;
568 	struct nameidata nd;
569 
570 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
571 	if ((error = namei(&nd)) != 0)
572 		return (error);
573 	mp = nd.ni_vp->v_mount;
574 	vrele(nd.ni_vp);
575 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
576 	    SCARG(uap, arg), p));
577 }
578 
579 int
580 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
581 {
582 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
583 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
584 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
585 	char *s, *d;
586 	int error;
587 
588 	/* Don't let non-root see filesystem id (for NFS security) */
589 	if (suser(p)) {
590 		fsid_t fsid;
591 
592 		s = (char *)sp;
593 		d = (char *)uaddr;
594 
595 		memset(&fsid, 0, sizeof(fsid));
596 
597 		if ((error = copyout(s, d, co_sz1)) != 0)
598 			return (error);
599 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
600 			return (error);
601 		return (copyout(s + co_off2, d + co_off2, co_sz2));
602 	}
603 
604 	return (copyout(sp, uaddr, sizeof(*sp)));
605 }
606 
607 /*
608  * Get filesystem statistics.
609  */
610 int
611 sys_statfs(struct proc *p, void *v, register_t *retval)
612 {
613 	struct sys_statfs_args /* {
614 		syscallarg(const char *) path;
615 		syscallarg(struct statfs *) buf;
616 	} */ *uap = v;
617 	struct mount *mp;
618 	struct statfs *sp;
619 	int error;
620 	struct nameidata nd;
621 
622 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
623 	    SCARG(uap, path), p);
624 	nd.ni_pledge = PLEDGE_RPATH;
625 	nd.ni_unveil = UNVEIL_READ;
626 	if ((error = namei(&nd)) != 0)
627 		return (error);
628 	mp = nd.ni_vp->v_mount;
629 	sp = &mp->mnt_stat;
630 	vrele(nd.ni_vp);
631 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
632 		return (error);
633 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
634 
635 	return (copyout_statfs(sp, SCARG(uap, buf), p));
636 }
637 
638 /*
639  * Get filesystem statistics.
640  */
641 int
642 sys_fstatfs(struct proc *p, void *v, register_t *retval)
643 {
644 	struct sys_fstatfs_args /* {
645 		syscallarg(int) fd;
646 		syscallarg(struct statfs *) buf;
647 	} */ *uap = v;
648 	struct file *fp;
649 	struct mount *mp;
650 	struct statfs *sp;
651 	int error;
652 
653 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
654 		return (error);
655 	mp = ((struct vnode *)fp->f_data)->v_mount;
656 	if (!mp) {
657 		FRELE(fp, p);
658 		return (ENOENT);
659 	}
660 	sp = &mp->mnt_stat;
661 	error = VFS_STATFS(mp, sp, p);
662 	FRELE(fp, p);
663 	if (error)
664 		return (error);
665 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
666 
667 	return (copyout_statfs(sp, SCARG(uap, buf), p));
668 }
669 
670 /*
671  * Get statistics on all filesystems.
672  */
673 int
674 sys_getfsstat(struct proc *p, void *v, register_t *retval)
675 {
676 	struct sys_getfsstat_args /* {
677 		syscallarg(struct statfs *) buf;
678 		syscallarg(size_t) bufsize;
679 		syscallarg(int) flags;
680 	} */ *uap = v;
681 	struct mount *mp;
682 	struct statfs *sp;
683 	struct statfs *sfsp;
684 	size_t count, maxcount;
685 	int error, flags = SCARG(uap, flags);
686 
687 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
688 	sfsp = SCARG(uap, buf);
689 	count = 0;
690 
691 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
692 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
693 			continue;
694 		if (sfsp && count < maxcount) {
695 			sp = &mp->mnt_stat;
696 
697 			/* Refresh stats unless MNT_NOWAIT is specified */
698 			if (flags != MNT_NOWAIT &&
699 			    flags != MNT_LAZY &&
700 			    (flags == MNT_WAIT ||
701 			    flags == 0) &&
702 			    (error = VFS_STATFS(mp, sp, p))) {
703 				vfs_unbusy(mp);
704 				continue;
705 			}
706 
707 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
708 #if notyet
709 			if (mp->mnt_flag & MNT_SOFTDEP)
710 				sp->f_eflags = STATFS_SOFTUPD;
711 #endif
712 			error = (copyout_statfs(sp, sfsp, p));
713 			if (error) {
714 				vfs_unbusy(mp);
715 				return (error);
716 			}
717 			sfsp++;
718 		}
719 		count++;
720 		vfs_unbusy(mp);
721 	}
722 
723 	if (sfsp && count > maxcount)
724 		*retval = maxcount;
725 	else
726 		*retval = count;
727 
728 	return (0);
729 }
730 
731 /*
732  * Change current working directory to a given file descriptor.
733  */
734 int
735 sys_fchdir(struct proc *p, void *v, register_t *retval)
736 {
737 	struct sys_fchdir_args /* {
738 		syscallarg(int) fd;
739 	} */ *uap = v;
740 	struct filedesc *fdp = p->p_fd;
741 	struct vnode *vp, *tdp, *old_cdir;
742 	struct mount *mp;
743 	struct file *fp;
744 	int error;
745 
746 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
747 		return (EBADF);
748 	vp = fp->f_data;
749 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
750 		FRELE(fp, p);
751 		return (ENOTDIR);
752 	}
753 	vref(vp);
754 	FRELE(fp, p);
755 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
756 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
757 
758 	while (!error && (mp = vp->v_mountedhere) != NULL) {
759 		if (vfs_busy(mp, VB_READ|VB_WAIT))
760 			continue;
761 		error = VFS_ROOT(mp, &tdp);
762 		vfs_unbusy(mp);
763 		if (error)
764 			break;
765 		vput(vp);
766 		vp = tdp;
767 	}
768 	if (error) {
769 		vput(vp);
770 		return (error);
771 	}
772 	VOP_UNLOCK(vp);
773 	old_cdir = fdp->fd_cdir;
774 	fdp->fd_cdir = vp;
775 	vrele(old_cdir);
776 	return (0);
777 }
778 
779 /*
780  * Change current working directory (``.'').
781  */
782 int
783 sys_chdir(struct proc *p, void *v, register_t *retval)
784 {
785 	struct sys_chdir_args /* {
786 		syscallarg(const char *) path;
787 	} */ *uap = v;
788 	struct filedesc *fdp = p->p_fd;
789 	struct vnode *old_cdir;
790 	int error;
791 	struct nameidata nd;
792 
793 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
794 	    SCARG(uap, path), p);
795 	nd.ni_pledge = PLEDGE_RPATH;
796 	nd.ni_unveil = UNVEIL_READ;
797 	if ((error = change_dir(&nd, p)) != 0)
798 		return (error);
799 	p->p_p->ps_uvpcwd = nd.ni_unveil_match;
800 	old_cdir = fdp->fd_cdir;
801 	fdp->fd_cdir = nd.ni_vp;
802 	vrele(old_cdir);
803 	return (0);
804 }
805 
806 /*
807  * Change notion of root (``/'') directory.
808  */
809 int
810 sys_chroot(struct proc *p, void *v, register_t *retval)
811 {
812 	struct sys_chroot_args /* {
813 		syscallarg(const char *) path;
814 	} */ *uap = v;
815 	struct filedesc *fdp = p->p_fd;
816 	struct vnode *old_cdir, *old_rdir;
817 	int error;
818 	struct nameidata nd;
819 
820 	if ((error = suser(p)) != 0)
821 		return (error);
822 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
823 	    SCARG(uap, path), p);
824 	if ((error = change_dir(&nd, p)) != 0)
825 		return (error);
826 	if (fdp->fd_rdir != NULL) {
827 		/*
828 		 * A chroot() done inside a changed root environment does
829 		 * an automatic chdir to avoid the out-of-tree experience.
830 		 */
831 		vref(nd.ni_vp);
832 		old_rdir = fdp->fd_rdir;
833 		old_cdir = fdp->fd_cdir;
834 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
835 		vrele(old_rdir);
836 		vrele(old_cdir);
837 	} else
838 		fdp->fd_rdir = nd.ni_vp;
839 	return (0);
840 }
841 
842 /*
843  * Common routine for chroot and chdir.
844  */
845 static int
846 change_dir(struct nameidata *ndp, struct proc *p)
847 {
848 	struct vnode *vp;
849 	int error;
850 
851 	if ((error = namei(ndp)) != 0)
852 		return (error);
853 	vp = ndp->ni_vp;
854 	if (vp->v_type != VDIR)
855 		error = ENOTDIR;
856 	else
857 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
858 	if (error)
859 		vput(vp);
860 	else
861 		VOP_UNLOCK(vp);
862 	return (error);
863 }
864 
865 int
866 sys___realpath(struct proc *p, void *v, register_t *retval)
867 {
868 	struct sys___realpath_args /* {
869 		syscallarg(const char *) pathname;
870 		syscallarg(char *) resolved;
871 	} */ *uap = v;
872 	char *pathname, *c;
873 	char *rpbuf;
874 	struct nameidata nd;
875 	size_t pathlen;
876 	int error = 0;
877 
878 	if (SCARG(uap, pathname) == NULL)
879 		return (EINVAL);
880 
881 	pathname = pool_get(&namei_pool, PR_WAITOK);
882 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
883 
884 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
885 	    &pathlen)))
886 		goto end;
887 
888 	if (pathlen == 1) { /* empty string "" */
889 		error = ENOENT;
890 		goto end;
891 	}
892 	if (pathlen < 2) {
893 		error = EINVAL;
894 		goto end;
895 	}
896 
897 	/* Get cwd for relative path if needed, prepend to rpbuf */
898 	rpbuf[0] = '\0';
899 	if (pathname[0] != '/') {
900 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
901 		char *cwdbuf, *bp;
902 
903 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
904 
905 		/* vfs_getcwd_common fills this in backwards */
906 		bp = &cwdbuf[cwdlen - 1];
907 		*bp = '\0';
908 
909 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
910 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
911 
912 		if (error) {
913 			free(cwdbuf, M_TEMP, cwdlen);
914 			goto end;
915 		}
916 
917 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
918 			free(cwdbuf, M_TEMP, cwdlen);
919 			error = ENAMETOOLONG;
920 			goto end;
921 		}
922 
923 		free(cwdbuf, M_TEMP, cwdlen);
924 	}
925 
926 	/* find root "/" or "//" */
927 	for (c = pathname; *c != '\0'; c++) {
928 		if (*c != '/')
929 			break;
930 	}
931 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME | REALPATH,
932 	    UIO_SYSSPACE, pathname, p);
933 
934 	nd.ni_cnd.cn_rpbuf = rpbuf;
935 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
936 
937 	nd.ni_pledge = PLEDGE_RPATH;
938 	nd.ni_unveil = UNVEIL_READ;
939 	if ((error = namei(&nd)) != 0)
940 		goto end;
941 
942 	/* release lock and reference from namei */
943 	if (nd.ni_vp) {
944 		VOP_UNLOCK(nd.ni_vp);
945 		vrele(nd.ni_vp);
946 	}
947 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
948 	    MAXPATHLEN, NULL);
949 
950 #ifdef KTRACE
951 	if (KTRPOINT(p, KTR_NAMEI))
952 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
953 #endif
954 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
955 end:
956 	pool_put(&namei_pool, rpbuf);
957 	pool_put(&namei_pool, pathname);
958 	return (error);
959 }
960 
961 int
962 sys_unveil(struct proc *p, void *v, register_t *retval)
963 {
964 	struct sys_unveil_args /* {
965 		syscallarg(const char *) path;
966 		syscallarg(const char *) permissions;
967 	} */ *uap = v;
968 	struct process *pr = p->p_p;
969 	char *pathname, *c;
970 	struct nameidata nd;
971 	size_t pathlen;
972 	char permissions[5];
973 	int error, allow;
974 
975 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
976 		pr->ps_uvdone = 1;
977 		return (0);
978 	}
979 
980 	if (pr->ps_uvdone != 0)
981 		return EPERM;
982 
983 	error = copyinstr(SCARG(uap, permissions), permissions,
984 	    sizeof(permissions), NULL);
985 	if (error)
986 		return (error);
987 	pathname = pool_get(&namei_pool, PR_WAITOK);
988 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
989 	if (error)
990 		goto end;
991 
992 #ifdef KTRACE
993 	if (KTRPOINT(p, KTR_STRUCT))
994 		ktrstruct(p, "unveil", permissions, strlen(permissions));
995 #endif
996 	if (pathlen < 2) {
997 		error = EINVAL;
998 		goto end;
999 	}
1000 
1001 	/* find root "/" or "//" */
1002 	for (c = pathname; *c != '\0'; c++) {
1003 		if (*c != '/')
1004 			break;
1005 	}
1006 	if (*c == '\0')
1007 		/* root directory */
1008 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
1009 		    UIO_SYSSPACE, pathname, p);
1010 	else
1011 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
1012 		    UIO_SYSSPACE, pathname, p);
1013 
1014 	nd.ni_pledge = PLEDGE_UNVEIL;
1015 	if ((error = namei(&nd)) != 0)
1016 		goto ndfree;
1017 
1018 	/*
1019 	 * XXX Any access to the file or directory will allow us to
1020 	 * pledge path it
1021 	 */
1022 	allow = ((nd.ni_vp &&
1023 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1024 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1025 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1026 	    (nd.ni_dvp &&
1027 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1028 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1029 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1030 
1031 	/* release lock from namei, but keep ref */
1032 	if (nd.ni_vp)
1033 		VOP_UNLOCK(nd.ni_vp);
1034 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1035 		VOP_UNLOCK(nd.ni_dvp);
1036 
1037 	if (allow) {
1038 		error = unveil_add(p, &nd, permissions);
1039 		pr->ps_uvpcwd = unveil_lookup(p->p_fd->fd_cdir, pr, NULL);
1040 		if (pr->ps_uvpcwd == NULL) {
1041 			ssize_t i = unveil_find_cover(p->p_fd->fd_cdir, p);
1042 			if (i >= 0)
1043 				pr->ps_uvpcwd = &pr->ps_uvpaths[i];
1044 		}
1045 	}
1046 	else
1047 		error = EPERM;
1048 
1049 	/* release vref from namei, but not vref from unveil_add */
1050 	if (nd.ni_vp)
1051 		vrele(nd.ni_vp);
1052 	if (nd.ni_dvp)
1053 		vrele(nd.ni_dvp);
1054 
1055 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1056 ndfree:
1057 	unveil_free_traversed_vnodes(&nd);
1058 end:
1059 	pool_put(&namei_pool, pathname);
1060 
1061 	return (error);
1062 }
1063 
1064 /*
1065  * Check permissions, allocate an open file structure,
1066  * and call the device open routine if any.
1067  */
1068 int
1069 sys_open(struct proc *p, void *v, register_t *retval)
1070 {
1071 	struct sys_open_args /* {
1072 		syscallarg(const char *) path;
1073 		syscallarg(int) flags;
1074 		syscallarg(mode_t) mode;
1075 	} */ *uap = v;
1076 
1077 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1078 	    SCARG(uap, mode), retval));
1079 }
1080 
1081 int
1082 sys_openat(struct proc *p, void *v, register_t *retval)
1083 {
1084 	struct sys_openat_args /* {
1085 		syscallarg(int) fd;
1086 		syscallarg(const char *) path;
1087 		syscallarg(int) flags;
1088 		syscallarg(mode_t) mode;
1089 	} */ *uap = v;
1090 
1091 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1092 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1093 }
1094 
1095 int
1096 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1097     register_t *retval)
1098 {
1099 	struct filedesc *fdp = p->p_fd;
1100 	struct file *fp;
1101 	struct vnode *vp;
1102 	struct vattr vattr;
1103 	int flags, cloexec, cmode;
1104 	int type, indx, error, localtrunc = 0;
1105 	struct flock lf;
1106 	struct nameidata nd;
1107 	uint64_t ni_pledge = 0;
1108 	u_char ni_unveil = 0;
1109 
1110 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1111 		error = pledge_flock(p);
1112 		if (error != 0)
1113 			return (error);
1114 	}
1115 
1116 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1117 
1118 	fdplock(fdp);
1119 	if ((error = falloc(p, &fp, &indx)) != 0)
1120 		goto out;
1121 	fdpunlock(fdp);
1122 
1123 	flags = FFLAGS(oflags);
1124 	if (flags & FREAD) {
1125 		ni_pledge |= PLEDGE_RPATH;
1126 		ni_unveil |= UNVEIL_READ;
1127 	}
1128 	if (flags & FWRITE) {
1129 		ni_pledge |= PLEDGE_WPATH;
1130 		ni_unveil |= UNVEIL_WRITE;
1131 	}
1132 	if (oflags & O_CREAT) {
1133 		ni_pledge |= PLEDGE_CPATH;
1134 		ni_unveil |= UNVEIL_CREATE;
1135 	}
1136 
1137 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1138 	if ((p->p_p->ps_flags & PS_PLEDGE))
1139 		cmode &= ACCESSPERMS;
1140 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1141 	nd.ni_pledge = ni_pledge;
1142 	nd.ni_unveil = ni_unveil;
1143 	p->p_dupfd = -1;			/* XXX check for fdopen */
1144 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1145 		localtrunc = 1;
1146 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1147 	}
1148 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1149 		fdplock(fdp);
1150 		if (error == ENODEV &&
1151 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1152 		    (error =
1153 			dupfdopen(p, indx, flags)) == 0) {
1154 			closef(fp, p);
1155 			*retval = indx;
1156 			goto out;
1157 		}
1158 		if (error == ERESTART)
1159 			error = EINTR;
1160 		fdremove(fdp, indx);
1161 		closef(fp, p);
1162 		goto out;
1163 	}
1164 	p->p_dupfd = 0;
1165 	vp = nd.ni_vp;
1166 	fp->f_flag = flags & FMASK;
1167 	fp->f_type = DTYPE_VNODE;
1168 	fp->f_ops = &vnops;
1169 	fp->f_data = vp;
1170 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1171 		lf.l_whence = SEEK_SET;
1172 		lf.l_start = 0;
1173 		lf.l_len = 0;
1174 		if (flags & O_EXLOCK)
1175 			lf.l_type = F_WRLCK;
1176 		else
1177 			lf.l_type = F_RDLCK;
1178 		type = F_FLOCK;
1179 		if ((flags & FNONBLOCK) == 0)
1180 			type |= F_WAIT;
1181 		VOP_UNLOCK(vp);
1182 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1183 		if (error) {
1184 			fdplock(fdp);
1185 			/* closef will vn_close the file for us. */
1186 			fdremove(fdp, indx);
1187 			closef(fp, p);
1188 			goto out;
1189 		}
1190 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1191 		fp->f_iflags |= FIF_HASLOCK;
1192 	}
1193 	if (localtrunc) {
1194 		if ((fp->f_flag & FWRITE) == 0)
1195 			error = EACCES;
1196 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1197 			error = EROFS;
1198 		else if (vp->v_type == VDIR)
1199 			error = EISDIR;
1200 		else if ((error = vn_writechk(vp)) == 0) {
1201 			VATTR_NULL(&vattr);
1202 			vattr.va_size = 0;
1203 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1204 		}
1205 		if (error) {
1206 			VOP_UNLOCK(vp);
1207 			fdplock(fdp);
1208 			/* closef will close the file for us. */
1209 			fdremove(fdp, indx);
1210 			closef(fp, p);
1211 			goto out;
1212 		}
1213 	}
1214 	VOP_UNLOCK(vp);
1215 	*retval = indx;
1216 	fdplock(fdp);
1217 	fdinsert(fdp, indx, cloexec, fp);
1218 	FRELE(fp, p);
1219 out:
1220 	fdpunlock(fdp);
1221 	return (error);
1222 }
1223 
1224 /*
1225  * Open a new created file (in /tmp) suitable for mmaping.
1226  */
1227 int
1228 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1229 {
1230 	struct sys___tmpfd_args /* {
1231 		syscallarg(int) flags;
1232 	} */ *uap = v;
1233 	struct filedesc *fdp = p->p_fd;
1234 	struct file *fp;
1235 	struct vnode *vp;
1236 	int oflags = SCARG(uap, flags);
1237 	int flags, cloexec, cmode;
1238 	int indx, error;
1239 	unsigned int i;
1240 	struct nameidata nd;
1241 	char path[64];
1242 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1243 
1244 	/* most flags are hardwired */
1245 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1246 
1247 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1248 
1249 	fdplock(fdp);
1250 	if ((error = falloc(p, &fp, &indx)) != 0)
1251 		goto out;
1252 	fdpunlock(fdp);
1253 
1254 	flags = FFLAGS(oflags);
1255 
1256 	arc4random_buf(path, sizeof(path));
1257 	memcpy(path, "/tmp/", 5);
1258 	for (i = 5; i < sizeof(path) - 1; i++)
1259 		path[i] = letters[(unsigned char)path[i] & 63];
1260 	path[sizeof(path)-1] = 0;
1261 
1262 	cmode = 0600;
1263 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1264 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1265 		fdplock(fdp);
1266 		if (error == ERESTART)
1267 			error = EINTR;
1268 		fdremove(fdp, indx);
1269 		closef(fp, p);
1270 		goto out;
1271 	}
1272 	vp = nd.ni_vp;
1273 	fp->f_flag = flags & FMASK;
1274 	fp->f_type = DTYPE_VNODE;
1275 	fp->f_ops = &vnops;
1276 	fp->f_data = vp;
1277 	VOP_UNLOCK(vp);
1278 	*retval = indx;
1279 	fdplock(fdp);
1280 	fdinsert(fdp, indx, cloexec, fp);
1281 	FRELE(fp, p);
1282 
1283 	/* unlink it */
1284 	/* XXX
1285 	 * there is a wee race here, although it is mostly inconsequential.
1286 	 * perhaps someday we can create a file like object without a name...
1287 	 */
1288 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1289 	    AT_FDCWD, path, p);
1290 	if ((error = namei(&nd)) != 0) {
1291 		printf("can't unlink temp file! %d\n", error);
1292 		error = 0;
1293 	} else {
1294 		vp = nd.ni_vp;
1295 		uvm_vnp_uncache(vp);
1296 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1297 		if (error) {
1298 			printf("error removing vop: %d\n", error);
1299 			error = 0;
1300 		}
1301 	}
1302 
1303 out:
1304 	fdpunlock(fdp);
1305 	return (error);
1306 }
1307 
1308 /*
1309  * Get file handle system call
1310  */
1311 int
1312 sys_getfh(struct proc *p, void *v, register_t *retval)
1313 {
1314 	struct sys_getfh_args /* {
1315 		syscallarg(const char *) fname;
1316 		syscallarg(fhandle_t *) fhp;
1317 	} */ *uap = v;
1318 	struct vnode *vp;
1319 	fhandle_t fh;
1320 	int error;
1321 	struct nameidata nd;
1322 
1323 	/*
1324 	 * Must be super user
1325 	 */
1326 	error = suser(p);
1327 	if (error)
1328 		return (error);
1329 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1330 	    SCARG(uap, fname), p);
1331 	error = namei(&nd);
1332 	if (error)
1333 		return (error);
1334 	vp = nd.ni_vp;
1335 	memset(&fh, 0, sizeof(fh));
1336 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1337 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1338 	vput(vp);
1339 	if (error)
1340 		return (error);
1341 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1342 	return (error);
1343 }
1344 
1345 /*
1346  * Open a file given a file handle.
1347  *
1348  * Check permissions, allocate an open file structure,
1349  * and call the device open routine if any.
1350  */
1351 int
1352 sys_fhopen(struct proc *p, void *v, register_t *retval)
1353 {
1354 	struct sys_fhopen_args /* {
1355 		syscallarg(const fhandle_t *) fhp;
1356 		syscallarg(int) flags;
1357 	} */ *uap = v;
1358 	struct filedesc *fdp = p->p_fd;
1359 	struct file *fp;
1360 	struct vnode *vp = NULL;
1361 	struct mount *mp;
1362 	struct ucred *cred = p->p_ucred;
1363 	int flags, cloexec;
1364 	int type, indx, error=0;
1365 	struct flock lf;
1366 	struct vattr va;
1367 	fhandle_t fh;
1368 
1369 	/*
1370 	 * Must be super user
1371 	 */
1372 	if ((error = suser(p)))
1373 		return (error);
1374 
1375 	flags = FFLAGS(SCARG(uap, flags));
1376 	if ((flags & (FREAD | FWRITE)) == 0)
1377 		return (EINVAL);
1378 	if ((flags & O_CREAT))
1379 		return (EINVAL);
1380 
1381 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1382 
1383 	fdplock(fdp);
1384 	if ((error = falloc(p, &fp, &indx)) != 0) {
1385 		fp = NULL;
1386 		goto bad;
1387 	}
1388 
1389 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1390 		goto bad;
1391 
1392 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1393 		error = ESTALE;
1394 		goto bad;
1395 	}
1396 
1397 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1398 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1399 		goto bad;
1400 	}
1401 
1402 	/* Now do an effective vn_open */
1403 
1404 	if (vp->v_type == VSOCK) {
1405 		error = EOPNOTSUPP;
1406 		goto bad;
1407 	}
1408 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1409 		error = ENOTDIR;
1410 		goto bad;
1411 	}
1412 	if (flags & FREAD) {
1413 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1414 			goto bad;
1415 	}
1416 	if (flags & (FWRITE | O_TRUNC)) {
1417 		if (vp->v_type == VDIR) {
1418 			error = EISDIR;
1419 			goto bad;
1420 		}
1421 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1422 		    (error = vn_writechk(vp)) != 0)
1423 			goto bad;
1424 	}
1425 	if (flags & O_TRUNC) {
1426 		VATTR_NULL(&va);
1427 		va.va_size = 0;
1428 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1429 			goto bad;
1430 	}
1431 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1432 		goto bad;
1433 	if (flags & FWRITE)
1434 		vp->v_writecount++;
1435 
1436 	/* done with modified vn_open, now finish what sys_open does. */
1437 
1438 	fp->f_flag = flags & FMASK;
1439 	fp->f_type = DTYPE_VNODE;
1440 	fp->f_ops = &vnops;
1441 	fp->f_data = vp;
1442 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1443 		lf.l_whence = SEEK_SET;
1444 		lf.l_start = 0;
1445 		lf.l_len = 0;
1446 		if (flags & O_EXLOCK)
1447 			lf.l_type = F_WRLCK;
1448 		else
1449 			lf.l_type = F_RDLCK;
1450 		type = F_FLOCK;
1451 		if ((flags & FNONBLOCK) == 0)
1452 			type |= F_WAIT;
1453 		VOP_UNLOCK(vp);
1454 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1455 		if (error) {
1456 			vp = NULL;	/* closef will vn_close the file */
1457 			goto bad;
1458 		}
1459 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1460 		fp->f_iflags |= FIF_HASLOCK;
1461 	}
1462 	VOP_UNLOCK(vp);
1463 	*retval = indx;
1464 	fdinsert(fdp, indx, cloexec, fp);
1465 	fdpunlock(fdp);
1466 	FRELE(fp, p);
1467 	return (0);
1468 
1469 bad:
1470 	if (fp) {
1471 		fdremove(fdp, indx);
1472 		closef(fp, p);
1473 		if (vp != NULL)
1474 			vput(vp);
1475 	}
1476 	fdpunlock(fdp);
1477 	return (error);
1478 }
1479 
1480 int
1481 sys_fhstat(struct proc *p, void *v, register_t *retval)
1482 {
1483 	struct sys_fhstat_args /* {
1484 		syscallarg(const fhandle_t *) fhp;
1485 		syscallarg(struct stat *) sb;
1486 	} */ *uap = v;
1487 	struct stat sb;
1488 	int error;
1489 	fhandle_t fh;
1490 	struct mount *mp;
1491 	struct vnode *vp;
1492 
1493 	/*
1494 	 * Must be super user
1495 	 */
1496 	if ((error = suser(p)))
1497 		return (error);
1498 
1499 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1500 		return (error);
1501 
1502 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1503 		return (ESTALE);
1504 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1505 		return (error);
1506 	error = vn_stat(vp, &sb, p);
1507 	vput(vp);
1508 	if (error)
1509 		return (error);
1510 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1511 	return (error);
1512 }
1513 
1514 int
1515 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1516 {
1517 	struct sys_fhstatfs_args /* {
1518 		syscallarg(const fhandle_t *) fhp;
1519 		syscallarg(struct statfs *) buf;
1520 	} */ *uap = v;
1521 	struct statfs *sp;
1522 	fhandle_t fh;
1523 	struct mount *mp;
1524 	struct vnode *vp;
1525 	int error;
1526 
1527 	/*
1528 	 * Must be super user
1529 	 */
1530 	if ((error = suser(p)))
1531 		return (error);
1532 
1533 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1534 		return (error);
1535 
1536 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1537 		return (ESTALE);
1538 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1539 		return (error);
1540 	mp = vp->v_mount;
1541 	sp = &mp->mnt_stat;
1542 	vput(vp);
1543 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1544 		return (error);
1545 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1546 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1547 }
1548 
1549 /*
1550  * Create a special file or named pipe.
1551  */
1552 int
1553 sys_mknod(struct proc *p, void *v, register_t *retval)
1554 {
1555 	struct sys_mknod_args /* {
1556 		syscallarg(const char *) path;
1557 		syscallarg(mode_t) mode;
1558 		syscallarg(int) dev;
1559 	} */ *uap = v;
1560 
1561 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1562 	    SCARG(uap, dev)));
1563 }
1564 
1565 int
1566 sys_mknodat(struct proc *p, void *v, register_t *retval)
1567 {
1568 	struct sys_mknodat_args /* {
1569 		syscallarg(int) fd;
1570 		syscallarg(const char *) path;
1571 		syscallarg(mode_t) mode;
1572 		syscallarg(dev_t) dev;
1573 	} */ *uap = v;
1574 
1575 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1576 	    SCARG(uap, mode), SCARG(uap, dev)));
1577 }
1578 
1579 int
1580 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1581 {
1582 	struct vnode *vp;
1583 	struct vattr vattr;
1584 	int error;
1585 	struct nameidata nd;
1586 
1587 	if (dev == VNOVAL)
1588 		return (EINVAL);
1589 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1590 	nd.ni_pledge = PLEDGE_DPATH;
1591 	nd.ni_unveil = UNVEIL_CREATE;
1592 	if ((error = namei(&nd)) != 0)
1593 		return (error);
1594 	vp = nd.ni_vp;
1595 	if (!S_ISFIFO(mode) || dev != 0) {
1596 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1597 			goto out;
1598 		if (p->p_fd->fd_rdir) {
1599 			error = EINVAL;
1600 			goto out;
1601 		}
1602 	}
1603 	if (vp != NULL)
1604 		error = EEXIST;
1605 	else {
1606 		VATTR_NULL(&vattr);
1607 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1608 		if ((p->p_p->ps_flags & PS_PLEDGE))
1609 			vattr.va_mode &= ACCESSPERMS;
1610 		vattr.va_rdev = dev;
1611 
1612 		switch (mode & S_IFMT) {
1613 		case S_IFMT:	/* used by badsect to flag bad sectors */
1614 			vattr.va_type = VBAD;
1615 			break;
1616 		case S_IFCHR:
1617 			vattr.va_type = VCHR;
1618 			break;
1619 		case S_IFBLK:
1620 			vattr.va_type = VBLK;
1621 			break;
1622 		case S_IFIFO:
1623 #ifndef FIFO
1624 			error = EOPNOTSUPP;
1625 			break;
1626 #else
1627 			if (dev == 0) {
1628 				vattr.va_type = VFIFO;
1629 				break;
1630 			}
1631 			/* FALLTHROUGH */
1632 #endif /* FIFO */
1633 		default:
1634 			error = EINVAL;
1635 			break;
1636 		}
1637 	}
1638 out:
1639 	if (!error) {
1640 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1641 		vput(nd.ni_dvp);
1642 	} else {
1643 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1644 		if (nd.ni_dvp == vp)
1645 			vrele(nd.ni_dvp);
1646 		else
1647 			vput(nd.ni_dvp);
1648 		if (vp)
1649 			vrele(vp);
1650 	}
1651 	return (error);
1652 }
1653 
1654 /*
1655  * Create a named pipe.
1656  */
1657 int
1658 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1659 {
1660 	struct sys_mkfifo_args /* {
1661 		syscallarg(const char *) path;
1662 		syscallarg(mode_t) mode;
1663 	} */ *uap = v;
1664 
1665 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1666 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1667 }
1668 
1669 int
1670 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1671 {
1672 	struct sys_mkfifoat_args /* {
1673 		syscallarg(int) fd;
1674 		syscallarg(const char *) path;
1675 		syscallarg(mode_t) mode;
1676 	} */ *uap = v;
1677 
1678 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1679 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1680 }
1681 
1682 /*
1683  * Make a hard file link.
1684  */
1685 int
1686 sys_link(struct proc *p, void *v, register_t *retval)
1687 {
1688 	struct sys_link_args /* {
1689 		syscallarg(const char *) path;
1690 		syscallarg(const char *) link;
1691 	} */ *uap = v;
1692 
1693 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1694 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1695 }
1696 
1697 int
1698 sys_linkat(struct proc *p, void *v, register_t *retval)
1699 {
1700 	struct sys_linkat_args /* {
1701 		syscallarg(int) fd1;
1702 		syscallarg(const char *) path1;
1703 		syscallarg(int) fd2;
1704 		syscallarg(const char *) path2;
1705 		syscallarg(int) flag;
1706 	} */ *uap = v;
1707 
1708 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1709 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1710 }
1711 
1712 int
1713 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1714     const char *path2, int flag)
1715 {
1716 	struct vnode *vp;
1717 	struct nameidata nd;
1718 	int error, follow;
1719 	int flags;
1720 
1721 	if (flag & ~AT_SYMLINK_FOLLOW)
1722 		return (EINVAL);
1723 
1724 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1725 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1726 	nd.ni_pledge = PLEDGE_RPATH;
1727 	nd.ni_unveil = UNVEIL_READ;
1728 	if ((error = namei(&nd)) != 0)
1729 		return (error);
1730 	vp = nd.ni_vp;
1731 
1732 	flags = LOCKPARENT;
1733 	if (vp->v_type == VDIR) {
1734 		flags |= STRIPSLASHES;
1735 	}
1736 
1737 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1738 	nd.ni_pledge = PLEDGE_CPATH;
1739 	nd.ni_unveil = UNVEIL_CREATE;
1740 	if ((error = namei(&nd)) != 0)
1741 		goto out;
1742 	if (nd.ni_vp) {
1743 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1744 		if (nd.ni_dvp == nd.ni_vp)
1745 			vrele(nd.ni_dvp);
1746 		else
1747 			vput(nd.ni_dvp);
1748 		vrele(nd.ni_vp);
1749 		error = EEXIST;
1750 		goto out;
1751 	}
1752 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1753 out:
1754 	vrele(vp);
1755 	return (error);
1756 }
1757 
1758 /*
1759  * Make a symbolic link.
1760  */
1761 int
1762 sys_symlink(struct proc *p, void *v, register_t *retval)
1763 {
1764 	struct sys_symlink_args /* {
1765 		syscallarg(const char *) path;
1766 		syscallarg(const char *) link;
1767 	} */ *uap = v;
1768 
1769 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1770 }
1771 
1772 int
1773 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1774 {
1775 	struct sys_symlinkat_args /* {
1776 		syscallarg(const char *) path;
1777 		syscallarg(int) fd;
1778 		syscallarg(const char *) link;
1779 	} */ *uap = v;
1780 
1781 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1782 	    SCARG(uap, link)));
1783 }
1784 
1785 int
1786 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1787 {
1788 	struct vattr vattr;
1789 	char *path;
1790 	int error;
1791 	struct nameidata nd;
1792 
1793 	path = pool_get(&namei_pool, PR_WAITOK);
1794 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1795 	if (error)
1796 		goto out;
1797 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1798 	nd.ni_pledge = PLEDGE_CPATH;
1799 	nd.ni_unveil = UNVEIL_CREATE;
1800 	if ((error = namei(&nd)) != 0)
1801 		goto out;
1802 	if (nd.ni_vp) {
1803 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1804 		if (nd.ni_dvp == nd.ni_vp)
1805 			vrele(nd.ni_dvp);
1806 		else
1807 			vput(nd.ni_dvp);
1808 		vrele(nd.ni_vp);
1809 		error = EEXIST;
1810 		goto out;
1811 	}
1812 	VATTR_NULL(&vattr);
1813 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1814 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1815 out:
1816 	pool_put(&namei_pool, path);
1817 	return (error);
1818 }
1819 
1820 /*
1821  * Delete a name from the filesystem.
1822  */
1823 int
1824 sys_unlink(struct proc *p, void *v, register_t *retval)
1825 {
1826 	struct sys_unlink_args /* {
1827 		syscallarg(const char *) path;
1828 	} */ *uap = v;
1829 
1830 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1831 }
1832 
1833 int
1834 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1835 {
1836 	struct sys_unlinkat_args /* {
1837 		syscallarg(int) fd;
1838 		syscallarg(const char *) path;
1839 		syscallarg(int) flag;
1840 	} */ *uap = v;
1841 
1842 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1843 	    SCARG(uap, flag)));
1844 }
1845 
1846 int
1847 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1848 {
1849 	struct vnode *vp;
1850 	int error;
1851 	struct nameidata nd;
1852 
1853 	if (flag & ~AT_REMOVEDIR)
1854 		return (EINVAL);
1855 
1856 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1857 	    fd, path, p);
1858 	nd.ni_pledge = PLEDGE_CPATH;
1859 	nd.ni_unveil = UNVEIL_CREATE;
1860 	if ((error = namei(&nd)) != 0)
1861 		return (error);
1862 	vp = nd.ni_vp;
1863 
1864 	if (flag & AT_REMOVEDIR) {
1865 		if (vp->v_type != VDIR) {
1866 			error = ENOTDIR;
1867 			goto out;
1868 		}
1869 		/*
1870 		 * No rmdir "." please.
1871 		 */
1872 		if (nd.ni_dvp == vp) {
1873 			error = EINVAL;
1874 			goto out;
1875 		}
1876 		/*
1877 		 * A mounted on directory cannot be deleted.
1878 		 */
1879 		if (vp->v_mountedhere != NULL) {
1880 			error = EBUSY;
1881 			goto out;
1882 		}
1883 	}
1884 
1885 	/*
1886 	 * The root of a mounted filesystem cannot be deleted.
1887 	 */
1888 	if (vp->v_flag & VROOT)
1889 		error = EBUSY;
1890 out:
1891 	if (!error) {
1892 		if (flag & AT_REMOVEDIR) {
1893 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1894 		} else {
1895 			(void)uvm_vnp_uncache(vp);
1896 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1897 		}
1898 	} else {
1899 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1900 		if (nd.ni_dvp == vp)
1901 			vrele(nd.ni_dvp);
1902 		else
1903 			vput(nd.ni_dvp);
1904 		vput(vp);
1905 	}
1906 	return (error);
1907 }
1908 
1909 /*
1910  * Reposition read/write file offset.
1911  */
1912 int
1913 sys_lseek(struct proc *p, void *v, register_t *retval)
1914 {
1915 	struct sys_lseek_args /* {
1916 		syscallarg(int) fd;
1917 		syscallarg(int) pad;
1918 		syscallarg(off_t) offset;
1919 		syscallarg(int) whence;
1920 	} */ *uap = v;
1921 	struct filedesc *fdp = p->p_fd;
1922 	struct file *fp;
1923 	off_t offset;
1924 	int error;
1925 
1926 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1927 		return (EBADF);
1928 	if (fp->f_ops->fo_seek == NULL) {
1929 		error = ESPIPE;
1930 		goto bad;
1931 	}
1932 	offset = SCARG(uap, offset);
1933 
1934 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1935 	if (error)
1936 		goto bad;
1937 
1938 	*(off_t *)retval = offset;
1939 	mtx_enter(&fp->f_mtx);
1940 	fp->f_seek++;
1941 	mtx_leave(&fp->f_mtx);
1942 	error = 0;
1943  bad:
1944 	FRELE(fp, p);
1945 	return (error);
1946 }
1947 
1948 /*
1949  * Check access permissions.
1950  */
1951 int
1952 sys_access(struct proc *p, void *v, register_t *retval)
1953 {
1954 	struct sys_access_args /* {
1955 		syscallarg(const char *) path;
1956 		syscallarg(int) amode;
1957 	} */ *uap = v;
1958 
1959 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1960 	    SCARG(uap, amode), 0));
1961 }
1962 
1963 int
1964 sys_faccessat(struct proc *p, void *v, register_t *retval)
1965 {
1966 	struct sys_faccessat_args /* {
1967 		syscallarg(int) fd;
1968 		syscallarg(const char *) path;
1969 		syscallarg(int) amode;
1970 		syscallarg(int) flag;
1971 	} */ *uap = v;
1972 
1973 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1974 	    SCARG(uap, amode), SCARG(uap, flag)));
1975 }
1976 
1977 int
1978 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1979 {
1980 	struct vnode *vp;
1981 	struct ucred *newcred, *oldcred;
1982 	struct nameidata nd;
1983 	int error;
1984 
1985 	if (amode & ~(R_OK | W_OK | X_OK))
1986 		return (EINVAL);
1987 	if (flag & ~AT_EACCESS)
1988 		return (EINVAL);
1989 
1990 	newcred = NULL;
1991 	oldcred = p->p_ucred;
1992 
1993 	/*
1994 	 * If access as real ids was requested and they really differ,
1995 	 * give the thread new creds with them reset
1996 	 */
1997 	if ((flag & AT_EACCESS) == 0 &&
1998 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1999 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
2000 		p->p_ucred = newcred = crdup(oldcred);
2001 		newcred->cr_uid = newcred->cr_ruid;
2002 		newcred->cr_gid = newcred->cr_rgid;
2003 	}
2004 
2005 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2006 	nd.ni_pledge = PLEDGE_RPATH;
2007 	nd.ni_unveil = UNVEIL_READ;
2008 	if ((error = namei(&nd)) != 0)
2009 		goto out;
2010 	vp = nd.ni_vp;
2011 
2012 	/* Flags == 0 means only check for existence. */
2013 	if (amode) {
2014 		int vflags = 0;
2015 
2016 		if (amode & R_OK)
2017 			vflags |= VREAD;
2018 		if (amode & W_OK)
2019 			vflags |= VWRITE;
2020 		if (amode & X_OK)
2021 			vflags |= VEXEC;
2022 
2023 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2024 		if (!error && (vflags & VWRITE))
2025 			error = vn_writechk(vp);
2026 	}
2027 	vput(vp);
2028 out:
2029 	if (newcred != NULL) {
2030 		p->p_ucred = oldcred;
2031 		crfree(newcred);
2032 	}
2033 	return (error);
2034 }
2035 
2036 /*
2037  * Get file status; this version follows links.
2038  */
2039 int
2040 sys_stat(struct proc *p, void *v, register_t *retval)
2041 {
2042 	struct sys_stat_args /* {
2043 		syscallarg(const char *) path;
2044 		syscallarg(struct stat *) ub;
2045 	} */ *uap = v;
2046 
2047 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2048 }
2049 
2050 int
2051 sys_fstatat(struct proc *p, void *v, register_t *retval)
2052 {
2053 	struct sys_fstatat_args /* {
2054 		syscallarg(int) fd;
2055 		syscallarg(const char *) path;
2056 		syscallarg(struct stat *) buf;
2057 		syscallarg(int) flag;
2058 	} */ *uap = v;
2059 
2060 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2061 	    SCARG(uap, buf), SCARG(uap, flag)));
2062 }
2063 
2064 int
2065 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2066 {
2067 	struct stat sb;
2068 	int error, follow;
2069 	struct nameidata nd;
2070 
2071 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2072 		return (EINVAL);
2073 
2074 
2075 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2076 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2077 	nd.ni_pledge = PLEDGE_RPATH;
2078 	nd.ni_unveil = UNVEIL_READ;
2079 	if ((error = namei(&nd)) != 0)
2080 		return (error);
2081 	error = vn_stat(nd.ni_vp, &sb, p);
2082 	vput(nd.ni_vp);
2083 	if (error)
2084 		return (error);
2085 	/* Don't let non-root see generation numbers (for NFS security) */
2086 	if (suser(p))
2087 		sb.st_gen = 0;
2088 	error = copyout(&sb, buf, sizeof(sb));
2089 #ifdef KTRACE
2090 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2091 		ktrstat(p, &sb);
2092 #endif
2093 	return (error);
2094 }
2095 
2096 /*
2097  * Get file status; this version does not follow links.
2098  */
2099 int
2100 sys_lstat(struct proc *p, void *v, register_t *retval)
2101 {
2102 	struct sys_lstat_args /* {
2103 		syscallarg(const char *) path;
2104 		syscallarg(struct stat *) ub;
2105 	} */ *uap = v;
2106 
2107 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2108 	    AT_SYMLINK_NOFOLLOW));
2109 }
2110 
2111 /*
2112  * Get configurable pathname variables.
2113  */
2114 int
2115 sys_pathconf(struct proc *p, void *v, register_t *retval)
2116 {
2117 	struct sys_pathconf_args /* {
2118 		syscallarg(const char *) path;
2119 		syscallarg(int) name;
2120 	} */ *uap = v;
2121 	int error;
2122 	struct nameidata nd;
2123 
2124 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2125 	    SCARG(uap, path), p);
2126 	nd.ni_pledge = PLEDGE_RPATH;
2127 	nd.ni_unveil = UNVEIL_READ;
2128 	if ((error = namei(&nd)) != 0)
2129 		return (error);
2130 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2131 	vput(nd.ni_vp);
2132 	return (error);
2133 }
2134 
2135 /*
2136  * Return target name of a symbolic link.
2137  */
2138 int
2139 sys_readlink(struct proc *p, void *v, register_t *retval)
2140 {
2141 	struct sys_readlink_args /* {
2142 		syscallarg(const char *) path;
2143 		syscallarg(char *) buf;
2144 		syscallarg(size_t) count;
2145 	} */ *uap = v;
2146 
2147 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2148 	    SCARG(uap, count), retval));
2149 }
2150 
2151 int
2152 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2153 {
2154 	struct sys_readlinkat_args /* {
2155 		syscallarg(int) fd;
2156 		syscallarg(const char *) path;
2157 		syscallarg(char *) buf;
2158 		syscallarg(size_t) count;
2159 	} */ *uap = v;
2160 
2161 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2162 	    SCARG(uap, buf), SCARG(uap, count), retval));
2163 }
2164 
2165 int
2166 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2167     size_t count, register_t *retval)
2168 {
2169 	struct vnode *vp;
2170 	struct iovec aiov;
2171 	struct uio auio;
2172 	int error;
2173 	struct nameidata nd;
2174 
2175 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2176 	nd.ni_pledge = PLEDGE_RPATH;
2177 	nd.ni_unveil = UNVEIL_READ;
2178 	if ((error = namei(&nd)) != 0)
2179 		return (error);
2180 	vp = nd.ni_vp;
2181 	if (vp->v_type != VLNK)
2182 		error = EINVAL;
2183 	else {
2184 		aiov.iov_base = buf;
2185 		aiov.iov_len = count;
2186 		auio.uio_iov = &aiov;
2187 		auio.uio_iovcnt = 1;
2188 		auio.uio_offset = 0;
2189 		auio.uio_rw = UIO_READ;
2190 		auio.uio_segflg = UIO_USERSPACE;
2191 		auio.uio_procp = p;
2192 		auio.uio_resid = count;
2193 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2194 		*retval = count - auio.uio_resid;
2195 	}
2196 	vput(vp);
2197 	return (error);
2198 }
2199 
2200 /*
2201  * Change flags of a file given a path name.
2202  */
2203 int
2204 sys_chflags(struct proc *p, void *v, register_t *retval)
2205 {
2206 	struct sys_chflags_args /* {
2207 		syscallarg(const char *) path;
2208 		syscallarg(u_int) flags;
2209 	} */ *uap = v;
2210 
2211 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2212 	    SCARG(uap, flags), 0));
2213 }
2214 
2215 int
2216 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2217 {
2218 	struct sys_chflagsat_args /* {
2219 		syscallarg(int) fd;
2220 		syscallarg(const char *) path;
2221 		syscallarg(u_int) flags;
2222 		syscallarg(int) atflags;
2223 	} */ *uap = v;
2224 
2225 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2226 	    SCARG(uap, flags), SCARG(uap, atflags)));
2227 }
2228 
2229 int
2230 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2231 {
2232 	struct nameidata nd;
2233 	int error, follow;
2234 
2235 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2236 		return (EINVAL);
2237 
2238 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2239 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2240 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2241 	nd.ni_unveil = UNVEIL_WRITE;
2242 	if ((error = namei(&nd)) != 0)
2243 		return (error);
2244 	return (dovchflags(p, nd.ni_vp, flags));
2245 }
2246 
2247 /*
2248  * Change flags of a file given a file descriptor.
2249  */
2250 int
2251 sys_fchflags(struct proc *p, void *v, register_t *retval)
2252 {
2253 	struct sys_fchflags_args /* {
2254 		syscallarg(int) fd;
2255 		syscallarg(u_int) flags;
2256 	} */ *uap = v;
2257 	struct file *fp;
2258 	struct vnode *vp;
2259 	int error;
2260 
2261 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2262 		return (error);
2263 	vp = fp->f_data;
2264 	vref(vp);
2265 	FRELE(fp, p);
2266 	return (dovchflags(p, vp, SCARG(uap, flags)));
2267 }
2268 
2269 int
2270 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2271 {
2272 	struct vattr vattr;
2273 	int error;
2274 
2275 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2276 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2277 		error = EROFS;
2278 	else if (flags == VNOVAL)
2279 		error = EINVAL;
2280 	else {
2281 		if (suser(p)) {
2282 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2283 			    != 0)
2284 				goto out;
2285 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2286 				error = EINVAL;
2287 				goto out;
2288 			}
2289 		}
2290 		VATTR_NULL(&vattr);
2291 		vattr.va_flags = flags;
2292 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2293 	}
2294 out:
2295 	vput(vp);
2296 	return (error);
2297 }
2298 
2299 /*
2300  * Change mode of a file given path name.
2301  */
2302 int
2303 sys_chmod(struct proc *p, void *v, register_t *retval)
2304 {
2305 	struct sys_chmod_args /* {
2306 		syscallarg(const char *) path;
2307 		syscallarg(mode_t) mode;
2308 	} */ *uap = v;
2309 
2310 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2311 }
2312 
2313 int
2314 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2315 {
2316 	struct sys_fchmodat_args /* {
2317 		syscallarg(int) fd;
2318 		syscallarg(const char *) path;
2319 		syscallarg(mode_t) mode;
2320 		syscallarg(int) flag;
2321 	} */ *uap = v;
2322 
2323 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2324 	    SCARG(uap, mode), SCARG(uap, flag)));
2325 }
2326 
2327 int
2328 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2329 {
2330 	struct vnode *vp;
2331 	struct vattr vattr;
2332 	int error, follow;
2333 	struct nameidata nd;
2334 
2335 	if (mode & ~(S_IFMT | ALLPERMS))
2336 		return (EINVAL);
2337 	if ((p->p_p->ps_flags & PS_PLEDGE))
2338 		mode &= ACCESSPERMS;
2339 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2340 		return (EINVAL);
2341 
2342 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2343 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2344 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2345 	nd.ni_unveil = UNVEIL_WRITE;
2346 	if ((error = namei(&nd)) != 0)
2347 		return (error);
2348 	vp = nd.ni_vp;
2349 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2350 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2351 		error = EROFS;
2352 	else {
2353 		VATTR_NULL(&vattr);
2354 		vattr.va_mode = mode & ALLPERMS;
2355 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2356 	}
2357 	vput(vp);
2358 	return (error);
2359 }
2360 
2361 /*
2362  * Change mode of a file given a file descriptor.
2363  */
2364 int
2365 sys_fchmod(struct proc *p, void *v, register_t *retval)
2366 {
2367 	struct sys_fchmod_args /* {
2368 		syscallarg(int) fd;
2369 		syscallarg(mode_t) mode;
2370 	} */ *uap = v;
2371 	struct vattr vattr;
2372 	struct vnode *vp;
2373 	struct file *fp;
2374 	mode_t mode = SCARG(uap, mode);
2375 	int error;
2376 
2377 	if (mode & ~(S_IFMT | ALLPERMS))
2378 		return (EINVAL);
2379 	if ((p->p_p->ps_flags & PS_PLEDGE))
2380 		mode &= ACCESSPERMS;
2381 
2382 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2383 		return (error);
2384 	vp = fp->f_data;
2385 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2386 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2387 		error = EROFS;
2388 	else {
2389 		VATTR_NULL(&vattr);
2390 		vattr.va_mode = mode & ALLPERMS;
2391 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2392 	}
2393 	VOP_UNLOCK(vp);
2394 	FRELE(fp, p);
2395 	return (error);
2396 }
2397 
2398 /*
2399  * Set ownership given a path name.
2400  */
2401 int
2402 sys_chown(struct proc *p, void *v, register_t *retval)
2403 {
2404 	struct sys_chown_args /* {
2405 		syscallarg(const char *) path;
2406 		syscallarg(uid_t) uid;
2407 		syscallarg(gid_t) gid;
2408 	} */ *uap = v;
2409 
2410 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2411 	    SCARG(uap, gid), 0));
2412 }
2413 
2414 int
2415 sys_fchownat(struct proc *p, void *v, register_t *retval)
2416 {
2417 	struct sys_fchownat_args /* {
2418 		syscallarg(int) fd;
2419 		syscallarg(const char *) path;
2420 		syscallarg(uid_t) uid;
2421 		syscallarg(gid_t) gid;
2422 		syscallarg(int) flag;
2423 	} */ *uap = v;
2424 
2425 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2426 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2427 }
2428 
2429 int
2430 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2431     int flag)
2432 {
2433 	struct vnode *vp;
2434 	struct vattr vattr;
2435 	int error, follow;
2436 	struct nameidata nd;
2437 	mode_t mode;
2438 
2439 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2440 		return (EINVAL);
2441 
2442 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2443 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2444 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2445 	nd.ni_unveil = UNVEIL_WRITE;
2446 	if ((error = namei(&nd)) != 0)
2447 		return (error);
2448 	vp = nd.ni_vp;
2449 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2450 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2451 		error = EROFS;
2452 	else {
2453 		if ((error = pledge_chown(p, uid, gid)))
2454 			goto out;
2455 		if ((uid != -1 || gid != -1) &&
2456 		    !vnoperm(vp) &&
2457 		    (suser(p) || suid_clear)) {
2458 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2459 			if (error)
2460 				goto out;
2461 			mode = vattr.va_mode & ~(VSUID | VSGID);
2462 			if (mode == vattr.va_mode)
2463 				mode = VNOVAL;
2464 		} else
2465 			mode = VNOVAL;
2466 		VATTR_NULL(&vattr);
2467 		vattr.va_uid = uid;
2468 		vattr.va_gid = gid;
2469 		vattr.va_mode = mode;
2470 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2471 	}
2472 out:
2473 	vput(vp);
2474 	return (error);
2475 }
2476 
2477 /*
2478  * Set ownership given a path name, without following links.
2479  */
2480 int
2481 sys_lchown(struct proc *p, void *v, register_t *retval)
2482 {
2483 	struct sys_lchown_args /* {
2484 		syscallarg(const char *) path;
2485 		syscallarg(uid_t) uid;
2486 		syscallarg(gid_t) gid;
2487 	} */ *uap = v;
2488 	struct vnode *vp;
2489 	struct vattr vattr;
2490 	int error;
2491 	struct nameidata nd;
2492 	mode_t mode;
2493 	uid_t uid = SCARG(uap, uid);
2494 	gid_t gid = SCARG(uap, gid);
2495 
2496 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2497 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2498 	nd.ni_unveil = UNVEIL_WRITE;
2499 	if ((error = namei(&nd)) != 0)
2500 		return (error);
2501 	vp = nd.ni_vp;
2502 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2503 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2504 		error = EROFS;
2505 	else {
2506 		if ((error = pledge_chown(p, uid, gid)))
2507 			goto out;
2508 		if ((uid != -1 || gid != -1) &&
2509 		    !vnoperm(vp) &&
2510 		    (suser(p) || suid_clear)) {
2511 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2512 			if (error)
2513 				goto out;
2514 			mode = vattr.va_mode & ~(VSUID | VSGID);
2515 			if (mode == vattr.va_mode)
2516 				mode = VNOVAL;
2517 		} else
2518 			mode = VNOVAL;
2519 		VATTR_NULL(&vattr);
2520 		vattr.va_uid = uid;
2521 		vattr.va_gid = gid;
2522 		vattr.va_mode = mode;
2523 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2524 	}
2525 out:
2526 	vput(vp);
2527 	return (error);
2528 }
2529 
2530 /*
2531  * Set ownership given a file descriptor.
2532  */
2533 int
2534 sys_fchown(struct proc *p, void *v, register_t *retval)
2535 {
2536 	struct sys_fchown_args /* {
2537 		syscallarg(int) fd;
2538 		syscallarg(uid_t) uid;
2539 		syscallarg(gid_t) gid;
2540 	} */ *uap = v;
2541 	struct vnode *vp;
2542 	struct vattr vattr;
2543 	int error;
2544 	struct file *fp;
2545 	mode_t mode;
2546 	uid_t uid = SCARG(uap, uid);
2547 	gid_t gid = SCARG(uap, gid);
2548 
2549 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2550 		return (error);
2551 	vp = fp->f_data;
2552 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2553 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2554 		error = EROFS;
2555 	else {
2556 		if ((error = pledge_chown(p, uid, gid)))
2557 			goto out;
2558 		if ((uid != -1 || gid != -1) &&
2559 		    !vnoperm(vp) &&
2560 		    (suser(p) || suid_clear)) {
2561 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2562 			if (error)
2563 				goto out;
2564 			mode = vattr.va_mode & ~(VSUID | VSGID);
2565 			if (mode == vattr.va_mode)
2566 				mode = VNOVAL;
2567 		} else
2568 			mode = VNOVAL;
2569 		VATTR_NULL(&vattr);
2570 		vattr.va_uid = uid;
2571 		vattr.va_gid = gid;
2572 		vattr.va_mode = mode;
2573 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2574 	}
2575 out:
2576 	VOP_UNLOCK(vp);
2577 	FRELE(fp, p);
2578 	return (error);
2579 }
2580 
2581 /*
2582  * Set the access and modification times given a path name.
2583  */
2584 int
2585 sys_utimes(struct proc *p, void *v, register_t *retval)
2586 {
2587 	struct sys_utimes_args /* {
2588 		syscallarg(const char *) path;
2589 		syscallarg(const struct timeval *) tptr;
2590 	} */ *uap = v;
2591 
2592 	struct timespec ts[2];
2593 	struct timeval tv[2];
2594 	const struct timeval *tvp;
2595 	int error;
2596 
2597 	tvp = SCARG(uap, tptr);
2598 	if (tvp != NULL) {
2599 		error = copyin(tvp, tv, sizeof(tv));
2600 		if (error)
2601 			return (error);
2602 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2603 			return (EINVAL);
2604 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2605 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2606 	} else
2607 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2608 
2609 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2610 }
2611 
2612 int
2613 sys_utimensat(struct proc *p, void *v, register_t *retval)
2614 {
2615 	struct sys_utimensat_args /* {
2616 		syscallarg(int) fd;
2617 		syscallarg(const char *) path;
2618 		syscallarg(const struct timespec *) times;
2619 		syscallarg(int) flag;
2620 	} */ *uap = v;
2621 
2622 	struct timespec ts[2];
2623 	const struct timespec *tsp;
2624 	int error, i;
2625 
2626 	tsp = SCARG(uap, times);
2627 	if (tsp != NULL) {
2628 		error = copyin(tsp, ts, sizeof(ts));
2629 		if (error)
2630 			return (error);
2631 		for (i = 0; i < nitems(ts); i++) {
2632 			if (ts[i].tv_nsec == UTIME_NOW)
2633 				continue;
2634 			if (ts[i].tv_nsec == UTIME_OMIT)
2635 				continue;
2636 			if (!timespecisvalid(&ts[i]))
2637 				return (EINVAL);
2638 		}
2639 	} else
2640 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2641 
2642 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2643 	    SCARG(uap, flag)));
2644 }
2645 
2646 int
2647 doutimensat(struct proc *p, int fd, const char *path,
2648     struct timespec ts[2], int flag)
2649 {
2650 	struct vnode *vp;
2651 	int error, follow;
2652 	struct nameidata nd;
2653 
2654 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2655 		return (EINVAL);
2656 
2657 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2658 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2659 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2660 	nd.ni_unveil = UNVEIL_WRITE;
2661 	if ((error = namei(&nd)) != 0)
2662 		return (error);
2663 	vp = nd.ni_vp;
2664 
2665 	return (dovutimens(p, vp, ts));
2666 }
2667 
2668 int
2669 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2670 {
2671 	struct vattr vattr;
2672 	struct timespec now;
2673 	int error;
2674 
2675 #ifdef KTRACE
2676 	/* if they're both UTIME_NOW, then don't report either */
2677 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2678 	    KTRPOINT(p, KTR_STRUCT)) {
2679 		ktrabstimespec(p, &ts[0]);
2680 		ktrabstimespec(p, &ts[1]);
2681 	}
2682 #endif
2683 
2684 	VATTR_NULL(&vattr);
2685 
2686 	/*  make sure ctime is updated even if neither mtime nor atime is */
2687 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2688 
2689 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2690 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2691 			vattr.va_vaflags |= VA_UTIMES_NULL;
2692 
2693 		getnanotime(&now);
2694 		if (ts[0].tv_nsec == UTIME_NOW)
2695 			ts[0] = now;
2696 		if (ts[1].tv_nsec == UTIME_NOW)
2697 			ts[1] = now;
2698 	}
2699 
2700 	if (ts[0].tv_nsec != UTIME_OMIT)
2701 		vattr.va_atime = ts[0];
2702 	if (ts[1].tv_nsec != UTIME_OMIT)
2703 		vattr.va_mtime = ts[1];
2704 
2705 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2706 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2707 		error = EROFS;
2708 	else
2709 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2710 	vput(vp);
2711 	return (error);
2712 }
2713 
2714 /*
2715  * Set the access and modification times given a file descriptor.
2716  */
2717 int
2718 sys_futimes(struct proc *p, void *v, register_t *retval)
2719 {
2720 	struct sys_futimes_args /* {
2721 		syscallarg(int) fd;
2722 		syscallarg(const struct timeval *) tptr;
2723 	} */ *uap = v;
2724 	struct timeval tv[2];
2725 	struct timespec ts[2];
2726 	const struct timeval *tvp;
2727 	int error;
2728 
2729 	tvp = SCARG(uap, tptr);
2730 	if (tvp != NULL) {
2731 		error = copyin(tvp, tv, sizeof(tv));
2732 		if (error)
2733 			return (error);
2734 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2735 			return (EINVAL);
2736 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2737 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2738 	} else
2739 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2740 
2741 	return (dofutimens(p, SCARG(uap, fd), ts));
2742 }
2743 
2744 int
2745 sys_futimens(struct proc *p, void *v, register_t *retval)
2746 {
2747 	struct sys_futimens_args /* {
2748 		syscallarg(int) fd;
2749 		syscallarg(const struct timespec *) times;
2750 	} */ *uap = v;
2751 	struct timespec ts[2];
2752 	const struct timespec *tsp;
2753 	int error, i;
2754 
2755 	tsp = SCARG(uap, times);
2756 	if (tsp != NULL) {
2757 		error = copyin(tsp, ts, sizeof(ts));
2758 		if (error)
2759 			return (error);
2760 		for (i = 0; i < nitems(ts); i++) {
2761 			if (ts[i].tv_nsec == UTIME_NOW)
2762 				continue;
2763 			if (ts[i].tv_nsec == UTIME_OMIT)
2764 				continue;
2765 			if (!timespecisvalid(&ts[i]))
2766 				return (EINVAL);
2767 		}
2768 	} else
2769 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2770 
2771 	return (dofutimens(p, SCARG(uap, fd), ts));
2772 }
2773 
2774 int
2775 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2776 {
2777 	struct file *fp;
2778 	struct vnode *vp;
2779 	int error;
2780 
2781 	if ((error = getvnode(p, fd, &fp)) != 0)
2782 		return (error);
2783 	vp = fp->f_data;
2784 	vref(vp);
2785 	FRELE(fp, p);
2786 
2787 	return (dovutimens(p, vp, ts));
2788 }
2789 
2790 /*
2791  * Truncate a file given its path name.
2792  */
2793 int
2794 sys_truncate(struct proc *p, void *v, register_t *retval)
2795 {
2796 	struct sys_truncate_args /* {
2797 		syscallarg(const char *) path;
2798 		syscallarg(int) pad;
2799 		syscallarg(off_t) length;
2800 	} */ *uap = v;
2801 	struct vnode *vp;
2802 	struct vattr vattr;
2803 	int error;
2804 	struct nameidata nd;
2805 
2806 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2807 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2808 	nd.ni_unveil = UNVEIL_WRITE;
2809 	if ((error = namei(&nd)) != 0)
2810 		return (error);
2811 	vp = nd.ni_vp;
2812 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2813 	if (vp->v_type == VDIR)
2814 		error = EISDIR;
2815 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2816 	    (error = vn_writechk(vp)) == 0) {
2817 		VATTR_NULL(&vattr);
2818 		vattr.va_size = SCARG(uap, length);
2819 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2820 	}
2821 	vput(vp);
2822 	return (error);
2823 }
2824 
2825 /*
2826  * Truncate a file given a file descriptor.
2827  */
2828 int
2829 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2830 {
2831 	struct sys_ftruncate_args /* {
2832 		syscallarg(int) fd;
2833 		syscallarg(int) pad;
2834 		syscallarg(off_t) length;
2835 	} */ *uap = v;
2836 	struct vattr vattr;
2837 	struct vnode *vp;
2838 	struct file *fp;
2839 	off_t len;
2840 	int error;
2841 
2842 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2843 		return (error);
2844 	len = SCARG(uap, length);
2845 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2846 		error = EINVAL;
2847 		goto bad;
2848 	}
2849 	vp = fp->f_data;
2850 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2851 	if (vp->v_type == VDIR)
2852 		error = EISDIR;
2853 	else if ((error = vn_writechk(vp)) == 0) {
2854 		VATTR_NULL(&vattr);
2855 		vattr.va_size = len;
2856 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2857 	}
2858 	VOP_UNLOCK(vp);
2859 bad:
2860 	FRELE(fp, p);
2861 	return (error);
2862 }
2863 
2864 /*
2865  * Sync an open file.
2866  */
2867 int
2868 sys_fsync(struct proc *p, void *v, register_t *retval)
2869 {
2870 	struct sys_fsync_args /* {
2871 		syscallarg(int) fd;
2872 	} */ *uap = v;
2873 	struct vnode *vp;
2874 	struct file *fp;
2875 	int error;
2876 
2877 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2878 		return (error);
2879 	vp = fp->f_data;
2880 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2881 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2882 #ifdef FFS_SOFTUPDATES
2883 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2884 		error = softdep_fsync(vp);
2885 #endif
2886 
2887 	VOP_UNLOCK(vp);
2888 	FRELE(fp, p);
2889 	return (error);
2890 }
2891 
2892 /*
2893  * Rename files.  Source and destination must either both be directories,
2894  * or both not be directories.  If target is a directory, it must be empty.
2895  */
2896 int
2897 sys_rename(struct proc *p, void *v, register_t *retval)
2898 {
2899 	struct sys_rename_args /* {
2900 		syscallarg(const char *) from;
2901 		syscallarg(const char *) to;
2902 	} */ *uap = v;
2903 
2904 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2905 	    SCARG(uap, to)));
2906 }
2907 
2908 int
2909 sys_renameat(struct proc *p, void *v, register_t *retval)
2910 {
2911 	struct sys_renameat_args /* {
2912 		syscallarg(int) fromfd;
2913 		syscallarg(const char *) from;
2914 		syscallarg(int) tofd;
2915 		syscallarg(const char *) to;
2916 	} */ *uap = v;
2917 
2918 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2919 	    SCARG(uap, tofd), SCARG(uap, to)));
2920 }
2921 
2922 int
2923 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2924     const char *to)
2925 {
2926 	struct vnode *tvp, *fvp, *tdvp;
2927 	struct nameidata fromnd, tond;
2928 	int error;
2929 	int flags;
2930 
2931 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2932 	    fromfd, from, p);
2933 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2934 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2935 	if ((error = namei(&fromnd)) != 0)
2936 		return (error);
2937 	fvp = fromnd.ni_vp;
2938 
2939 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2940 	/*
2941 	 * rename("foo/", "bar/");  is  OK
2942 	 */
2943 	if (fvp->v_type == VDIR)
2944 		flags |= STRIPSLASHES;
2945 
2946 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2947 	tond.ni_pledge = PLEDGE_CPATH;
2948 	tond.ni_unveil = UNVEIL_CREATE;
2949 	if ((error = namei(&tond)) != 0) {
2950 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2951 		vrele(fromnd.ni_dvp);
2952 		vrele(fvp);
2953 		goto out1;
2954 	}
2955 	tdvp = tond.ni_dvp;
2956 	tvp = tond.ni_vp;
2957 	if (tvp != NULL) {
2958 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2959 			error = ENOTDIR;
2960 			goto out;
2961 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2962 			error = EISDIR;
2963 			goto out;
2964 		}
2965 	}
2966 	if (fvp == tdvp)
2967 		error = EINVAL;
2968 	/*
2969 	 * If source is the same as the destination (that is the
2970 	 * same inode number)
2971 	 */
2972 	if (fvp == tvp)
2973 		error = -1;
2974 out:
2975 	if (!error) {
2976 		if (tvp) {
2977 			(void)uvm_vnp_uncache(tvp);
2978 		}
2979 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2980 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2981 	} else {
2982 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2983 		if (tdvp == tvp)
2984 			vrele(tdvp);
2985 		else
2986 			vput(tdvp);
2987 		if (tvp)
2988 			vput(tvp);
2989 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2990 		vrele(fromnd.ni_dvp);
2991 		vrele(fvp);
2992 	}
2993 	vrele(tond.ni_startdir);
2994 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2995 out1:
2996 	if (fromnd.ni_startdir)
2997 		vrele(fromnd.ni_startdir);
2998 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2999 	if (error == -1)
3000 		return (0);
3001 	return (error);
3002 }
3003 
3004 /*
3005  * Make a directory file.
3006  */
3007 int
3008 sys_mkdir(struct proc *p, void *v, register_t *retval)
3009 {
3010 	struct sys_mkdir_args /* {
3011 		syscallarg(const char *) path;
3012 		syscallarg(mode_t) mode;
3013 	} */ *uap = v;
3014 
3015 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3016 }
3017 
3018 int
3019 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3020 {
3021 	struct sys_mkdirat_args /* {
3022 		syscallarg(int) fd;
3023 		syscallarg(const char *) path;
3024 		syscallarg(mode_t) mode;
3025 	} */ *uap = v;
3026 
3027 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3028 	    SCARG(uap, mode)));
3029 }
3030 
3031 int
3032 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3033 {
3034 	struct vnode *vp;
3035 	struct vattr vattr;
3036 	int error;
3037 	struct nameidata nd;
3038 
3039 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3040 	    fd, path, p);
3041 	nd.ni_pledge = PLEDGE_CPATH;
3042 	nd.ni_unveil = UNVEIL_CREATE;
3043 	if ((error = namei(&nd)) != 0)
3044 		return (error);
3045 	vp = nd.ni_vp;
3046 	if (vp != NULL) {
3047 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3048 		if (nd.ni_dvp == vp)
3049 			vrele(nd.ni_dvp);
3050 		else
3051 			vput(nd.ni_dvp);
3052 		vrele(vp);
3053 		return (EEXIST);
3054 	}
3055 	VATTR_NULL(&vattr);
3056 	vattr.va_type = VDIR;
3057 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3058 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3059 	if (!error)
3060 		vput(nd.ni_vp);
3061 	return (error);
3062 }
3063 
3064 /*
3065  * Remove a directory file.
3066  */
3067 int
3068 sys_rmdir(struct proc *p, void *v, register_t *retval)
3069 {
3070 	struct sys_rmdir_args /* {
3071 		syscallarg(const char *) path;
3072 	} */ *uap = v;
3073 
3074 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3075 }
3076 
3077 /*
3078  * Read a block of directory entries in a file system independent format.
3079  */
3080 int
3081 sys_getdents(struct proc *p, void *v, register_t *retval)
3082 {
3083 	struct sys_getdents_args /* {
3084 		syscallarg(int) fd;
3085 		syscallarg(void *) buf;
3086 		syscallarg(size_t) buflen;
3087 	} */ *uap = v;
3088 	struct vnode *vp;
3089 	struct file *fp;
3090 	struct uio auio;
3091 	struct iovec aiov;
3092 	size_t buflen;
3093 	int error, eofflag;
3094 
3095 	buflen = SCARG(uap, buflen);
3096 
3097 	if (buflen > INT_MAX)
3098 		return (EINVAL);
3099 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3100 		return (error);
3101 	if ((fp->f_flag & FREAD) == 0) {
3102 		error = EBADF;
3103 		goto bad;
3104 	}
3105 	vp = fp->f_data;
3106 	if (vp->v_type != VDIR) {
3107 		error = EINVAL;
3108 		goto bad;
3109 	}
3110 
3111 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3112 
3113 	if (fp->f_offset < 0) {
3114 		VOP_UNLOCK(vp);
3115 		error = EINVAL;
3116 		goto bad;
3117 	}
3118 
3119 	aiov.iov_base = SCARG(uap, buf);
3120 	aiov.iov_len = buflen;
3121 	auio.uio_iov = &aiov;
3122 	auio.uio_iovcnt = 1;
3123 	auio.uio_rw = UIO_READ;
3124 	auio.uio_segflg = UIO_USERSPACE;
3125 	auio.uio_procp = p;
3126 	auio.uio_resid = buflen;
3127 	auio.uio_offset = fp->f_offset;
3128 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3129 	mtx_enter(&fp->f_mtx);
3130 	fp->f_offset = auio.uio_offset;
3131 	mtx_leave(&fp->f_mtx);
3132 	VOP_UNLOCK(vp);
3133 	if (error)
3134 		goto bad;
3135 	*retval = buflen - auio.uio_resid;
3136 bad:
3137 	FRELE(fp, p);
3138 	return (error);
3139 }
3140 
3141 /*
3142  * Set the mode mask for creation of filesystem nodes.
3143  */
3144 int
3145 sys_umask(struct proc *p, void *v, register_t *retval)
3146 {
3147 	struct sys_umask_args /* {
3148 		syscallarg(mode_t) newmask;
3149 	} */ *uap = v;
3150 	struct filedesc *fdp = p->p_fd;
3151 
3152 	fdplock(fdp);
3153 	*retval = fdp->fd_cmask;
3154 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3155 	fdpunlock(fdp);
3156 	return (0);
3157 }
3158 
3159 /*
3160  * Void all references to file by ripping underlying filesystem
3161  * away from vnode.
3162  */
3163 int
3164 sys_revoke(struct proc *p, void *v, register_t *retval)
3165 {
3166 	struct sys_revoke_args /* {
3167 		syscallarg(const char *) path;
3168 	} */ *uap = v;
3169 	struct vnode *vp;
3170 	struct vattr vattr;
3171 	int error;
3172 	struct nameidata nd;
3173 
3174 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3175 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3176 	nd.ni_unveil = UNVEIL_READ;
3177 	if ((error = namei(&nd)) != 0)
3178 		return (error);
3179 	vp = nd.ni_vp;
3180 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3181 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3182 		error = ENOTTY;
3183 		goto out;
3184 	}
3185 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3186 		goto out;
3187 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3188 	    (error = suser(p)))
3189 		goto out;
3190 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3191 		VOP_REVOKE(vp, REVOKEALL);
3192 out:
3193 	vrele(vp);
3194 	return (error);
3195 }
3196 
3197 /*
3198  * Convert a user file descriptor to a kernel file entry.
3199  *
3200  * On return *fpp is FREF:ed.
3201  */
3202 int
3203 getvnode(struct proc *p, int fd, struct file **fpp)
3204 {
3205 	struct file *fp;
3206 	struct vnode *vp;
3207 
3208 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3209 		return (EBADF);
3210 
3211 	if (fp->f_type != DTYPE_VNODE) {
3212 		FRELE(fp, p);
3213 		return (EINVAL);
3214 	}
3215 
3216 	vp = fp->f_data;
3217 	if (vp->v_type == VBAD) {
3218 		FRELE(fp, p);
3219 		return (EBADF);
3220 	}
3221 
3222 	*fpp = fp;
3223 
3224 	return (0);
3225 }
3226 
3227 /*
3228  * Positional read system call.
3229  */
3230 int
3231 sys_pread(struct proc *p, void *v, register_t *retval)
3232 {
3233 	struct sys_pread_args /* {
3234 		syscallarg(int) fd;
3235 		syscallarg(void *) buf;
3236 		syscallarg(size_t) nbyte;
3237 		syscallarg(int) pad;
3238 		syscallarg(off_t) offset;
3239 	} */ *uap = v;
3240 	struct iovec iov;
3241 	struct uio auio;
3242 
3243 	iov.iov_base = SCARG(uap, buf);
3244 	iov.iov_len = SCARG(uap, nbyte);
3245 	if (iov.iov_len > SSIZE_MAX)
3246 		return (EINVAL);
3247 
3248 	auio.uio_iov = &iov;
3249 	auio.uio_iovcnt = 1;
3250 	auio.uio_resid = iov.iov_len;
3251 	auio.uio_offset = SCARG(uap, offset);
3252 
3253 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3254 }
3255 
3256 /*
3257  * Positional scatter read system call.
3258  */
3259 int
3260 sys_preadv(struct proc *p, void *v, register_t *retval)
3261 {
3262 	struct sys_preadv_args /* {
3263 		syscallarg(int) fd;
3264 		syscallarg(const struct iovec *) iovp;
3265 		syscallarg(int) iovcnt;
3266 		syscallarg(int) pad;
3267 		syscallarg(off_t) offset;
3268 	} */ *uap = v;
3269 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3270 	int error, iovcnt = SCARG(uap, iovcnt);
3271 	struct uio auio;
3272 	size_t resid;
3273 
3274 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3275 	if (error)
3276 		goto done;
3277 
3278 	auio.uio_iov = iov;
3279 	auio.uio_iovcnt = iovcnt;
3280 	auio.uio_resid = resid;
3281 	auio.uio_offset = SCARG(uap, offset);
3282 
3283 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3284  done:
3285 	iovec_free(iov, iovcnt);
3286 	return (error);
3287 }
3288 
3289 /*
3290  * Positional write system call.
3291  */
3292 int
3293 sys_pwrite(struct proc *p, void *v, register_t *retval)
3294 {
3295 	struct sys_pwrite_args /* {
3296 		syscallarg(int) fd;
3297 		syscallarg(const void *) buf;
3298 		syscallarg(size_t) nbyte;
3299 		syscallarg(int) pad;
3300 		syscallarg(off_t) offset;
3301 	} */ *uap = v;
3302 	struct iovec iov;
3303 	struct uio auio;
3304 
3305 	iov.iov_base = (void *)SCARG(uap, buf);
3306 	iov.iov_len = SCARG(uap, nbyte);
3307 	if (iov.iov_len > SSIZE_MAX)
3308 		return (EINVAL);
3309 
3310 	auio.uio_iov = &iov;
3311 	auio.uio_iovcnt = 1;
3312 	auio.uio_resid = iov.iov_len;
3313 	auio.uio_offset = SCARG(uap, offset);
3314 
3315 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3316 }
3317 
3318 /*
3319  * Positional gather write system call.
3320  */
3321 int
3322 sys_pwritev(struct proc *p, void *v, register_t *retval)
3323 {
3324 	struct sys_pwritev_args /* {
3325 		syscallarg(int) fd;
3326 		syscallarg(const struct iovec *) iovp;
3327 		syscallarg(int) iovcnt;
3328 		syscallarg(int) pad;
3329 		syscallarg(off_t) offset;
3330 	} */ *uap = v;
3331 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3332 	int error, iovcnt = SCARG(uap, iovcnt);
3333 	struct uio auio;
3334 	size_t resid;
3335 
3336 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3337 	if (error)
3338 		goto done;
3339 
3340 	auio.uio_iov = iov;
3341 	auio.uio_iovcnt = iovcnt;
3342 	auio.uio_resid = resid;
3343 	auio.uio_offset = SCARG(uap, offset);
3344 
3345 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3346  done:
3347 	iovec_free(iov, iovcnt);
3348 	return (error);
3349 }
3350