xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.353 2021/10/02 14:05:10 semarie Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
/* Declared extern here; not referenced in the part of the file reviewed. */
extern int suid_clear;

/* Lookup + directory/search-permission check shared by chdir and chroot. */
static int change_dir(struct nameidata *, struct proc *);

/* Re-points cwd/root references once a new mount covers a vnode. */
void checkdirs(struct vnode *);

/* Copies a struct statfs out to userland, zeroing f_fsid for non-root. */
int copyout_statfs(struct statfs *, void *, struct proc *);

/*
 * Workers behind the system call entry points.  doopenat() serves both
 * sys_open() and sys_openat(); dounmount_leaf() is the per-mount step of
 * dounmount().
 * NOTE(review): the remaining workers are defined outside the reviewed
 * part of the file; presumably they follow the same "do*at" pattern.
 */
int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
int domknodat(struct proc *, int, const char *, mode_t, dev_t);
int dolinkat(struct proc *, int, const char *, int, const char *, int);
int dosymlinkat(struct proc *, const char *, int, const char *);
int dounlinkat(struct proc *, int, const char *, int);
int dofaccessat(struct proc *, int, const char *, int, int);
int dofstatat(struct proc *, int, const char *, struct stat *, int);
int doreadlinkat(struct proc *, int, const char *, char *, size_t,
    register_t *);
int dochflagsat(struct proc *, int, const char *, u_int, int);
int dovchflags(struct proc *, struct vnode *, u_int);
int dofchmodat(struct proc *, int, const char *, mode_t, int);
int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
int dorenameat(struct proc *, int, const char *, int, const char *);
int domkdirat(struct proc *, int, const char *, mode_t);
int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
int dofutimens(struct proc *, int, struct timespec [2]);
int dounmount_leaf(struct mount *, int, struct proc *);
93 
94 /*
95  * Virtual File System System Calls
96  */
97 
98 /*
99  * Mount a file system.
100  */
101 int
102 sys_mount(struct proc *p, void *v, register_t *retval)
103 {
104 	struct sys_mount_args /* {
105 		syscallarg(const char *) type;
106 		syscallarg(const char *) path;
107 		syscallarg(int) flags;
108 		syscallarg(void *) data;
109 	} */ *uap = v;
110 	struct vnode *vp;
111 	struct mount *mp;
112 	int error, mntflag = 0;
113 	char fstypename[MFSNAMELEN];
114 	char fspath[MNAMELEN];
115 	struct nameidata nd;
116 	struct vfsconf *vfsp;
117 	int flags = SCARG(uap, flags);
118 	void *args = NULL;
119 
120 	if ((error = suser(p)))
121 		return (error);
122 
123 	/*
124 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
125 	 */
126 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
127 	if (error)
128 		return(error);
129 
130 	/*
131 	 * Get vnode to be covered
132 	 */
133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
134 	if ((error = namei(&nd)) != 0)
135 		goto fail;
136 	vp = nd.ni_vp;
137 	if (flags & MNT_UPDATE) {
138 		if ((vp->v_flag & VROOT) == 0) {
139 			vput(vp);
140 			error = EINVAL;
141 			goto fail;
142 		}
143 		mp = vp->v_mount;
144 		vfsp = mp->mnt_vfc;
145 
146 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
147 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
148 		if (error) {
149 			vput(vp);
150 			goto fail;
151 		}
152 
153 		mntflag = mp->mnt_flag;
154 		/*
155 		 * We only allow the filesystem to be reloaded if it
156 		 * is currently mounted read-only.
157 		 */
158 		if ((flags & MNT_RELOAD) &&
159 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 			vput(vp);
161 			error = EOPNOTSUPP;	/* Needs translation */
162 			goto fail;
163 		}
164 
165 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
166 			vput(vp);
167 			goto fail;
168 		}
169 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
170 		goto update;
171 	}
172 	/*
173 	 * Do not allow disabling of permission checks unless exec and access to
174 	 * device files is disabled too.
175 	 */
176 	if ((flags & MNT_NOPERM) &&
177 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
178 		vput(vp);
179 		error = EPERM;
180 		goto fail;
181 	}
182 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
183 		vput(vp);
184 		goto fail;
185 	}
186 	if (vp->v_type != VDIR) {
187 		vput(vp);
188 		goto fail;
189 	}
190 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
191 	if (error) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	vfsp = vfs_byname(fstypename);
196 	if (vfsp == NULL) {
197 		vput(vp);
198 		error = EOPNOTSUPP;
199 		goto fail;
200 	}
201 
202 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
203 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
204 	if (error) {
205 		vput(vp);
206 		goto fail;
207 	}
208 
209 	if (vp->v_mountedhere != NULL) {
210 		vput(vp);
211 		error = EBUSY;
212 		goto fail;
213 	}
214 
215 	/*
216 	 * Allocate and initialize the file system.
217 	 */
218 	mp = vfs_mount_alloc(vp, vfsp);
219 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
220 
221 update:
222 	/* Ensure that the parent mountpoint does not get unmounted. */
223 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
224 	if (error) {
225 		if (mp->mnt_flag & MNT_UPDATE) {
226 			mp->mnt_flag = mntflag;
227 			vfs_unbusy(mp);
228 		} else {
229 			vfs_unbusy(mp);
230 			vfs_mount_free(mp);
231 		}
232 		vput(vp);
233 		goto fail;
234 	}
235 
236 	/*
237 	 * Set the mount level flags.
238 	 */
239 	if (flags & MNT_RDONLY)
240 		mp->mnt_flag |= MNT_RDONLY;
241 	else if (mp->mnt_flag & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_WANTRDWR;
243 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
244 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
245 	    MNT_NOPERM | MNT_FORCE);
246 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
247 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
248 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
249 	/*
250 	 * Mount the filesystem.
251 	 */
252 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
253 	if (!error) {
254 		mp->mnt_stat.f_ctime = gettime();
255 	}
256 	if (mp->mnt_flag & MNT_UPDATE) {
257 		vfs_unbusy(vp->v_mount);
258 		vput(vp);
259 		if (mp->mnt_flag & MNT_WANTRDWR)
260 			mp->mnt_flag &= ~MNT_RDONLY;
261 		mp->mnt_flag &= ~MNT_OP_FLAGS;
262 		if (error)
263 			mp->mnt_flag = mntflag;
264 
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
266 			if (mp->mnt_syncer == NULL)
267 				error = vfs_allocate_syncvnode(mp);
268 		} else {
269 			if (mp->mnt_syncer != NULL)
270 				vgone(mp->mnt_syncer);
271 			mp->mnt_syncer = NULL;
272 		}
273 
274 		vfs_unbusy(mp);
275 		goto fail;
276 	}
277 
278 	mp->mnt_flag &= ~MNT_OP_FLAGS;
279 	vp->v_mountedhere = mp;
280 
281 	/*
282 	 * Put the new filesystem on the mount list after root.
283 	 */
284 	cache_purge(vp);
285 	if (!error) {
286 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
287 		checkdirs(vp);
288 		vfs_unbusy(vp->v_mount);
289 		VOP_UNLOCK(vp);
290 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
291 			error = vfs_allocate_syncvnode(mp);
292 		vfs_unbusy(mp);
293 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
294 		if ((error = VFS_START(mp, 0, p)) != 0)
295 			vrele(vp);
296 	} else {
297 		mp->mnt_vnodecovered->v_mountedhere = NULL;
298 		vfs_unbusy(mp);
299 		vfs_mount_free(mp);
300 		vfs_unbusy(vp->v_mount);
301 		vput(vp);
302 	}
303 fail:
304 	if (args)
305 		free(args, M_TEMP, vfsp->vfc_datasize);
306 	return (error);
307 }
308 
309 /*
310  * Scan all active processes to see if any of them have a current
311  * or root directory onto which the new filesystem has just been
312  * mounted. If so, replace them with the new mount point, keeping
313  * track of how many were replaced.  That's the number of references
314  * the old vnode had that we've replaced, so finish by vrele()'ing
315  * it that many times.  This puts off any possible sleeping until
316  * we've finished walking the allprocess list.
317  */
318 void
319 checkdirs(struct vnode *olddp)
320 {
321 	struct filedesc *fdp;
322 	struct vnode *newdp;
323 	struct process *pr;
324 	u_int  free_count = 0;
325 
326 	if (olddp->v_usecount == 1)
327 		return;
328 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
329 		panic("mount: lost mount");
330 	LIST_FOREACH(pr, &allprocess, ps_list) {
331 		fdp = pr->ps_fd;
332 		if (fdp->fd_cdir == olddp) {
333 			free_count++;
334 			vref(newdp);
335 			fdp->fd_cdir = newdp;
336 		}
337 		if (fdp->fd_rdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_rdir = newdp;
341 		}
342 	}
343 	if (rootvnode == olddp) {
344 		free_count++;
345 		vref(newdp);
346 		rootvnode = newdp;
347 	}
348 	while (free_count-- > 0)
349 		vrele(olddp);
350 	vput(newdp);
351 }
352 
353 /*
354  * Unmount a file system.
355  *
356  * Note: unmount takes a path to the vnode mounted on as argument,
357  * not special file (as before).
358  */
359 int
360 sys_unmount(struct proc *p, void *v, register_t *retval)
361 {
362 	struct sys_unmount_args /* {
363 		syscallarg(const char *) path;
364 		syscallarg(int) flags;
365 	} */ *uap = v;
366 	struct vnode *vp;
367 	struct mount *mp;
368 	int error;
369 	struct nameidata nd;
370 
371 	if ((error = suser(p)) != 0)
372 		return (error);
373 
374 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
375 	    SCARG(uap, path), p);
376 	if ((error = namei(&nd)) != 0)
377 		return (error);
378 	vp = nd.ni_vp;
379 	mp = vp->v_mount;
380 
381 	/*
382 	 * Don't allow unmounting the root file system.
383 	 */
384 	if (mp->mnt_flag & MNT_ROOTFS) {
385 		vput(vp);
386 		return (EINVAL);
387 	}
388 
389 	/*
390 	 * Must be the root of the filesystem
391 	 */
392 	if ((vp->v_flag & VROOT) == 0) {
393 		vput(vp);
394 		return (EINVAL);
395 	}
396 	vput(vp);
397 
398 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
399 		return (EBUSY);
400 
401 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
402 }
403 
404 /*
405  * Do the actual file system unmount.
406  */
407 int
408 dounmount(struct mount *mp, int flags, struct proc *p)
409 {
410 	SLIST_HEAD(, mount) mplist;
411 	struct mount *nmp;
412 	int error;
413 
414 	SLIST_INIT(&mplist);
415 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
416 
417 	/*
418 	 * Collect nested mount points. This takes advantage of the mount list
419 	 * being ordered - nested mount points come after their parent.
420 	 */
421 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
422 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
423 			if (mp->mnt_vnodecovered == NULLVP ||
424 			    mp->mnt_vnodecovered->v_mount != nmp)
425 				continue;
426 
427 			if ((flags & MNT_FORCE) == 0) {
428 				error = EBUSY;
429 				goto err;
430 			}
431 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
432 			if (error) {
433 				if ((flags & MNT_DOOMED)) {
434 					/*
435 					 * If the mount point was busy due to
436 					 * being unmounted, it has been removed
437 					 * from the mount list already.
438 					 * Restart the iteration from the last
439 					 * collected busy entry.
440 					 */
441 					mp = SLIST_FIRST(&mplist);
442 					break;
443 				}
444 				goto err;
445 			}
446 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
447 			break;
448 		}
449 	}
450 
451 	/*
452 	 * Nested mount points cannot appear during this loop as mounting
453 	 * requires a read lock for the parent mount point.
454 	 */
455 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
456 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
457 		error = dounmount_leaf(mp, flags, p);
458 		if (error)
459 			goto err;
460 	}
461 	return (0);
462 
463 err:
464 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
465 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
466 		vfs_unbusy(mp);
467 	}
468 	return (error);
469 }
470 
471 int
472 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
473 {
474 	struct vnode *coveredvp;
475 	struct vnode *vp, *nvp;
476 	int error;
477 	int hadsyncer = 0;
478 
479 	mp->mnt_flag &=~ MNT_ASYNC;
480 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
481 	if (mp->mnt_syncer != NULL) {
482 		hadsyncer = 1;
483 		vgone(mp->mnt_syncer);
484 		mp->mnt_syncer = NULL;
485 	}
486 
487 	/*
488 	 * Before calling file system unmount, make sure
489 	 * all unveils to vnodes in here are dropped.
490 	 */
491 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
492 		unveil_removevnode(vp);
493 	}
494 
495 	if (((mp->mnt_flag & MNT_RDONLY) ||
496 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
497 	    (flags & MNT_FORCE))
498 		error = VFS_UNMOUNT(mp, flags, p);
499 
500 	if (error && !(flags & MNT_DOOMED)) {
501 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
502 			(void) vfs_allocate_syncvnode(mp);
503 		vfs_unbusy(mp);
504 		return (error);
505 	}
506 
507 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
508 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
509 		coveredvp->v_mountedhere = NULL;
510 		vrele(coveredvp);
511 	}
512 
513 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
514 		panic("unmount: dangling vnode");
515 
516 	vfs_unbusy(mp);
517 	vfs_mount_free(mp);
518 
519 	return (0);
520 }
521 
522 /*
523  * Sync each mounted filesystem.
524  */
525 int
526 sys_sync(struct proc *p, void *v, register_t *retval)
527 {
528 	struct mount *mp;
529 	int asyncflag;
530 
531 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
532 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
533 			continue;
534 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
535 			asyncflag = mp->mnt_flag & MNT_ASYNC;
536 			mp->mnt_flag &= ~MNT_ASYNC;
537 			uvm_vnp_sync(mp);
538 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
539 			if (asyncflag)
540 				mp->mnt_flag |= MNT_ASYNC;
541 		}
542 		vfs_unbusy(mp);
543 	}
544 
545 	return (0);
546 }
547 
548 /*
549  * Change filesystem quotas.
550  */
551 int
552 sys_quotactl(struct proc *p, void *v, register_t *retval)
553 {
554 	struct sys_quotactl_args /* {
555 		syscallarg(const char *) path;
556 		syscallarg(int) cmd;
557 		syscallarg(int) uid;
558 		syscallarg(char *) arg;
559 	} */ *uap = v;
560 	struct mount *mp;
561 	int error;
562 	struct nameidata nd;
563 
564 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
565 	if ((error = namei(&nd)) != 0)
566 		return (error);
567 	mp = nd.ni_vp->v_mount;
568 	vrele(nd.ni_vp);
569 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
570 	    SCARG(uap, arg), p));
571 }
572 
573 int
574 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
575 {
576 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
577 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
578 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
579 	char *s, *d;
580 	int error;
581 
582 	/* Don't let non-root see filesystem id (for NFS security) */
583 	if (suser(p)) {
584 		fsid_t fsid;
585 
586 		s = (char *)sp;
587 		d = (char *)uaddr;
588 
589 		memset(&fsid, 0, sizeof(fsid));
590 
591 		if ((error = copyout(s, d, co_sz1)) != 0)
592 			return (error);
593 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
594 			return (error);
595 		return (copyout(s + co_off2, d + co_off2, co_sz2));
596 	}
597 
598 	return (copyout(sp, uaddr, sizeof(*sp)));
599 }
600 
601 /*
602  * Get filesystem statistics.
603  */
604 int
605 sys_statfs(struct proc *p, void *v, register_t *retval)
606 {
607 	struct sys_statfs_args /* {
608 		syscallarg(const char *) path;
609 		syscallarg(struct statfs *) buf;
610 	} */ *uap = v;
611 	struct mount *mp;
612 	struct statfs *sp;
613 	int error;
614 	struct nameidata nd;
615 
616 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
617 	    SCARG(uap, path), p);
618 	nd.ni_pledge = PLEDGE_RPATH;
619 	nd.ni_unveil = UNVEIL_READ;
620 	if ((error = namei(&nd)) != 0)
621 		return (error);
622 	mp = nd.ni_vp->v_mount;
623 	sp = &mp->mnt_stat;
624 	vrele(nd.ni_vp);
625 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
626 		return (error);
627 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
628 
629 	return (copyout_statfs(sp, SCARG(uap, buf), p));
630 }
631 
632 /*
633  * Get filesystem statistics.
634  */
635 int
636 sys_fstatfs(struct proc *p, void *v, register_t *retval)
637 {
638 	struct sys_fstatfs_args /* {
639 		syscallarg(int) fd;
640 		syscallarg(struct statfs *) buf;
641 	} */ *uap = v;
642 	struct file *fp;
643 	struct mount *mp;
644 	struct statfs *sp;
645 	int error;
646 
647 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
648 		return (error);
649 	mp = ((struct vnode *)fp->f_data)->v_mount;
650 	if (!mp) {
651 		FRELE(fp, p);
652 		return (ENOENT);
653 	}
654 	sp = &mp->mnt_stat;
655 	error = VFS_STATFS(mp, sp, p);
656 	FRELE(fp, p);
657 	if (error)
658 		return (error);
659 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
660 
661 	return (copyout_statfs(sp, SCARG(uap, buf), p));
662 }
663 
664 /*
665  * Get statistics on all filesystems.
666  */
667 int
668 sys_getfsstat(struct proc *p, void *v, register_t *retval)
669 {
670 	struct sys_getfsstat_args /* {
671 		syscallarg(struct statfs *) buf;
672 		syscallarg(size_t) bufsize;
673 		syscallarg(int) flags;
674 	} */ *uap = v;
675 	struct mount *mp;
676 	struct statfs *sp;
677 	struct statfs *sfsp;
678 	size_t count, maxcount;
679 	int error, flags = SCARG(uap, flags);
680 
681 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
682 	sfsp = SCARG(uap, buf);
683 	count = 0;
684 
685 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
686 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
687 			continue;
688 		if (sfsp && count < maxcount) {
689 			sp = &mp->mnt_stat;
690 
691 			/* Refresh stats unless MNT_NOWAIT is specified */
692 			if (flags != MNT_NOWAIT &&
693 			    flags != MNT_LAZY &&
694 			    (flags == MNT_WAIT ||
695 			    flags == 0) &&
696 			    (error = VFS_STATFS(mp, sp, p))) {
697 				vfs_unbusy(mp);
698 				continue;
699 			}
700 
701 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
702 #if notyet
703 			if (mp->mnt_flag & MNT_SOFTDEP)
704 				sp->f_eflags = STATFS_SOFTUPD;
705 #endif
706 			error = (copyout_statfs(sp, sfsp, p));
707 			if (error) {
708 				vfs_unbusy(mp);
709 				return (error);
710 			}
711 			sfsp++;
712 		}
713 		count++;
714 		vfs_unbusy(mp);
715 	}
716 
717 	if (sfsp && count > maxcount)
718 		*retval = maxcount;
719 	else
720 		*retval = count;
721 
722 	return (0);
723 }
724 
725 /*
726  * Change current working directory to a given file descriptor.
727  */
728 int
729 sys_fchdir(struct proc *p, void *v, register_t *retval)
730 {
731 	struct sys_fchdir_args /* {
732 		syscallarg(int) fd;
733 	} */ *uap = v;
734 	struct filedesc *fdp = p->p_fd;
735 	struct vnode *vp, *tdp, *old_cdir;
736 	struct mount *mp;
737 	struct file *fp;
738 	int error;
739 
740 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
741 		return (EBADF);
742 	vp = fp->f_data;
743 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
744 		FRELE(fp, p);
745 		return (ENOTDIR);
746 	}
747 	vref(vp);
748 	FRELE(fp, p);
749 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
750 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
751 
752 	while (!error && (mp = vp->v_mountedhere) != NULL) {
753 		if (vfs_busy(mp, VB_READ|VB_WAIT))
754 			continue;
755 		error = VFS_ROOT(mp, &tdp);
756 		vfs_unbusy(mp);
757 		if (error)
758 			break;
759 		vput(vp);
760 		vp = tdp;
761 	}
762 	if (error) {
763 		vput(vp);
764 		return (error);
765 	}
766 	VOP_UNLOCK(vp);
767 	old_cdir = fdp->fd_cdir;
768 	fdp->fd_cdir = vp;
769 	vrele(old_cdir);
770 	return (0);
771 }
772 
773 /*
774  * Change current working directory (``.'').
775  */
776 int
777 sys_chdir(struct proc *p, void *v, register_t *retval)
778 {
779 	struct sys_chdir_args /* {
780 		syscallarg(const char *) path;
781 	} */ *uap = v;
782 	struct filedesc *fdp = p->p_fd;
783 	struct vnode *old_cdir;
784 	int error;
785 	struct nameidata nd;
786 
787 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
788 	    SCARG(uap, path), p);
789 	nd.ni_pledge = PLEDGE_RPATH;
790 	nd.ni_unveil = UNVEIL_READ;
791 	if ((error = change_dir(&nd, p)) != 0)
792 		return (error);
793 	old_cdir = fdp->fd_cdir;
794 	fdp->fd_cdir = nd.ni_vp;
795 	vrele(old_cdir);
796 	return (0);
797 }
798 
799 /*
800  * Change notion of root (``/'') directory.
801  */
802 int
803 sys_chroot(struct proc *p, void *v, register_t *retval)
804 {
805 	struct sys_chroot_args /* {
806 		syscallarg(const char *) path;
807 	} */ *uap = v;
808 	struct filedesc *fdp = p->p_fd;
809 	struct vnode *old_cdir, *old_rdir;
810 	int error;
811 	struct nameidata nd;
812 
813 	if ((error = suser(p)) != 0)
814 		return (error);
815 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
816 	    SCARG(uap, path), p);
817 	if ((error = change_dir(&nd, p)) != 0)
818 		return (error);
819 	if (fdp->fd_rdir != NULL) {
820 		/*
821 		 * A chroot() done inside a changed root environment does
822 		 * an automatic chdir to avoid the out-of-tree experience.
823 		 */
824 		vref(nd.ni_vp);
825 		old_rdir = fdp->fd_rdir;
826 		old_cdir = fdp->fd_cdir;
827 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
828 		vrele(old_rdir);
829 		vrele(old_cdir);
830 	} else
831 		fdp->fd_rdir = nd.ni_vp;
832 	return (0);
833 }
834 
835 /*
836  * Common routine for chroot and chdir.
837  */
838 static int
839 change_dir(struct nameidata *ndp, struct proc *p)
840 {
841 	struct vnode *vp;
842 	int error;
843 
844 	if ((error = namei(ndp)) != 0)
845 		return (error);
846 	vp = ndp->ni_vp;
847 	if (vp->v_type != VDIR)
848 		error = ENOTDIR;
849 	else
850 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
851 	if (error)
852 		vput(vp);
853 	else
854 		VOP_UNLOCK(vp);
855 	return (error);
856 }
857 
858 int
859 sys___realpath(struct proc *p, void *v, register_t *retval)
860 {
861 	struct sys___realpath_args /* {
862 		syscallarg(const char *) pathname;
863 		syscallarg(char *) resolved;
864 	} */ *uap = v;
865 	char *pathname;
866 	char *rpbuf;
867 	struct nameidata nd;
868 	size_t pathlen;
869 	int error = 0;
870 
871 	if (SCARG(uap, pathname) == NULL)
872 		return (EINVAL);
873 
874 	pathname = pool_get(&namei_pool, PR_WAITOK);
875 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
876 
877 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
878 	    &pathlen)))
879 		goto end;
880 
881 	if (pathlen == 1) { /* empty string "" */
882 		error = ENOENT;
883 		goto end;
884 	}
885 	if (pathlen < 2) {
886 		error = EINVAL;
887 		goto end;
888 	}
889 
890 	/* Get cwd for relative path if needed, prepend to rpbuf */
891 	rpbuf[0] = '\0';
892 	if (pathname[0] != '/') {
893 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
894 		char *cwdbuf, *bp;
895 
896 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
897 
898 		/* vfs_getcwd_common fills this in backwards */
899 		bp = &cwdbuf[cwdlen - 1];
900 		*bp = '\0';
901 
902 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
903 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
904 
905 		if (error) {
906 			free(cwdbuf, M_TEMP, cwdlen);
907 			goto end;
908 		}
909 
910 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
911 			free(cwdbuf, M_TEMP, cwdlen);
912 			error = ENAMETOOLONG;
913 			goto end;
914 		}
915 
916 		free(cwdbuf, M_TEMP, cwdlen);
917 	}
918 
919 	NDINIT(&nd, LOOKUP, FOLLOW | SAVENAME | REALPATH, UIO_SYSSPACE,
920 	    pathname, p);
921 
922 	nd.ni_cnd.cn_rpbuf = rpbuf;
923 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
924 
925 	nd.ni_pledge = PLEDGE_RPATH;
926 	nd.ni_unveil = UNVEIL_READ;
927 	if ((error = namei(&nd)) != 0)
928 		goto end;
929 
930 	/* release reference from namei */
931 	if (nd.ni_vp)
932 		vrele(nd.ni_vp);
933 
934 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
935 	    MAXPATHLEN, NULL);
936 
937 #ifdef KTRACE
938 	if (KTRPOINT(p, KTR_NAMEI))
939 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
940 #endif
941 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
942 end:
943 	pool_put(&namei_pool, rpbuf);
944 	pool_put(&namei_pool, pathname);
945 	return (error);
946 }
947 
948 int
949 sys_unveil(struct proc *p, void *v, register_t *retval)
950 {
951 	struct sys_unveil_args /* {
952 		syscallarg(const char *) path;
953 		syscallarg(const char *) permissions;
954 	} */ *uap = v;
955 	struct process *pr = p->p_p;
956 	char *pathname, *c;
957 	struct nameidata nd;
958 	size_t pathlen;
959 	char permissions[5];
960 	int error, allow;
961 
962 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
963 		pr->ps_uvdone = 1;
964 		return (0);
965 	}
966 
967 	if (pr->ps_uvdone != 0)
968 		return EPERM;
969 
970 	error = copyinstr(SCARG(uap, permissions), permissions,
971 	    sizeof(permissions), NULL);
972 	if (error)
973 		return (error);
974 	pathname = pool_get(&namei_pool, PR_WAITOK);
975 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
976 	if (error)
977 		goto end;
978 
979 #ifdef KTRACE
980 	if (KTRPOINT(p, KTR_STRUCT))
981 		ktrstruct(p, "unveil", permissions, strlen(permissions));
982 #endif
983 	if (pathlen < 2) {
984 		error = EINVAL;
985 		goto end;
986 	}
987 
988 	/* find root "/" or "//" */
989 	for (c = pathname; *c != '\0'; c++) {
990 		if (*c != '/')
991 			break;
992 	}
993 	if (*c == '\0')
994 		/* root directory */
995 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
996 		    UIO_SYSSPACE, pathname, p);
997 	else
998 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
999 		    UIO_SYSSPACE, pathname, p);
1000 
1001 	nd.ni_pledge = PLEDGE_UNVEIL;
1002 	if ((error = namei(&nd)) != 0)
1003 		goto end;
1004 
1005 	/*
1006 	 * XXX Any access to the file or directory will allow us to
1007 	 * pledge path it
1008 	 */
1009 	allow = ((nd.ni_vp &&
1010 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1011 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1012 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1013 	    (nd.ni_dvp &&
1014 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1015 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1016 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1017 
1018 	/* release lock from namei, but keep ref */
1019 	if (nd.ni_vp)
1020 		VOP_UNLOCK(nd.ni_vp);
1021 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1022 		VOP_UNLOCK(nd.ni_dvp);
1023 
1024 	if (allow)
1025 		error = unveil_add(p, &nd, permissions);
1026 	else
1027 		error = EPERM;
1028 
1029 	/* release vref from namei, but not vref from unveil_add */
1030 	if (nd.ni_vp)
1031 		vrele(nd.ni_vp);
1032 	if (nd.ni_dvp)
1033 		vrele(nd.ni_dvp);
1034 
1035 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1036 end:
1037 	pool_put(&namei_pool, pathname);
1038 
1039 	return (error);
1040 }
1041 
1042 /*
1043  * Check permissions, allocate an open file structure,
1044  * and call the device open routine if any.
1045  */
1046 int
1047 sys_open(struct proc *p, void *v, register_t *retval)
1048 {
1049 	struct sys_open_args /* {
1050 		syscallarg(const char *) path;
1051 		syscallarg(int) flags;
1052 		syscallarg(mode_t) mode;
1053 	} */ *uap = v;
1054 
1055 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1056 	    SCARG(uap, mode), retval));
1057 }
1058 
1059 int
1060 sys_openat(struct proc *p, void *v, register_t *retval)
1061 {
1062 	struct sys_openat_args /* {
1063 		syscallarg(int) fd;
1064 		syscallarg(const char *) path;
1065 		syscallarg(int) flags;
1066 		syscallarg(mode_t) mode;
1067 	} */ *uap = v;
1068 
1069 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1070 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1071 }
1072 
1073 int
1074 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1075     register_t *retval)
1076 {
1077 	struct filedesc *fdp = p->p_fd;
1078 	struct file *fp;
1079 	struct vnode *vp;
1080 	struct vattr vattr;
1081 	int flags, cloexec, cmode;
1082 	int type, indx, error, localtrunc = 0;
1083 	struct flock lf;
1084 	struct nameidata nd;
1085 	uint64_t ni_pledge = 0;
1086 	u_char ni_unveil = 0;
1087 
1088 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1089 		error = pledge_flock(p);
1090 		if (error != 0)
1091 			return (error);
1092 	}
1093 
1094 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1095 
1096 	fdplock(fdp);
1097 	if ((error = falloc(p, &fp, &indx)) != 0) {
1098 		fdpunlock(fdp);
1099 		return (error);
1100 	}
1101 	fdpunlock(fdp);
1102 
1103 	flags = FFLAGS(oflags);
1104 	if (flags & FREAD) {
1105 		ni_pledge |= PLEDGE_RPATH;
1106 		ni_unveil |= UNVEIL_READ;
1107 	}
1108 	if (flags & FWRITE) {
1109 		ni_pledge |= PLEDGE_WPATH;
1110 		ni_unveil |= UNVEIL_WRITE;
1111 	}
1112 	if (oflags & O_CREAT) {
1113 		ni_pledge |= PLEDGE_CPATH;
1114 		ni_unveil |= UNVEIL_CREATE;
1115 	}
1116 
1117 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1118 	if ((p->p_p->ps_flags & PS_PLEDGE))
1119 		cmode &= ACCESSPERMS;
1120 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1121 	nd.ni_pledge = ni_pledge;
1122 	nd.ni_unveil = ni_unveil;
1123 	p->p_dupfd = -1;			/* XXX check for fdopen */
1124 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1125 		localtrunc = 1;
1126 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1127 	}
1128 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1129 		fdplock(fdp);
1130 		if (error == ENODEV &&
1131 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1132 		    (error =
1133 			dupfdopen(p, indx, flags)) == 0) {
1134 			fdpunlock(fdp);
1135 			closef(fp, p);
1136 			*retval = indx;
1137 			return (error);
1138 		}
1139 		if (error == ERESTART)
1140 			error = EINTR;
1141 		fdremove(fdp, indx);
1142 		fdpunlock(fdp);
1143 		closef(fp, p);
1144 		return (error);
1145 	}
1146 	p->p_dupfd = 0;
1147 	vp = nd.ni_vp;
1148 	fp->f_flag = flags & FMASK;
1149 	fp->f_type = DTYPE_VNODE;
1150 	fp->f_ops = &vnops;
1151 	fp->f_data = vp;
1152 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1153 		lf.l_whence = SEEK_SET;
1154 		lf.l_start = 0;
1155 		lf.l_len = 0;
1156 		if (flags & O_EXLOCK)
1157 			lf.l_type = F_WRLCK;
1158 		else
1159 			lf.l_type = F_RDLCK;
1160 		type = F_FLOCK;
1161 		if ((flags & FNONBLOCK) == 0)
1162 			type |= F_WAIT;
1163 		VOP_UNLOCK(vp);
1164 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1165 		if (error) {
1166 			fdplock(fdp);
1167 			/* closef will vn_close the file for us. */
1168 			fdremove(fdp, indx);
1169 			fdpunlock(fdp);
1170 			closef(fp, p);
1171 			return (error);
1172 		}
1173 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1174 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1175 	}
1176 	if (localtrunc) {
1177 		if ((fp->f_flag & FWRITE) == 0)
1178 			error = EACCES;
1179 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1180 			error = EROFS;
1181 		else if (vp->v_type == VDIR)
1182 			error = EISDIR;
1183 		else if ((error = vn_writechk(vp)) == 0) {
1184 			VATTR_NULL(&vattr);
1185 			vattr.va_size = 0;
1186 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1187 		}
1188 		if (error) {
1189 			VOP_UNLOCK(vp);
1190 			fdplock(fdp);
1191 			/* closef will close the file for us. */
1192 			fdremove(fdp, indx);
1193 			fdpunlock(fdp);
1194 			closef(fp, p);
1195 			return (error);
1196 		}
1197 	}
1198 	VOP_UNLOCK(vp);
1199 	*retval = indx;
1200 	fdplock(fdp);
1201 	fdinsert(fdp, indx, cloexec, fp);
1202 	fdpunlock(fdp);
1203 	FRELE(fp, p);
1204 	return (error);
1205 }
1206 
/*
 * Open a newly created file (in /tmp) suitable for mmapping.
 *
 * A descriptor slot is reserved, a file with a random name under /tmp is
 * created and opened read/write, the descriptor is published, and the name
 * is then removed again.  Of the caller-supplied flags only O_CLOEXEC is
 * honoured; everything else is forced.
 *
 * Returns 0 with the new descriptor number in *retval, or the error from
 * falloc()/vn_open().  A failure to remove the name afterwards is only
 * logged with printf() and does not fail the call.
 */
int
sys___tmpfd(struct proc *p, void *v, register_t *retval)
{
	struct sys___tmpfd_args /* {
		syscallarg(int) flags;
	} */ *uap = v;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int oflags = SCARG(uap, flags);
	int flags, cloexec, cmode;
	int indx, error;
	unsigned int i;
	struct nameidata nd;
	char path[64];
	/* 64 characters, so a 6-bit mask (& 63) indexes it exactly */
	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";

	/* most flags are hardwired */
	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);

	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	/* reserve a file structure and descriptor index under the fd lock */
	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0) {
		fdpunlock(fdp);
		return (error);
	}
	fdpunlock(fdp);

	flags = FFLAGS(oflags);

	/*
	 * Build "/tmp/" followed by random characters: fill the buffer with
	 * random bytes, overwrite the first five with the directory prefix,
	 * map each remaining byte onto the letters[] alphabet and terminate.
	 */
	arc4random_buf(path, sizeof(path));
	memcpy(path, "/tmp/", 5);
	for (i = 5; i < sizeof(path) - 1; i++)
		path[i] = letters[(unsigned char)path[i] & 63];
	path[sizeof(path)-1] = 0;

	cmode = 0600;
	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/* an interrupted open is reported as EINTR instead of ERESTART */
		if (error == ERESTART)
			error = EINTR;
		/* give back the reserved descriptor slot and the file */
		fdplock(fdp);
		fdremove(fdp, indx);
		fdpunlock(fdp);
		closef(fp, p);
		return (error);
	}
	/* bind the opened vnode to the file and publish the descriptor */
	vp = nd.ni_vp;
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	VOP_UNLOCK(vp);
	*retval = indx;
	fdplock(fdp);
	fdinsert(fdp, indx, cloexec, fp);
	fdpunlock(fdp);
	FRELE(fp, p);

	/* unlink it */
	/* XXX
	 * there is a wee race here, although it is mostly inconsequential.
	 * perhaps someday we can create a file like object without a name...
	 */
	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
	    AT_FDCWD, path, p);
	if ((error = namei(&nd)) != 0) {
		/* best effort: the descriptor is already installed */
		printf("can't unlink temp file! %d\n", error);
		error = 0;
	} else {
		vp = nd.ni_vp;
		uvm_vnp_uncache(vp);
		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
		if (error) {
			/* likewise only logged; the call still succeeds */
			printf("error removing vop: %d\n", error);
			error = 0;
		}
	}

	return (error);
}
1292 
1293 /*
1294  * Get file handle system call
1295  */
1296 int
1297 sys_getfh(struct proc *p, void *v, register_t *retval)
1298 {
1299 	struct sys_getfh_args /* {
1300 		syscallarg(const char *) fname;
1301 		syscallarg(fhandle_t *) fhp;
1302 	} */ *uap = v;
1303 	struct vnode *vp;
1304 	fhandle_t fh;
1305 	int error;
1306 	struct nameidata nd;
1307 
1308 	/*
1309 	 * Must be super user
1310 	 */
1311 	error = suser(p);
1312 	if (error)
1313 		return (error);
1314 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1315 	    SCARG(uap, fname), p);
1316 	error = namei(&nd);
1317 	if (error)
1318 		return (error);
1319 	vp = nd.ni_vp;
1320 	memset(&fh, 0, sizeof(fh));
1321 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1322 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1323 	vput(vp);
1324 	if (error)
1325 		return (error);
1326 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1327 	return (error);
1328 }
1329 
/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * The handle is copied in from fhp, resolved to a mount with vfs_getvfs()
 * and to a vnode with VFS_FHTOVP().  The checks that follow are a
 * hand-written variant of vn_open for an already-resolved vnode; O_CREAT
 * is rejected with EINVAL.  On success the new descriptor number is stored
 * in *retval.  All failures after the suser()/flag checks leave through
 * the bad: label, which undoes the descriptor reservation.
 */
int
sys_fhopen(struct proc *p, void *v, register_t *retval)
{
	struct sys_fhopen_args /* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(int) flags;
	} */ *uap = v;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp = NULL;
	struct mount *mp;
	struct ucred *cred = p->p_ucred;
	int flags, cloexec;
	int type, indx, error=0;
	struct flock lf;
	struct vattr va;
	fhandle_t fh;

	/*
	 * Must be super user
	 */
	if ((error = suser(p)))
		return (error);

	/* at least one of read/write must be requested; creation is refused */
	flags = FFLAGS(SCARG(uap, flags));
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((flags & O_CREAT))
		return (EINVAL);

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	/* reserve a file structure and descriptor index */
	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0) {
		fdpunlock(fdp);
		fp = NULL;	/* tells bad: there is nothing to undo */
		goto bad;
	}
	fdpunlock(fdp);

	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
		goto bad;

	/* no mount matches the filesystem id in the handle */
	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto bad;
	}

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
		vp = NULL;	/* most likely unnecessary sanity for bad: */
		goto bad;
	}

	/* Now do an effective vn_open */

	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	if (flags & FREAD) {
		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
			goto bad;
	}
	if (flags & (FWRITE | O_TRUNC)) {
		/* directories cannot be opened for writing or truncated */
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
		    (error = vn_writechk(vp)) != 0)
			goto bad;
	}
	if (flags & O_TRUNC) {
		/* truncate by setting only the size attribute to zero */
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
		goto bad;
	if (flags & FWRITE)
		vp->v_writecount++;

	/* done with modified vn_open, now finish what sys_open does. */

	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* whole-file flock-style lock: exclusive or shared */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* wait for the lock unless the open is non-blocking */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* the vnode lock is dropped around the advisory lock request */
		VOP_UNLOCK(vp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			vp = NULL;	/* closef will vn_close the file */
			goto bad;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
	}
	VOP_UNLOCK(vp);
	*retval = indx;
	/* publish the descriptor and drop our reference on the file */
	fdplock(fdp);
	fdinsert(fdp, indx, cloexec, fp);
	fdpunlock(fdp);
	FRELE(fp, p);
	return (0);

bad:
	/*
	 * fp is NULL only when falloc() failed.  Otherwise release the
	 * descriptor slot and the file, and vput() the vnode unless it was
	 * cleared above (lookup failed, or closef() already owns it).
	 */
	if (fp) {
		fdplock(fdp);
		fdremove(fdp, indx);
		fdpunlock(fdp);
		closef(fp, p);
		if (vp != NULL)
			vput(vp);
	}
	return (error);
}
1468 
1469 int
1470 sys_fhstat(struct proc *p, void *v, register_t *retval)
1471 {
1472 	struct sys_fhstat_args /* {
1473 		syscallarg(const fhandle_t *) fhp;
1474 		syscallarg(struct stat *) sb;
1475 	} */ *uap = v;
1476 	struct stat sb;
1477 	int error;
1478 	fhandle_t fh;
1479 	struct mount *mp;
1480 	struct vnode *vp;
1481 
1482 	/*
1483 	 * Must be super user
1484 	 */
1485 	if ((error = suser(p)))
1486 		return (error);
1487 
1488 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1489 		return (error);
1490 
1491 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1492 		return (ESTALE);
1493 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1494 		return (error);
1495 	error = vn_stat(vp, &sb, p);
1496 	vput(vp);
1497 	if (error)
1498 		return (error);
1499 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1500 	return (error);
1501 }
1502 
1503 int
1504 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1505 {
1506 	struct sys_fhstatfs_args /* {
1507 		syscallarg(const fhandle_t *) fhp;
1508 		syscallarg(struct statfs *) buf;
1509 	} */ *uap = v;
1510 	struct statfs *sp;
1511 	fhandle_t fh;
1512 	struct mount *mp;
1513 	struct vnode *vp;
1514 	int error;
1515 
1516 	/*
1517 	 * Must be super user
1518 	 */
1519 	if ((error = suser(p)))
1520 		return (error);
1521 
1522 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1523 		return (error);
1524 
1525 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1526 		return (ESTALE);
1527 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1528 		return (error);
1529 	mp = vp->v_mount;
1530 	sp = &mp->mnt_stat;
1531 	vput(vp);
1532 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1533 		return (error);
1534 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1535 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1536 }
1537 
1538 /*
1539  * Create a special file or named pipe.
1540  */
1541 int
1542 sys_mknod(struct proc *p, void *v, register_t *retval)
1543 {
1544 	struct sys_mknod_args /* {
1545 		syscallarg(const char *) path;
1546 		syscallarg(mode_t) mode;
1547 		syscallarg(int) dev;
1548 	} */ *uap = v;
1549 
1550 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1551 	    SCARG(uap, dev)));
1552 }
1553 
1554 int
1555 sys_mknodat(struct proc *p, void *v, register_t *retval)
1556 {
1557 	struct sys_mknodat_args /* {
1558 		syscallarg(int) fd;
1559 		syscallarg(const char *) path;
1560 		syscallarg(mode_t) mode;
1561 		syscallarg(dev_t) dev;
1562 	} */ *uap = v;
1563 
1564 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1565 	    SCARG(uap, mode), SCARG(uap, dev)));
1566 }
1567 
/*
 * Common implementation of mknod/mknodat/mkfifo/mkfifoat.
 *
 * Looks up path relative to fd for creation and, if the name does not
 * exist yet, creates a node whose type is derived from the S_IFMT bits of
 * mode.  Returns 0 or an errno: EINVAL for dev == VNOVAL, for an
 * unsupported type, or for a non-FIFO request while p->p_fd->fd_rdir is
 * set; EEXIST when the name already exists; otherwise whatever namei(),
 * suser() or VOP_MKNOD() return.
 */
int
domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	if (dev == VNOVAL)
		return (EINVAL);
	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
	nd.ni_pledge = PLEDGE_DPATH;
	nd.ni_unveil = UNVEIL_CREATE;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	/*
	 * Everything except a plain FIFO (S_IFIFO with dev == 0) needs
	 * either vnoperm() on the parent directory or suser(), and is
	 * refused outright while the process has fd_rdir set.
	 */
	if (!S_ISFIFO(mode) || dev != 0) {
		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
			goto out;
		if (p->p_fd->fd_rdir) {
			error = EINVAL;
			goto out;
		}
	}
	if (vp != NULL)
		error = EEXIST;
	else {
		VATTR_NULL(&vattr);
		/* apply the file creation mask to the permission bits */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
		/* processes with PS_PLEDGE set are limited to ACCESSPERMS */
		if ((p->p_p->ps_flags & PS_PLEDGE))
			vattr.va_mode &= ACCESSPERMS;
		vattr.va_rdev = dev;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFIFO:
#ifndef FIFO
			/* kernel built without FIFO support */
			error = EOPNOTSUPP;
			break;
#else
			if (dev == 0) {
				vattr.va_type = VFIFO;
				break;
			}
			/* a FIFO with a non-zero dev is invalid */
			/* FALLTHROUGH */
#endif /* FIFO */
		default:
			error = EINVAL;
			break;
		}
	}
out:
	if (!error) {
		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
		vput(nd.ni_dvp);
	} else {
		/* cancel the pending create and drop what namei() returned */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		/* parent and target may be the same vnode; release it once each way */
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
	return (error);
}
1642 
1643 /*
1644  * Create a named pipe.
1645  */
1646 int
1647 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1648 {
1649 	struct sys_mkfifo_args /* {
1650 		syscallarg(const char *) path;
1651 		syscallarg(mode_t) mode;
1652 	} */ *uap = v;
1653 
1654 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1655 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1656 }
1657 
1658 int
1659 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1660 {
1661 	struct sys_mkfifoat_args /* {
1662 		syscallarg(int) fd;
1663 		syscallarg(const char *) path;
1664 		syscallarg(mode_t) mode;
1665 	} */ *uap = v;
1666 
1667 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1668 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1669 }
1670 
1671 /*
1672  * Make a hard file link.
1673  */
1674 int
1675 sys_link(struct proc *p, void *v, register_t *retval)
1676 {
1677 	struct sys_link_args /* {
1678 		syscallarg(const char *) path;
1679 		syscallarg(const char *) link;
1680 	} */ *uap = v;
1681 
1682 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1683 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1684 }
1685 
1686 int
1687 sys_linkat(struct proc *p, void *v, register_t *retval)
1688 {
1689 	struct sys_linkat_args /* {
1690 		syscallarg(int) fd1;
1691 		syscallarg(const char *) path1;
1692 		syscallarg(int) fd2;
1693 		syscallarg(const char *) path2;
1694 		syscallarg(int) flag;
1695 	} */ *uap = v;
1696 
1697 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1698 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1699 }
1700 
1701 int
1702 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1703     const char *path2, int flag)
1704 {
1705 	struct vnode *vp;
1706 	struct nameidata nd;
1707 	int error, follow;
1708 	int flags;
1709 
1710 	if (flag & ~AT_SYMLINK_FOLLOW)
1711 		return (EINVAL);
1712 
1713 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1714 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1715 	nd.ni_pledge = PLEDGE_RPATH;
1716 	nd.ni_unveil = UNVEIL_READ;
1717 	if ((error = namei(&nd)) != 0)
1718 		return (error);
1719 	vp = nd.ni_vp;
1720 
1721 	flags = LOCKPARENT;
1722 	if (vp->v_type == VDIR) {
1723 		flags |= STRIPSLASHES;
1724 	}
1725 
1726 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1727 	nd.ni_pledge = PLEDGE_CPATH;
1728 	nd.ni_unveil = UNVEIL_CREATE;
1729 	if ((error = namei(&nd)) != 0)
1730 		goto out;
1731 	if (nd.ni_vp) {
1732 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1733 		if (nd.ni_dvp == nd.ni_vp)
1734 			vrele(nd.ni_dvp);
1735 		else
1736 			vput(nd.ni_dvp);
1737 		vrele(nd.ni_vp);
1738 		error = EEXIST;
1739 		goto out;
1740 	}
1741 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1742 out:
1743 	vrele(vp);
1744 	return (error);
1745 }
1746 
1747 /*
1748  * Make a symbolic link.
1749  */
1750 int
1751 sys_symlink(struct proc *p, void *v, register_t *retval)
1752 {
1753 	struct sys_symlink_args /* {
1754 		syscallarg(const char *) path;
1755 		syscallarg(const char *) link;
1756 	} */ *uap = v;
1757 
1758 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1759 }
1760 
1761 int
1762 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1763 {
1764 	struct sys_symlinkat_args /* {
1765 		syscallarg(const char *) path;
1766 		syscallarg(int) fd;
1767 		syscallarg(const char *) link;
1768 	} */ *uap = v;
1769 
1770 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1771 	    SCARG(uap, link)));
1772 }
1773 
1774 int
1775 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1776 {
1777 	struct vattr vattr;
1778 	char *path;
1779 	int error;
1780 	struct nameidata nd;
1781 
1782 	path = pool_get(&namei_pool, PR_WAITOK);
1783 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1784 	if (error)
1785 		goto out;
1786 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1787 	nd.ni_pledge = PLEDGE_CPATH;
1788 	nd.ni_unveil = UNVEIL_CREATE;
1789 	if ((error = namei(&nd)) != 0)
1790 		goto out;
1791 	if (nd.ni_vp) {
1792 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1793 		if (nd.ni_dvp == nd.ni_vp)
1794 			vrele(nd.ni_dvp);
1795 		else
1796 			vput(nd.ni_dvp);
1797 		vrele(nd.ni_vp);
1798 		error = EEXIST;
1799 		goto out;
1800 	}
1801 	VATTR_NULL(&vattr);
1802 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1803 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1804 out:
1805 	pool_put(&namei_pool, path);
1806 	return (error);
1807 }
1808 
1809 /*
1810  * Delete a name from the filesystem.
1811  */
1812 int
1813 sys_unlink(struct proc *p, void *v, register_t *retval)
1814 {
1815 	struct sys_unlink_args /* {
1816 		syscallarg(const char *) path;
1817 	} */ *uap = v;
1818 
1819 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1820 }
1821 
1822 int
1823 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1824 {
1825 	struct sys_unlinkat_args /* {
1826 		syscallarg(int) fd;
1827 		syscallarg(const char *) path;
1828 		syscallarg(int) flag;
1829 	} */ *uap = v;
1830 
1831 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1832 	    SCARG(uap, flag)));
1833 }
1834 
/*
 * Common implementation of unlink/unlinkat.
 *
 * Looks up path relative to fd for deletion and removes it with
 * VOP_REMOVE(), or with VOP_RMDIR() when AT_REMOVEDIR is set in flag.
 * Returns EINVAL for unknown flag bits or an attempt to rmdir ".",
 * ENOTDIR when AT_REMOVEDIR is used on a non-directory, EBUSY for a
 * mounted-on directory or the root of a filesystem, otherwise the result
 * of namei() or the remove operation.
 */
int
dounlinkat(struct proc *p, int fd, const char *path, int flag)
{
	struct vnode *vp;
	int error;
	struct nameidata nd;

	if (flag & ~AT_REMOVEDIR)
		return (EINVAL);

	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
	    fd, path, p);
	nd.ni_pledge = PLEDGE_CPATH;
	nd.ni_unveil = UNVEIL_CREATE;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;

	if (flag & AT_REMOVEDIR) {
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		}
		/*
		 * No rmdir "." please.
		 */
		if (nd.ni_dvp == vp) {
			error = EINVAL;
			goto out;
		}
		/*
		 * A mounted on directory cannot be deleted.
		 */
		if (vp->v_mountedhere != NULL) {
			error = EBUSY;
			goto out;
		}
	}

	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_flag & VROOT)
		error = EBUSY;
out:
	/* error is still 0 here when every check above passed */
	if (!error) {
		if (flag & AT_REMOVEDIR) {
			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
		} else {
			(void)uvm_vnp_uncache(vp);
			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
		}
	} else {
		/* cancel the pending delete and drop what namei() returned */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		/* parent and target may be the same vnode */
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vput(vp);
	}
	return (error);
}
1897 
1898 /*
1899  * Reposition read/write file offset.
1900  */
1901 int
1902 sys_lseek(struct proc *p, void *v, register_t *retval)
1903 {
1904 	struct sys_lseek_args /* {
1905 		syscallarg(int) fd;
1906 		syscallarg(int) pad;
1907 		syscallarg(off_t) offset;
1908 		syscallarg(int) whence;
1909 	} */ *uap = v;
1910 	struct filedesc *fdp = p->p_fd;
1911 	struct file *fp;
1912 	off_t offset;
1913 	int error;
1914 
1915 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1916 		return (EBADF);
1917 	if (fp->f_ops->fo_seek == NULL) {
1918 		error = ESPIPE;
1919 		goto bad;
1920 	}
1921 	offset = SCARG(uap, offset);
1922 
1923 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1924 	if (error)
1925 		goto bad;
1926 
1927 	*(off_t *)retval = offset;
1928 	mtx_enter(&fp->f_mtx);
1929 	fp->f_seek++;
1930 	mtx_leave(&fp->f_mtx);
1931 	error = 0;
1932  bad:
1933 	FRELE(fp, p);
1934 	return (error);
1935 }
1936 
1937 /*
1938  * Check access permissions.
1939  */
1940 int
1941 sys_access(struct proc *p, void *v, register_t *retval)
1942 {
1943 	struct sys_access_args /* {
1944 		syscallarg(const char *) path;
1945 		syscallarg(int) amode;
1946 	} */ *uap = v;
1947 
1948 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1949 	    SCARG(uap, amode), 0));
1950 }
1951 
1952 int
1953 sys_faccessat(struct proc *p, void *v, register_t *retval)
1954 {
1955 	struct sys_faccessat_args /* {
1956 		syscallarg(int) fd;
1957 		syscallarg(const char *) path;
1958 		syscallarg(int) amode;
1959 		syscallarg(int) flag;
1960 	} */ *uap = v;
1961 
1962 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1963 	    SCARG(uap, amode), SCARG(uap, flag)));
1964 }
1965 
1966 int
1967 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1968 {
1969 	struct vnode *vp;
1970 	struct ucred *newcred, *oldcred;
1971 	struct nameidata nd;
1972 	int error;
1973 
1974 	if (amode & ~(R_OK | W_OK | X_OK))
1975 		return (EINVAL);
1976 	if (flag & ~AT_EACCESS)
1977 		return (EINVAL);
1978 
1979 	newcred = NULL;
1980 	oldcred = p->p_ucred;
1981 
1982 	/*
1983 	 * If access as real ids was requested and they really differ,
1984 	 * give the thread new creds with them reset
1985 	 */
1986 	if ((flag & AT_EACCESS) == 0 &&
1987 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1988 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1989 		p->p_ucred = newcred = crdup(oldcred);
1990 		newcred->cr_uid = newcred->cr_ruid;
1991 		newcred->cr_gid = newcred->cr_rgid;
1992 	}
1993 
1994 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1995 	nd.ni_pledge = PLEDGE_RPATH;
1996 	nd.ni_unveil = UNVEIL_READ;
1997 	if ((error = namei(&nd)) != 0)
1998 		goto out;
1999 	vp = nd.ni_vp;
2000 
2001 	/* Flags == 0 means only check for existence. */
2002 	if (amode) {
2003 		int vflags = 0;
2004 
2005 		if (amode & R_OK)
2006 			vflags |= VREAD;
2007 		if (amode & W_OK)
2008 			vflags |= VWRITE;
2009 		if (amode & X_OK)
2010 			vflags |= VEXEC;
2011 
2012 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2013 		if (!error && (vflags & VWRITE))
2014 			error = vn_writechk(vp);
2015 	}
2016 	vput(vp);
2017 out:
2018 	if (newcred != NULL) {
2019 		p->p_ucred = oldcred;
2020 		crfree(newcred);
2021 	}
2022 	return (error);
2023 }
2024 
2025 /*
2026  * Get file status; this version follows links.
2027  */
2028 int
2029 sys_stat(struct proc *p, void *v, register_t *retval)
2030 {
2031 	struct sys_stat_args /* {
2032 		syscallarg(const char *) path;
2033 		syscallarg(struct stat *) ub;
2034 	} */ *uap = v;
2035 
2036 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2037 }
2038 
2039 int
2040 sys_fstatat(struct proc *p, void *v, register_t *retval)
2041 {
2042 	struct sys_fstatat_args /* {
2043 		syscallarg(int) fd;
2044 		syscallarg(const char *) path;
2045 		syscallarg(struct stat *) buf;
2046 		syscallarg(int) flag;
2047 	} */ *uap = v;
2048 
2049 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2050 	    SCARG(uap, buf), SCARG(uap, flag)));
2051 }
2052 
2053 int
2054 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2055 {
2056 	struct stat sb;
2057 	int error, follow;
2058 	struct nameidata nd;
2059 
2060 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2061 		return (EINVAL);
2062 
2063 
2064 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2065 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2066 	nd.ni_pledge = PLEDGE_RPATH;
2067 	nd.ni_unveil = UNVEIL_READ;
2068 	if ((error = namei(&nd)) != 0)
2069 		return (error);
2070 	error = vn_stat(nd.ni_vp, &sb, p);
2071 	vput(nd.ni_vp);
2072 	if (error)
2073 		return (error);
2074 	/* Don't let non-root see generation numbers (for NFS security) */
2075 	if (suser(p))
2076 		sb.st_gen = 0;
2077 	error = copyout(&sb, buf, sizeof(sb));
2078 #ifdef KTRACE
2079 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2080 		ktrstat(p, &sb);
2081 #endif
2082 	return (error);
2083 }
2084 
2085 /*
2086  * Get file status; this version does not follow links.
2087  */
2088 int
2089 sys_lstat(struct proc *p, void *v, register_t *retval)
2090 {
2091 	struct sys_lstat_args /* {
2092 		syscallarg(const char *) path;
2093 		syscallarg(struct stat *) ub;
2094 	} */ *uap = v;
2095 
2096 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2097 	    AT_SYMLINK_NOFOLLOW));
2098 }
2099 
2100 /*
2101  * Get configurable pathname variables.
2102  */
2103 int
2104 sys_pathconf(struct proc *p, void *v, register_t *retval)
2105 {
2106 	struct sys_pathconf_args /* {
2107 		syscallarg(const char *) path;
2108 		syscallarg(int) name;
2109 	} */ *uap = v;
2110 	int error;
2111 	struct nameidata nd;
2112 
2113 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2114 	    SCARG(uap, path), p);
2115 	nd.ni_pledge = PLEDGE_RPATH;
2116 	nd.ni_unveil = UNVEIL_READ;
2117 	if ((error = namei(&nd)) != 0)
2118 		return (error);
2119 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2120 	vput(nd.ni_vp);
2121 	return (error);
2122 }
2123 
2124 /*
2125  * Return target name of a symbolic link.
2126  */
2127 int
2128 sys_readlink(struct proc *p, void *v, register_t *retval)
2129 {
2130 	struct sys_readlink_args /* {
2131 		syscallarg(const char *) path;
2132 		syscallarg(char *) buf;
2133 		syscallarg(size_t) count;
2134 	} */ *uap = v;
2135 
2136 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2137 	    SCARG(uap, count), retval));
2138 }
2139 
2140 int
2141 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2142 {
2143 	struct sys_readlinkat_args /* {
2144 		syscallarg(int) fd;
2145 		syscallarg(const char *) path;
2146 		syscallarg(char *) buf;
2147 		syscallarg(size_t) count;
2148 	} */ *uap = v;
2149 
2150 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2151 	    SCARG(uap, buf), SCARG(uap, count), retval));
2152 }
2153 
2154 int
2155 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2156     size_t count, register_t *retval)
2157 {
2158 	struct vnode *vp;
2159 	struct iovec aiov;
2160 	struct uio auio;
2161 	int error;
2162 	struct nameidata nd;
2163 
2164 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2165 	nd.ni_pledge = PLEDGE_RPATH;
2166 	nd.ni_unveil = UNVEIL_READ;
2167 	if ((error = namei(&nd)) != 0)
2168 		return (error);
2169 	vp = nd.ni_vp;
2170 	if (vp->v_type != VLNK)
2171 		error = EINVAL;
2172 	else {
2173 		aiov.iov_base = buf;
2174 		aiov.iov_len = count;
2175 		auio.uio_iov = &aiov;
2176 		auio.uio_iovcnt = 1;
2177 		auio.uio_offset = 0;
2178 		auio.uio_rw = UIO_READ;
2179 		auio.uio_segflg = UIO_USERSPACE;
2180 		auio.uio_procp = p;
2181 		auio.uio_resid = count;
2182 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2183 		*retval = count - auio.uio_resid;
2184 	}
2185 	vput(vp);
2186 	return (error);
2187 }
2188 
2189 /*
2190  * Change flags of a file given a path name.
2191  */
2192 int
2193 sys_chflags(struct proc *p, void *v, register_t *retval)
2194 {
2195 	struct sys_chflags_args /* {
2196 		syscallarg(const char *) path;
2197 		syscallarg(u_int) flags;
2198 	} */ *uap = v;
2199 
2200 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2201 	    SCARG(uap, flags), 0));
2202 }
2203 
2204 int
2205 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2206 {
2207 	struct sys_chflagsat_args /* {
2208 		syscallarg(int) fd;
2209 		syscallarg(const char *) path;
2210 		syscallarg(u_int) flags;
2211 		syscallarg(int) atflags;
2212 	} */ *uap = v;
2213 
2214 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2215 	    SCARG(uap, flags), SCARG(uap, atflags)));
2216 }
2217 
2218 int
2219 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2220 {
2221 	struct nameidata nd;
2222 	int error, follow;
2223 
2224 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2225 		return (EINVAL);
2226 
2227 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2228 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2229 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2230 	nd.ni_unveil = UNVEIL_WRITE;
2231 	if ((error = namei(&nd)) != 0)
2232 		return (error);
2233 	return (dovchflags(p, nd.ni_vp, flags));
2234 }
2235 
2236 /*
2237  * Change flags of a file given a file descriptor.
2238  */
2239 int
2240 sys_fchflags(struct proc *p, void *v, register_t *retval)
2241 {
2242 	struct sys_fchflags_args /* {
2243 		syscallarg(int) fd;
2244 		syscallarg(u_int) flags;
2245 	} */ *uap = v;
2246 	struct file *fp;
2247 	struct vnode *vp;
2248 	int error;
2249 
2250 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2251 		return (error);
2252 	vp = fp->f_data;
2253 	vref(vp);
2254 	FRELE(fp, p);
2255 	return (dovchflags(p, vp, SCARG(uap, flags)));
2256 }
2257 
2258 int
2259 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2260 {
2261 	struct vattr vattr;
2262 	int error;
2263 
2264 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2265 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2266 		error = EROFS;
2267 	else if (flags == VNOVAL)
2268 		error = EINVAL;
2269 	else {
2270 		if (suser(p)) {
2271 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2272 			    != 0)
2273 				goto out;
2274 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2275 				error = EINVAL;
2276 				goto out;
2277 			}
2278 		}
2279 		VATTR_NULL(&vattr);
2280 		vattr.va_flags = flags;
2281 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2282 	}
2283 out:
2284 	vput(vp);
2285 	return (error);
2286 }
2287 
2288 /*
2289  * Change mode of a file given path name.
2290  */
2291 int
2292 sys_chmod(struct proc *p, void *v, register_t *retval)
2293 {
2294 	struct sys_chmod_args /* {
2295 		syscallarg(const char *) path;
2296 		syscallarg(mode_t) mode;
2297 	} */ *uap = v;
2298 
2299 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2300 }
2301 
2302 int
2303 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2304 {
2305 	struct sys_fchmodat_args /* {
2306 		syscallarg(int) fd;
2307 		syscallarg(const char *) path;
2308 		syscallarg(mode_t) mode;
2309 		syscallarg(int) flag;
2310 	} */ *uap = v;
2311 
2312 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2313 	    SCARG(uap, mode), SCARG(uap, flag)));
2314 }
2315 
2316 int
2317 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2318 {
2319 	struct vnode *vp;
2320 	struct vattr vattr;
2321 	int error, follow;
2322 	struct nameidata nd;
2323 
2324 	if (mode & ~(S_IFMT | ALLPERMS))
2325 		return (EINVAL);
2326 	if ((p->p_p->ps_flags & PS_PLEDGE))
2327 		mode &= ACCESSPERMS;
2328 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2329 		return (EINVAL);
2330 
2331 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2332 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2333 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2334 	nd.ni_unveil = UNVEIL_WRITE;
2335 	if ((error = namei(&nd)) != 0)
2336 		return (error);
2337 	vp = nd.ni_vp;
2338 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2339 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2340 		error = EROFS;
2341 	else {
2342 		VATTR_NULL(&vattr);
2343 		vattr.va_mode = mode & ALLPERMS;
2344 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2345 	}
2346 	vput(vp);
2347 	return (error);
2348 }
2349 
2350 /*
2351  * Change mode of a file given a file descriptor.
2352  */
2353 int
2354 sys_fchmod(struct proc *p, void *v, register_t *retval)
2355 {
2356 	struct sys_fchmod_args /* {
2357 		syscallarg(int) fd;
2358 		syscallarg(mode_t) mode;
2359 	} */ *uap = v;
2360 	struct vattr vattr;
2361 	struct vnode *vp;
2362 	struct file *fp;
2363 	mode_t mode = SCARG(uap, mode);
2364 	int error;
2365 
2366 	if (mode & ~(S_IFMT | ALLPERMS))
2367 		return (EINVAL);
2368 	if ((p->p_p->ps_flags & PS_PLEDGE))
2369 		mode &= ACCESSPERMS;
2370 
2371 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2372 		return (error);
2373 	vp = fp->f_data;
2374 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2375 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2376 		error = EROFS;
2377 	else {
2378 		VATTR_NULL(&vattr);
2379 		vattr.va_mode = mode & ALLPERMS;
2380 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2381 	}
2382 	VOP_UNLOCK(vp);
2383 	FRELE(fp, p);
2384 	return (error);
2385 }
2386 
2387 /*
2388  * Set ownership given a path name.
2389  */
2390 int
2391 sys_chown(struct proc *p, void *v, register_t *retval)
2392 {
2393 	struct sys_chown_args /* {
2394 		syscallarg(const char *) path;
2395 		syscallarg(uid_t) uid;
2396 		syscallarg(gid_t) gid;
2397 	} */ *uap = v;
2398 
2399 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2400 	    SCARG(uap, gid), 0));
2401 }
2402 
2403 int
2404 sys_fchownat(struct proc *p, void *v, register_t *retval)
2405 {
2406 	struct sys_fchownat_args /* {
2407 		syscallarg(int) fd;
2408 		syscallarg(const char *) path;
2409 		syscallarg(uid_t) uid;
2410 		syscallarg(gid_t) gid;
2411 		syscallarg(int) flag;
2412 	} */ *uap = v;
2413 
2414 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2415 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2416 }
2417 
2418 int
2419 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2420     int flag)
2421 {
2422 	struct vnode *vp;
2423 	struct vattr vattr;
2424 	int error, follow;
2425 	struct nameidata nd;
2426 	mode_t mode;
2427 
2428 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2429 		return (EINVAL);
2430 
2431 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2432 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2433 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2434 	nd.ni_unveil = UNVEIL_WRITE;
2435 	if ((error = namei(&nd)) != 0)
2436 		return (error);
2437 	vp = nd.ni_vp;
2438 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2439 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2440 		error = EROFS;
2441 	else {
2442 		if ((error = pledge_chown(p, uid, gid)))
2443 			goto out;
2444 		if ((uid != -1 || gid != -1) &&
2445 		    !vnoperm(vp) &&
2446 		    (suser(p) || suid_clear)) {
2447 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2448 			if (error)
2449 				goto out;
2450 			mode = vattr.va_mode & ~(VSUID | VSGID);
2451 			if (mode == vattr.va_mode)
2452 				mode = VNOVAL;
2453 		} else
2454 			mode = VNOVAL;
2455 		VATTR_NULL(&vattr);
2456 		vattr.va_uid = uid;
2457 		vattr.va_gid = gid;
2458 		vattr.va_mode = mode;
2459 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2460 	}
2461 out:
2462 	vput(vp);
2463 	return (error);
2464 }
2465 
2466 /*
2467  * Set ownership given a path name, without following links.
2468  */
2469 int
2470 sys_lchown(struct proc *p, void *v, register_t *retval)
2471 {
2472 	struct sys_lchown_args /* {
2473 		syscallarg(const char *) path;
2474 		syscallarg(uid_t) uid;
2475 		syscallarg(gid_t) gid;
2476 	} */ *uap = v;
2477 	struct vnode *vp;
2478 	struct vattr vattr;
2479 	int error;
2480 	struct nameidata nd;
2481 	mode_t mode;
2482 	uid_t uid = SCARG(uap, uid);
2483 	gid_t gid = SCARG(uap, gid);
2484 
2485 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2486 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2487 	nd.ni_unveil = UNVEIL_WRITE;
2488 	if ((error = namei(&nd)) != 0)
2489 		return (error);
2490 	vp = nd.ni_vp;
2491 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2492 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2493 		error = EROFS;
2494 	else {
2495 		if ((error = pledge_chown(p, uid, gid)))
2496 			goto out;
2497 		if ((uid != -1 || gid != -1) &&
2498 		    !vnoperm(vp) &&
2499 		    (suser(p) || suid_clear)) {
2500 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2501 			if (error)
2502 				goto out;
2503 			mode = vattr.va_mode & ~(VSUID | VSGID);
2504 			if (mode == vattr.va_mode)
2505 				mode = VNOVAL;
2506 		} else
2507 			mode = VNOVAL;
2508 		VATTR_NULL(&vattr);
2509 		vattr.va_uid = uid;
2510 		vattr.va_gid = gid;
2511 		vattr.va_mode = mode;
2512 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2513 	}
2514 out:
2515 	vput(vp);
2516 	return (error);
2517 }
2518 
2519 /*
2520  * Set ownership given a file descriptor.
2521  */
2522 int
2523 sys_fchown(struct proc *p, void *v, register_t *retval)
2524 {
2525 	struct sys_fchown_args /* {
2526 		syscallarg(int) fd;
2527 		syscallarg(uid_t) uid;
2528 		syscallarg(gid_t) gid;
2529 	} */ *uap = v;
2530 	struct vnode *vp;
2531 	struct vattr vattr;
2532 	int error;
2533 	struct file *fp;
2534 	mode_t mode;
2535 	uid_t uid = SCARG(uap, uid);
2536 	gid_t gid = SCARG(uap, gid);
2537 
2538 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2539 		return (error);
2540 	vp = fp->f_data;
2541 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2542 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2543 		error = EROFS;
2544 	else {
2545 		if ((error = pledge_chown(p, uid, gid)))
2546 			goto out;
2547 		if ((uid != -1 || gid != -1) &&
2548 		    !vnoperm(vp) &&
2549 		    (suser(p) || suid_clear)) {
2550 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2551 			if (error)
2552 				goto out;
2553 			mode = vattr.va_mode & ~(VSUID | VSGID);
2554 			if (mode == vattr.va_mode)
2555 				mode = VNOVAL;
2556 		} else
2557 			mode = VNOVAL;
2558 		VATTR_NULL(&vattr);
2559 		vattr.va_uid = uid;
2560 		vattr.va_gid = gid;
2561 		vattr.va_mode = mode;
2562 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2563 	}
2564 out:
2565 	VOP_UNLOCK(vp);
2566 	FRELE(fp, p);
2567 	return (error);
2568 }
2569 
2570 /*
2571  * Set the access and modification times given a path name.
2572  */
2573 int
2574 sys_utimes(struct proc *p, void *v, register_t *retval)
2575 {
2576 	struct sys_utimes_args /* {
2577 		syscallarg(const char *) path;
2578 		syscallarg(const struct timeval *) tptr;
2579 	} */ *uap = v;
2580 
2581 	struct timespec ts[2];
2582 	struct timeval tv[2];
2583 	const struct timeval *tvp;
2584 	int error;
2585 
2586 	tvp = SCARG(uap, tptr);
2587 	if (tvp != NULL) {
2588 		error = copyin(tvp, tv, sizeof(tv));
2589 		if (error)
2590 			return (error);
2591 #ifdef KTRACE
2592 		if (KTRPOINT(p, KTR_STRUCT))
2593 			ktrabstimeval(p, &tv);
2594 #endif
2595 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2596 			return (EINVAL);
2597 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2598 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2599 	} else
2600 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2601 
2602 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2603 }
2604 
2605 int
2606 sys_utimensat(struct proc *p, void *v, register_t *retval)
2607 {
2608 	struct sys_utimensat_args /* {
2609 		syscallarg(int) fd;
2610 		syscallarg(const char *) path;
2611 		syscallarg(const struct timespec *) times;
2612 		syscallarg(int) flag;
2613 	} */ *uap = v;
2614 
2615 	struct timespec ts[2];
2616 	const struct timespec *tsp;
2617 	int error, i;
2618 
2619 	tsp = SCARG(uap, times);
2620 	if (tsp != NULL) {
2621 		error = copyin(tsp, ts, sizeof(ts));
2622 		if (error)
2623 			return (error);
2624 		for (i = 0; i < nitems(ts); i++) {
2625 			if (ts[i].tv_nsec == UTIME_NOW)
2626 				continue;
2627 			if (ts[i].tv_nsec == UTIME_OMIT)
2628 				continue;
2629 #ifdef KTRACE
2630 			if (KTRPOINT(p, KTR_STRUCT))
2631 				ktrabstimespec(p, &ts[i]);
2632 #endif
2633 			if (!timespecisvalid(&ts[i]))
2634 				return (EINVAL);
2635 		}
2636 	} else
2637 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2638 
2639 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2640 	    SCARG(uap, flag)));
2641 }
2642 
2643 int
2644 doutimensat(struct proc *p, int fd, const char *path,
2645     struct timespec ts[2], int flag)
2646 {
2647 	struct vnode *vp;
2648 	int error, follow;
2649 	struct nameidata nd;
2650 
2651 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2652 		return (EINVAL);
2653 
2654 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2655 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2656 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2657 	nd.ni_unveil = UNVEIL_WRITE;
2658 	if ((error = namei(&nd)) != 0)
2659 		return (error);
2660 	vp = nd.ni_vp;
2661 
2662 	return (dovutimens(p, vp, ts));
2663 }
2664 
2665 int
2666 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2667 {
2668 	struct vattr vattr;
2669 	struct timespec now;
2670 	int error;
2671 
2672 #ifdef KTRACE
2673 	/* if they're both UTIME_NOW, then don't report either */
2674 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2675 	    KTRPOINT(p, KTR_STRUCT)) {
2676 		ktrabstimespec(p, &ts[0]);
2677 		ktrabstimespec(p, &ts[1]);
2678 	}
2679 #endif
2680 
2681 	VATTR_NULL(&vattr);
2682 
2683 	/*  make sure ctime is updated even if neither mtime nor atime is */
2684 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2685 
2686 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2687 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2688 			vattr.va_vaflags |= VA_UTIMES_NULL;
2689 
2690 		getnanotime(&now);
2691 		if (ts[0].tv_nsec == UTIME_NOW)
2692 			ts[0] = now;
2693 		if (ts[1].tv_nsec == UTIME_NOW)
2694 			ts[1] = now;
2695 	}
2696 
2697 	if (ts[0].tv_nsec != UTIME_OMIT)
2698 		vattr.va_atime = ts[0];
2699 	if (ts[1].tv_nsec != UTIME_OMIT)
2700 		vattr.va_mtime = ts[1];
2701 
2702 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2703 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2704 		error = EROFS;
2705 	else
2706 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2707 	vput(vp);
2708 	return (error);
2709 }
2710 
2711 /*
2712  * Set the access and modification times given a file descriptor.
2713  */
2714 int
2715 sys_futimes(struct proc *p, void *v, register_t *retval)
2716 {
2717 	struct sys_futimes_args /* {
2718 		syscallarg(int) fd;
2719 		syscallarg(const struct timeval *) tptr;
2720 	} */ *uap = v;
2721 	struct timeval tv[2];
2722 	struct timespec ts[2];
2723 	const struct timeval *tvp;
2724 	int error;
2725 
2726 	tvp = SCARG(uap, tptr);
2727 	if (tvp != NULL) {
2728 		error = copyin(tvp, tv, sizeof(tv));
2729 		if (error)
2730 			return (error);
2731 #ifdef KTRACE
2732 		if (KTRPOINT(p, KTR_STRUCT)) {
2733 			ktrabstimeval(p, &tv[0]);
2734 			ktrabstimeval(p, &tv[1]);
2735 		}
2736 #endif
2737 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2738 			return (EINVAL);
2739 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2740 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2741 	} else
2742 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2743 
2744 	return (dofutimens(p, SCARG(uap, fd), ts));
2745 }
2746 
2747 int
2748 sys_futimens(struct proc *p, void *v, register_t *retval)
2749 {
2750 	struct sys_futimens_args /* {
2751 		syscallarg(int) fd;
2752 		syscallarg(const struct timespec *) times;
2753 	} */ *uap = v;
2754 	struct timespec ts[2];
2755 	const struct timespec *tsp;
2756 	int error, i;
2757 
2758 	tsp = SCARG(uap, times);
2759 	if (tsp != NULL) {
2760 		error = copyin(tsp, ts, sizeof(ts));
2761 		if (error)
2762 			return (error);
2763 		for (i = 0; i < nitems(ts); i++) {
2764 			if (ts[i].tv_nsec == UTIME_NOW)
2765 				continue;
2766 			if (ts[i].tv_nsec == UTIME_OMIT)
2767 				continue;
2768 #ifdef KTRACE
2769 			if (KTRPOINT(p, KTR_STRUCT))
2770 				ktrabstimespec(p, &ts[i]);
2771 #endif
2772 			if (!timespecisvalid(&ts[i]))
2773 				return (EINVAL);
2774 		}
2775 	} else
2776 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2777 
2778 	return (dofutimens(p, SCARG(uap, fd), ts));
2779 }
2780 
2781 int
2782 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2783 {
2784 	struct file *fp;
2785 	struct vnode *vp;
2786 	int error;
2787 
2788 	if ((error = getvnode(p, fd, &fp)) != 0)
2789 		return (error);
2790 	vp = fp->f_data;
2791 	vref(vp);
2792 	FRELE(fp, p);
2793 
2794 	return (dovutimens(p, vp, ts));
2795 }
2796 
2797 /*
2798  * Truncate a file given its path name.
2799  */
2800 int
2801 sys_truncate(struct proc *p, void *v, register_t *retval)
2802 {
2803 	struct sys_truncate_args /* {
2804 		syscallarg(const char *) path;
2805 		syscallarg(int) pad;
2806 		syscallarg(off_t) length;
2807 	} */ *uap = v;
2808 	struct vnode *vp;
2809 	struct vattr vattr;
2810 	int error;
2811 	struct nameidata nd;
2812 
2813 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2814 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2815 	nd.ni_unveil = UNVEIL_WRITE;
2816 	if ((error = namei(&nd)) != 0)
2817 		return (error);
2818 	vp = nd.ni_vp;
2819 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2820 	if (vp->v_type == VDIR)
2821 		error = EISDIR;
2822 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2823 	    (error = vn_writechk(vp)) == 0) {
2824 		VATTR_NULL(&vattr);
2825 		vattr.va_size = SCARG(uap, length);
2826 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2827 	}
2828 	vput(vp);
2829 	return (error);
2830 }
2831 
2832 /*
2833  * Truncate a file given a file descriptor.
2834  */
2835 int
2836 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2837 {
2838 	struct sys_ftruncate_args /* {
2839 		syscallarg(int) fd;
2840 		syscallarg(int) pad;
2841 		syscallarg(off_t) length;
2842 	} */ *uap = v;
2843 	struct vattr vattr;
2844 	struct vnode *vp;
2845 	struct file *fp;
2846 	off_t len;
2847 	int error;
2848 
2849 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2850 		return (error);
2851 	len = SCARG(uap, length);
2852 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2853 		error = EINVAL;
2854 		goto bad;
2855 	}
2856 	vp = fp->f_data;
2857 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2858 	if (vp->v_type == VDIR)
2859 		error = EISDIR;
2860 	else if ((error = vn_writechk(vp)) == 0) {
2861 		VATTR_NULL(&vattr);
2862 		vattr.va_size = len;
2863 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2864 	}
2865 	VOP_UNLOCK(vp);
2866 bad:
2867 	FRELE(fp, p);
2868 	return (error);
2869 }
2870 
2871 /*
2872  * Sync an open file.
2873  */
2874 int
2875 sys_fsync(struct proc *p, void *v, register_t *retval)
2876 {
2877 	struct sys_fsync_args /* {
2878 		syscallarg(int) fd;
2879 	} */ *uap = v;
2880 	struct vnode *vp;
2881 	struct file *fp;
2882 	int error;
2883 
2884 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2885 		return (error);
2886 	vp = fp->f_data;
2887 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2888 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2889 #ifdef FFS_SOFTUPDATES
2890 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2891 		error = softdep_fsync(vp);
2892 #endif
2893 
2894 	VOP_UNLOCK(vp);
2895 	FRELE(fp, p);
2896 	return (error);
2897 }
2898 
2899 /*
2900  * Rename files.  Source and destination must either both be directories,
2901  * or both not be directories.  If target is a directory, it must be empty.
2902  */
2903 int
2904 sys_rename(struct proc *p, void *v, register_t *retval)
2905 {
2906 	struct sys_rename_args /* {
2907 		syscallarg(const char *) from;
2908 		syscallarg(const char *) to;
2909 	} */ *uap = v;
2910 
2911 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2912 	    SCARG(uap, to)));
2913 }
2914 
2915 int
2916 sys_renameat(struct proc *p, void *v, register_t *retval)
2917 {
2918 	struct sys_renameat_args /* {
2919 		syscallarg(int) fromfd;
2920 		syscallarg(const char *) from;
2921 		syscallarg(int) tofd;
2922 		syscallarg(const char *) to;
2923 	} */ *uap = v;
2924 
2925 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2926 	    SCARG(uap, tofd), SCARG(uap, to)));
2927 }
2928 
2929 int
2930 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2931     const char *to)
2932 {
2933 	struct vnode *tvp, *fvp, *tdvp;
2934 	struct nameidata fromnd, tond;
2935 	int error;
2936 	int flags;
2937 
2938 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2939 	    fromfd, from, p);
2940 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2941 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2942 	if ((error = namei(&fromnd)) != 0)
2943 		return (error);
2944 	fvp = fromnd.ni_vp;
2945 
2946 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2947 	/*
2948 	 * rename("foo/", "bar/");  is  OK
2949 	 */
2950 	if (fvp->v_type == VDIR)
2951 		flags |= STRIPSLASHES;
2952 
2953 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2954 	tond.ni_pledge = PLEDGE_CPATH;
2955 	tond.ni_unveil = UNVEIL_CREATE;
2956 	if ((error = namei(&tond)) != 0) {
2957 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2958 		vrele(fromnd.ni_dvp);
2959 		vrele(fvp);
2960 		goto out1;
2961 	}
2962 	tdvp = tond.ni_dvp;
2963 	tvp = tond.ni_vp;
2964 	if (tvp != NULL) {
2965 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2966 			error = ENOTDIR;
2967 			goto out;
2968 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2969 			error = EISDIR;
2970 			goto out;
2971 		}
2972 	}
2973 	if (fvp == tdvp)
2974 		error = EINVAL;
2975 	/*
2976 	 * If source is the same as the destination (that is the
2977 	 * same inode number)
2978 	 */
2979 	if (fvp == tvp)
2980 		error = -1;
2981 out:
2982 	if (!error) {
2983 		if (tvp) {
2984 			(void)uvm_vnp_uncache(tvp);
2985 		}
2986 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2987 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2988 	} else {
2989 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2990 		if (tdvp == tvp)
2991 			vrele(tdvp);
2992 		else
2993 			vput(tdvp);
2994 		if (tvp)
2995 			vput(tvp);
2996 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2997 		vrele(fromnd.ni_dvp);
2998 		vrele(fvp);
2999 	}
3000 	vrele(tond.ni_startdir);
3001 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
3002 out1:
3003 	if (fromnd.ni_startdir)
3004 		vrele(fromnd.ni_startdir);
3005 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
3006 	if (error == -1)
3007 		return (0);
3008 	return (error);
3009 }
3010 
3011 /*
3012  * Make a directory file.
3013  */
3014 int
3015 sys_mkdir(struct proc *p, void *v, register_t *retval)
3016 {
3017 	struct sys_mkdir_args /* {
3018 		syscallarg(const char *) path;
3019 		syscallarg(mode_t) mode;
3020 	} */ *uap = v;
3021 
3022 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3023 }
3024 
3025 int
3026 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3027 {
3028 	struct sys_mkdirat_args /* {
3029 		syscallarg(int) fd;
3030 		syscallarg(const char *) path;
3031 		syscallarg(mode_t) mode;
3032 	} */ *uap = v;
3033 
3034 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3035 	    SCARG(uap, mode)));
3036 }
3037 
3038 int
3039 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3040 {
3041 	struct vnode *vp;
3042 	struct vattr vattr;
3043 	int error;
3044 	struct nameidata nd;
3045 
3046 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3047 	    fd, path, p);
3048 	nd.ni_pledge = PLEDGE_CPATH;
3049 	nd.ni_unveil = UNVEIL_CREATE;
3050 	if ((error = namei(&nd)) != 0)
3051 		return (error);
3052 	vp = nd.ni_vp;
3053 	if (vp != NULL) {
3054 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3055 		if (nd.ni_dvp == vp)
3056 			vrele(nd.ni_dvp);
3057 		else
3058 			vput(nd.ni_dvp);
3059 		vrele(vp);
3060 		return (EEXIST);
3061 	}
3062 	VATTR_NULL(&vattr);
3063 	vattr.va_type = VDIR;
3064 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3065 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3066 	if (!error)
3067 		vput(nd.ni_vp);
3068 	return (error);
3069 }
3070 
3071 /*
3072  * Remove a directory file.
3073  */
3074 int
3075 sys_rmdir(struct proc *p, void *v, register_t *retval)
3076 {
3077 	struct sys_rmdir_args /* {
3078 		syscallarg(const char *) path;
3079 	} */ *uap = v;
3080 
3081 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3082 }
3083 
3084 /*
3085  * Read a block of directory entries in a file system independent format.
3086  */
3087 int
3088 sys_getdents(struct proc *p, void *v, register_t *retval)
3089 {
3090 	struct sys_getdents_args /* {
3091 		syscallarg(int) fd;
3092 		syscallarg(void *) buf;
3093 		syscallarg(size_t) buflen;
3094 	} */ *uap = v;
3095 	struct vnode *vp;
3096 	struct file *fp;
3097 	struct uio auio;
3098 	struct iovec aiov;
3099 	size_t buflen;
3100 	int error, eofflag;
3101 
3102 	buflen = SCARG(uap, buflen);
3103 
3104 	if (buflen > INT_MAX)
3105 		return (EINVAL);
3106 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3107 		return (error);
3108 	if ((fp->f_flag & FREAD) == 0) {
3109 		error = EBADF;
3110 		goto bad;
3111 	}
3112 	vp = fp->f_data;
3113 	if (vp->v_type != VDIR) {
3114 		error = EINVAL;
3115 		goto bad;
3116 	}
3117 
3118 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3119 
3120 	if (fp->f_offset < 0) {
3121 		VOP_UNLOCK(vp);
3122 		error = EINVAL;
3123 		goto bad;
3124 	}
3125 
3126 	aiov.iov_base = SCARG(uap, buf);
3127 	aiov.iov_len = buflen;
3128 	auio.uio_iov = &aiov;
3129 	auio.uio_iovcnt = 1;
3130 	auio.uio_rw = UIO_READ;
3131 	auio.uio_segflg = UIO_USERSPACE;
3132 	auio.uio_procp = p;
3133 	auio.uio_resid = buflen;
3134 	auio.uio_offset = fp->f_offset;
3135 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3136 	mtx_enter(&fp->f_mtx);
3137 	fp->f_offset = auio.uio_offset;
3138 	mtx_leave(&fp->f_mtx);
3139 	VOP_UNLOCK(vp);
3140 	if (error)
3141 		goto bad;
3142 	*retval = buflen - auio.uio_resid;
3143 bad:
3144 	FRELE(fp, p);
3145 	return (error);
3146 }
3147 
3148 /*
3149  * Set the mode mask for creation of filesystem nodes.
3150  */
3151 int
3152 sys_umask(struct proc *p, void *v, register_t *retval)
3153 {
3154 	struct sys_umask_args /* {
3155 		syscallarg(mode_t) newmask;
3156 	} */ *uap = v;
3157 	struct filedesc *fdp = p->p_fd;
3158 
3159 	fdplock(fdp);
3160 	*retval = fdp->fd_cmask;
3161 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3162 	fdpunlock(fdp);
3163 	return (0);
3164 }
3165 
3166 /*
3167  * Void all references to file by ripping underlying filesystem
3168  * away from vnode.
3169  */
3170 int
3171 sys_revoke(struct proc *p, void *v, register_t *retval)
3172 {
3173 	struct sys_revoke_args /* {
3174 		syscallarg(const char *) path;
3175 	} */ *uap = v;
3176 	struct vnode *vp;
3177 	struct vattr vattr;
3178 	int error;
3179 	struct nameidata nd;
3180 
3181 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3182 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3183 	nd.ni_unveil = UNVEIL_READ;
3184 	if ((error = namei(&nd)) != 0)
3185 		return (error);
3186 	vp = nd.ni_vp;
3187 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3188 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3189 		error = ENOTTY;
3190 		goto out;
3191 	}
3192 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3193 		goto out;
3194 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3195 	    (error = suser(p)))
3196 		goto out;
3197 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3198 		VOP_REVOKE(vp, REVOKEALL);
3199 out:
3200 	vrele(vp);
3201 	return (error);
3202 }
3203 
3204 /*
3205  * Convert a user file descriptor to a kernel file entry.
3206  *
3207  * On return *fpp is FREF:ed.
3208  */
3209 int
3210 getvnode(struct proc *p, int fd, struct file **fpp)
3211 {
3212 	struct file *fp;
3213 	struct vnode *vp;
3214 
3215 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3216 		return (EBADF);
3217 
3218 	if (fp->f_type != DTYPE_VNODE) {
3219 		FRELE(fp, p);
3220 		return (EINVAL);
3221 	}
3222 
3223 	vp = fp->f_data;
3224 	if (vp->v_type == VBAD) {
3225 		FRELE(fp, p);
3226 		return (EBADF);
3227 	}
3228 
3229 	*fpp = fp;
3230 
3231 	return (0);
3232 }
3233 
3234 /*
3235  * Positional read system call.
3236  */
3237 int
3238 sys_pread(struct proc *p, void *v, register_t *retval)
3239 {
3240 	struct sys_pread_args /* {
3241 		syscallarg(int) fd;
3242 		syscallarg(void *) buf;
3243 		syscallarg(size_t) nbyte;
3244 		syscallarg(int) pad;
3245 		syscallarg(off_t) offset;
3246 	} */ *uap = v;
3247 	struct iovec iov;
3248 	struct uio auio;
3249 
3250 	iov.iov_base = SCARG(uap, buf);
3251 	iov.iov_len = SCARG(uap, nbyte);
3252 	if (iov.iov_len > SSIZE_MAX)
3253 		return (EINVAL);
3254 
3255 	auio.uio_iov = &iov;
3256 	auio.uio_iovcnt = 1;
3257 	auio.uio_resid = iov.iov_len;
3258 	auio.uio_offset = SCARG(uap, offset);
3259 
3260 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3261 }
3262 
3263 /*
3264  * Positional scatter read system call.
3265  */
3266 int
3267 sys_preadv(struct proc *p, void *v, register_t *retval)
3268 {
3269 	struct sys_preadv_args /* {
3270 		syscallarg(int) fd;
3271 		syscallarg(const struct iovec *) iovp;
3272 		syscallarg(int) iovcnt;
3273 		syscallarg(int) pad;
3274 		syscallarg(off_t) offset;
3275 	} */ *uap = v;
3276 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3277 	int error, iovcnt = SCARG(uap, iovcnt);
3278 	struct uio auio;
3279 	size_t resid;
3280 
3281 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3282 	if (error)
3283 		goto done;
3284 
3285 	auio.uio_iov = iov;
3286 	auio.uio_iovcnt = iovcnt;
3287 	auio.uio_resid = resid;
3288 	auio.uio_offset = SCARG(uap, offset);
3289 
3290 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3291  done:
3292 	iovec_free(iov, iovcnt);
3293 	return (error);
3294 }
3295 
3296 /*
3297  * Positional write system call.
3298  */
3299 int
3300 sys_pwrite(struct proc *p, void *v, register_t *retval)
3301 {
3302 	struct sys_pwrite_args /* {
3303 		syscallarg(int) fd;
3304 		syscallarg(const void *) buf;
3305 		syscallarg(size_t) nbyte;
3306 		syscallarg(int) pad;
3307 		syscallarg(off_t) offset;
3308 	} */ *uap = v;
3309 	struct iovec iov;
3310 	struct uio auio;
3311 
3312 	iov.iov_base = (void *)SCARG(uap, buf);
3313 	iov.iov_len = SCARG(uap, nbyte);
3314 	if (iov.iov_len > SSIZE_MAX)
3315 		return (EINVAL);
3316 
3317 	auio.uio_iov = &iov;
3318 	auio.uio_iovcnt = 1;
3319 	auio.uio_resid = iov.iov_len;
3320 	auio.uio_offset = SCARG(uap, offset);
3321 
3322 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3323 }
3324 
3325 /*
3326  * Positional gather write system call.
3327  */
3328 int
3329 sys_pwritev(struct proc *p, void *v, register_t *retval)
3330 {
3331 	struct sys_pwritev_args /* {
3332 		syscallarg(int) fd;
3333 		syscallarg(const struct iovec *) iovp;
3334 		syscallarg(int) iovcnt;
3335 		syscallarg(int) pad;
3336 		syscallarg(off_t) offset;
3337 	} */ *uap = v;
3338 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3339 	int error, iovcnt = SCARG(uap, iovcnt);
3340 	struct uio auio;
3341 	size_t resid;
3342 
3343 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3344 	if (error)
3345 		goto done;
3346 
3347 	auio.uio_iov = iov;
3348 	auio.uio_iovcnt = iovcnt;
3349 	auio.uio_resid = resid;
3350 	auio.uio_offset = SCARG(uap, offset);
3351 
3352 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3353  done:
3354 	iovec_free(iov, iovcnt);
3355 	return (error);
3356 }
3357