xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.348 2020/10/02 15:45:22 deraadt Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
/* Defined elsewhere; only declared here. */
extern int suid_clear;

/* Lookup-and-check step shared by sys_chdir() and sys_chroot(). */
static int change_dir(struct nameidata *, struct proc *);

/* Called by sys_mount() after a new file system has been put in place. */
void checkdirs(struct vnode *);

/* Copies a struct statfs to userland, zeroing f_fsid for non-root. */
int copyout_statfs(struct statfs *, void *, struct proc *);

/*
 * Workers behind the path-based system calls.
 * NOTE(review): judging by the names, most of these back both a plain
 * system call and its *at() variant, the way doopenat() backs sys_open()
 * and sys_openat() -- confirm against the definitions.
 */
int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
int domknodat(struct proc *, int, const char *, mode_t, dev_t);
int dolinkat(struct proc *, int, const char *, int, const char *, int);
int dosymlinkat(struct proc *, const char *, int, const char *);
int dounlinkat(struct proc *, int, const char *, int);
int dofaccessat(struct proc *, int, const char *, int, int);
int dofstatat(struct proc *, int, const char *, struct stat *, int);
int doreadlinkat(struct proc *, int, const char *, char *, size_t,
    register_t *);
int dochflagsat(struct proc *, int, const char *, u_int, int);
int dovchflags(struct proc *, struct vnode *, u_int);
int dofchmodat(struct proc *, int, const char *, mode_t, int);
int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
int dorenameat(struct proc *, int, const char *, int, const char *);
int domkdirat(struct proc *, int, const char *, mode_t);
int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
int dofutimens(struct proc *, int, struct timespec [2]);
/* Unmounts one mount point on behalf of dounmount(). */
int dounmount_leaf(struct mount *, int, struct proc *);
93 
94 /*
95  * Virtual File System System Calls
96  */
97 
98 /*
99  * Mount a file system.
100  */
101 int
102 sys_mount(struct proc *p, void *v, register_t *retval)
103 {
104 	struct sys_mount_args /* {
105 		syscallarg(const char *) type;
106 		syscallarg(const char *) path;
107 		syscallarg(int) flags;
108 		syscallarg(void *) data;
109 	} */ *uap = v;
110 	struct vnode *vp;
111 	struct mount *mp;
112 	int error, mntflag = 0;
113 	char fstypename[MFSNAMELEN];
114 	char fspath[MNAMELEN];
115 	struct nameidata nd;
116 	struct vfsconf *vfsp;
117 	int flags = SCARG(uap, flags);
118 	void *args = NULL;
119 
120 	if ((error = suser(p)))
121 		return (error);
122 
123 	/*
124 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
125 	 */
126 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
127 	if (error)
128 		return(error);
129 
130 	/*
131 	 * Get vnode to be covered
132 	 */
133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
134 	if ((error = namei(&nd)) != 0)
135 		goto fail;
136 	vp = nd.ni_vp;
137 	if (flags & MNT_UPDATE) {
138 		if ((vp->v_flag & VROOT) == 0) {
139 			vput(vp);
140 			error = EINVAL;
141 			goto fail;
142 		}
143 		mp = vp->v_mount;
144 		vfsp = mp->mnt_vfc;
145 
146 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
147 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
148 		if (error) {
149 			vput(vp);
150 			goto fail;
151 		}
152 
153 		mntflag = mp->mnt_flag;
154 		/*
155 		 * We only allow the filesystem to be reloaded if it
156 		 * is currently mounted read-only.
157 		 */
158 		if ((flags & MNT_RELOAD) &&
159 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 			vput(vp);
161 			error = EOPNOTSUPP;	/* Needs translation */
162 			goto fail;
163 		}
164 
165 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
166 			vput(vp);
167 			goto fail;
168 		}
169 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
170 		goto update;
171 	}
172 	/*
173 	 * Do not allow disabling of permission checks unless exec and access to
174 	 * device files is disabled too.
175 	 */
176 	if ((flags & MNT_NOPERM) &&
177 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
178 		vput(vp);
179 		error = EPERM;
180 		goto fail;
181 	}
182 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
183 		vput(vp);
184 		goto fail;
185 	}
186 	if (vp->v_type != VDIR) {
187 		vput(vp);
188 		goto fail;
189 	}
190 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
191 	if (error) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	vfsp = vfs_byname(fstypename);
196 	if (vfsp == NULL) {
197 		vput(vp);
198 		error = EOPNOTSUPP;
199 		goto fail;
200 	}
201 
202 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
203 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
204 	if (error) {
205 		vput(vp);
206 		goto fail;
207 	}
208 
209 	if (vp->v_mountedhere != NULL) {
210 		vput(vp);
211 		error = EBUSY;
212 		goto fail;
213 	}
214 
215 	/*
216 	 * Allocate and initialize the file system.
217 	 */
218 	mp = vfs_mount_alloc(vp, vfsp);
219 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
220 
221 update:
222 	/* Ensure that the parent mountpoint does not get unmounted. */
223 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
224 	if (error) {
225 		if (mp->mnt_flag & MNT_UPDATE) {
226 			mp->mnt_flag = mntflag;
227 			vfs_unbusy(mp);
228 		} else {
229 			vfs_unbusy(mp);
230 			vfs_mount_free(mp);
231 		}
232 		vput(vp);
233 		goto fail;
234 	}
235 
236 	/*
237 	 * Set the mount level flags.
238 	 */
239 	if (flags & MNT_RDONLY)
240 		mp->mnt_flag |= MNT_RDONLY;
241 	else if (mp->mnt_flag & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_WANTRDWR;
243 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
244 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
245 	    MNT_NOPERM | MNT_FORCE);
246 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
247 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
248 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
249 	/*
250 	 * Mount the filesystem.
251 	 */
252 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
253 	if (!error) {
254 		mp->mnt_stat.f_ctime = gettime();
255 	}
256 	if (mp->mnt_flag & MNT_UPDATE) {
257 		vfs_unbusy(vp->v_mount);
258 		vput(vp);
259 		if (mp->mnt_flag & MNT_WANTRDWR)
260 			mp->mnt_flag &= ~MNT_RDONLY;
261 		mp->mnt_flag &= ~MNT_OP_FLAGS;
262 		if (error)
263 			mp->mnt_flag = mntflag;
264 
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
266 			if (mp->mnt_syncer == NULL)
267 				error = vfs_allocate_syncvnode(mp);
268 		} else {
269 			if (mp->mnt_syncer != NULL)
270 				vgone(mp->mnt_syncer);
271 			mp->mnt_syncer = NULL;
272 		}
273 
274 		vfs_unbusy(mp);
275 		goto fail;
276 	}
277 
278 	mp->mnt_flag &= ~MNT_OP_FLAGS;
279 	vp->v_mountedhere = mp;
280 
281 	/*
282 	 * Put the new filesystem on the mount list after root.
283 	 */
284 	cache_purge(vp);
285 	if (!error) {
286 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
287 		checkdirs(vp);
288 		vfs_unbusy(vp->v_mount);
289 		VOP_UNLOCK(vp);
290 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
291 			error = vfs_allocate_syncvnode(mp);
292 		vfs_unbusy(mp);
293 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
294 		if ((error = VFS_START(mp, 0, p)) != 0)
295 			vrele(vp);
296 	} else {
297 		mp->mnt_vnodecovered->v_mountedhere = NULL;
298 		vfs_unbusy(mp);
299 		vfs_mount_free(mp);
300 		vfs_unbusy(vp->v_mount);
301 		vput(vp);
302 	}
303 fail:
304 	if (args)
305 		free(args, M_TEMP, vfsp->vfc_datasize);
306 	return (error);
307 }
308 
309 /*
310  * Scan all active processes to see if any of them have a current
311  * or root directory onto which the new filesystem has just been
312  * mounted. If so, replace them with the new mount point, keeping
313  * track of how many were replaced.  That's the number of references
314  * the old vnode had that we've replaced, so finish by vrele()'ing
315  * it that many times.  This puts off any possible sleeping until
316  * we've finished walking the allprocess list.
317  */
318 void
319 checkdirs(struct vnode *olddp)
320 {
321 	struct filedesc *fdp;
322 	struct vnode *newdp;
323 	struct process *pr;
324 	u_int  free_count = 0;
325 
326 	if (olddp->v_usecount == 1)
327 		return;
328 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
329 		panic("mount: lost mount");
330 	LIST_FOREACH(pr, &allprocess, ps_list) {
331 		fdp = pr->ps_fd;
332 		if (fdp->fd_cdir == olddp) {
333 			free_count++;
334 			vref(newdp);
335 			fdp->fd_cdir = newdp;
336 		}
337 		if (fdp->fd_rdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_rdir = newdp;
341 		}
342 		pr->ps_uvpcwd = NULL;
343 	}
344 	if (rootvnode == olddp) {
345 		free_count++;
346 		vref(newdp);
347 		rootvnode = newdp;
348 	}
349 	while (free_count-- > 0)
350 		vrele(olddp);
351 	vput(newdp);
352 }
353 
354 /*
355  * Unmount a file system.
356  *
357  * Note: unmount takes a path to the vnode mounted on as argument,
358  * not special file (as before).
359  */
360 int
361 sys_unmount(struct proc *p, void *v, register_t *retval)
362 {
363 	struct sys_unmount_args /* {
364 		syscallarg(const char *) path;
365 		syscallarg(int) flags;
366 	} */ *uap = v;
367 	struct vnode *vp;
368 	struct mount *mp;
369 	int error;
370 	struct nameidata nd;
371 
372 	if ((error = suser(p)) != 0)
373 		return (error);
374 
375 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
376 	    SCARG(uap, path), p);
377 	if ((error = namei(&nd)) != 0)
378 		return (error);
379 	vp = nd.ni_vp;
380 	mp = vp->v_mount;
381 
382 	/*
383 	 * Don't allow unmounting the root file system.
384 	 */
385 	if (mp->mnt_flag & MNT_ROOTFS) {
386 		vput(vp);
387 		return (EINVAL);
388 	}
389 
390 	/*
391 	 * Must be the root of the filesystem
392 	 */
393 	if ((vp->v_flag & VROOT) == 0) {
394 		vput(vp);
395 		return (EINVAL);
396 	}
397 	vput(vp);
398 
399 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
400 		return (EBUSY);
401 
402 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
403 }
404 
/*
 * Do the actual file system unmount.
 *
 * Unmounts mp together with every file system mounted beneath it.  The
 * mount points are first collected on a local list and then handed one
 * by one to dounmount_leaf().  Nested mounts are only torn down when
 * MNT_FORCE is set; without it their presence yields EBUSY.
 *
 * Every entry on the local list is busy: sys_unmount() busies mp before
 * calling in, and nested entries are busied here.  On failure all entries
 * still on the list are unbusied before the error is returned.
 *
 * Returns 0 on success or an errno value.
 */
int
dounmount(struct mount *mp, int flags, struct proc *p)
{
	SLIST_HEAD(, mount) mplist;
	struct mount *nmp;
	int error;

	SLIST_INIT(&mplist);
	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);

	/*
	 * Collect nested mount points. This takes advantage of the mount list
	 * being ordered - nested mount points come after their parent.
	 */
	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
			/* Skip unless mp covers a vnode of the collected nmp. */
			if (mp->mnt_vnodecovered == NULLVP ||
			    mp->mnt_vnodecovered->v_mount != nmp)
				continue;

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				goto err;
			}
			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
			if (error) {
				if ((flags & MNT_DOOMED)) {
					/*
					 * If the mount point was busy due to
					 * being unmounted, it has been removed
					 * from the mount list already.
					 * Restart the iteration from the last
					 * collected busy entry.
					 */
					mp = SLIST_FIRST(&mplist);
					break;
				}
				goto err;
			}
			/* New entries go to the head, so children precede parents. */
			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
			break;
		}
	}

	/*
	 * Nested mount points cannot appear during this loop as mounting
	 * requires a read lock for the parent mount point.
	 */
	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
		/* Taken off the list first: dounmount_leaf() unbusies mp itself. */
		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
		error = dounmount_leaf(mp, flags, p);
		if (error)
			goto err;
	}
	return (0);

err:
	/* Release whatever is still collected but not yet unmounted. */
	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
		vfs_unbusy(mp);
	}
	return (error);
}
471 
472 int
473 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
474 {
475 	struct vnode *coveredvp;
476 	struct vnode *vp, *nvp;
477 	int error;
478 	int hadsyncer = 0;
479 
480 	mp->mnt_flag &=~ MNT_ASYNC;
481 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
482 	if (mp->mnt_syncer != NULL) {
483 		hadsyncer = 1;
484 		vgone(mp->mnt_syncer);
485 		mp->mnt_syncer = NULL;
486 	}
487 
488 	/*
489 	 * Before calling file system unmount, make sure
490 	 * all unveils to vnodes in here are dropped.
491 	 */
492 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
493 		unveil_removevnode(vp);
494 	}
495 
496 	if (((mp->mnt_flag & MNT_RDONLY) ||
497 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
498 	    (flags & MNT_FORCE))
499 		error = VFS_UNMOUNT(mp, flags, p);
500 
501 	if (error && !(flags & MNT_DOOMED)) {
502 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
503 			(void) vfs_allocate_syncvnode(mp);
504 		vfs_unbusy(mp);
505 		return (error);
506 	}
507 
508 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
509 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
510 		coveredvp->v_mountedhere = NULL;
511 		vrele(coveredvp);
512 	}
513 
514 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
515 		panic("unmount: dangling vnode");
516 
517 	vfs_unbusy(mp);
518 	vfs_mount_free(mp);
519 
520 	return (0);
521 }
522 
523 /*
524  * Sync each mounted filesystem.
525  */
526 int
527 sys_sync(struct proc *p, void *v, register_t *retval)
528 {
529 	struct mount *mp;
530 	int asyncflag;
531 
532 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
533 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
534 			continue;
535 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
536 			asyncflag = mp->mnt_flag & MNT_ASYNC;
537 			mp->mnt_flag &= ~MNT_ASYNC;
538 			uvm_vnp_sync(mp);
539 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
540 			if (asyncflag)
541 				mp->mnt_flag |= MNT_ASYNC;
542 		}
543 		vfs_unbusy(mp);
544 	}
545 
546 	return (0);
547 }
548 
549 /*
550  * Change filesystem quotas.
551  */
552 int
553 sys_quotactl(struct proc *p, void *v, register_t *retval)
554 {
555 	struct sys_quotactl_args /* {
556 		syscallarg(const char *) path;
557 		syscallarg(int) cmd;
558 		syscallarg(int) uid;
559 		syscallarg(char *) arg;
560 	} */ *uap = v;
561 	struct mount *mp;
562 	int error;
563 	struct nameidata nd;
564 
565 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
566 	if ((error = namei(&nd)) != 0)
567 		return (error);
568 	mp = nd.ni_vp->v_mount;
569 	vrele(nd.ni_vp);
570 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
571 	    SCARG(uap, arg), p));
572 }
573 
574 int
575 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
576 {
577 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
578 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
579 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
580 	char *s, *d;
581 	int error;
582 
583 	/* Don't let non-root see filesystem id (for NFS security) */
584 	if (suser(p)) {
585 		fsid_t fsid;
586 
587 		s = (char *)sp;
588 		d = (char *)uaddr;
589 
590 		memset(&fsid, 0, sizeof(fsid));
591 
592 		if ((error = copyout(s, d, co_sz1)) != 0)
593 			return (error);
594 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
595 			return (error);
596 		return (copyout(s + co_off2, d + co_off2, co_sz2));
597 	}
598 
599 	return (copyout(sp, uaddr, sizeof(*sp)));
600 }
601 
602 /*
603  * Get filesystem statistics.
604  */
605 int
606 sys_statfs(struct proc *p, void *v, register_t *retval)
607 {
608 	struct sys_statfs_args /* {
609 		syscallarg(const char *) path;
610 		syscallarg(struct statfs *) buf;
611 	} */ *uap = v;
612 	struct mount *mp;
613 	struct statfs *sp;
614 	int error;
615 	struct nameidata nd;
616 
617 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
618 	    SCARG(uap, path), p);
619 	nd.ni_pledge = PLEDGE_RPATH;
620 	nd.ni_unveil = UNVEIL_READ;
621 	if ((error = namei(&nd)) != 0)
622 		return (error);
623 	mp = nd.ni_vp->v_mount;
624 	sp = &mp->mnt_stat;
625 	vrele(nd.ni_vp);
626 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
627 		return (error);
628 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
629 
630 	return (copyout_statfs(sp, SCARG(uap, buf), p));
631 }
632 
633 /*
634  * Get filesystem statistics.
635  */
636 int
637 sys_fstatfs(struct proc *p, void *v, register_t *retval)
638 {
639 	struct sys_fstatfs_args /* {
640 		syscallarg(int) fd;
641 		syscallarg(struct statfs *) buf;
642 	} */ *uap = v;
643 	struct file *fp;
644 	struct mount *mp;
645 	struct statfs *sp;
646 	int error;
647 
648 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
649 		return (error);
650 	mp = ((struct vnode *)fp->f_data)->v_mount;
651 	if (!mp) {
652 		FRELE(fp, p);
653 		return (ENOENT);
654 	}
655 	sp = &mp->mnt_stat;
656 	error = VFS_STATFS(mp, sp, p);
657 	FRELE(fp, p);
658 	if (error)
659 		return (error);
660 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
661 
662 	return (copyout_statfs(sp, SCARG(uap, buf), p));
663 }
664 
665 /*
666  * Get statistics on all filesystems.
667  */
668 int
669 sys_getfsstat(struct proc *p, void *v, register_t *retval)
670 {
671 	struct sys_getfsstat_args /* {
672 		syscallarg(struct statfs *) buf;
673 		syscallarg(size_t) bufsize;
674 		syscallarg(int) flags;
675 	} */ *uap = v;
676 	struct mount *mp;
677 	struct statfs *sp;
678 	struct statfs *sfsp;
679 	size_t count, maxcount;
680 	int error, flags = SCARG(uap, flags);
681 
682 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
683 	sfsp = SCARG(uap, buf);
684 	count = 0;
685 
686 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
687 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
688 			continue;
689 		if (sfsp && count < maxcount) {
690 			sp = &mp->mnt_stat;
691 
692 			/* Refresh stats unless MNT_NOWAIT is specified */
693 			if (flags != MNT_NOWAIT &&
694 			    flags != MNT_LAZY &&
695 			    (flags == MNT_WAIT ||
696 			    flags == 0) &&
697 			    (error = VFS_STATFS(mp, sp, p))) {
698 				vfs_unbusy(mp);
699 				continue;
700 			}
701 
702 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
703 #if notyet
704 			if (mp->mnt_flag & MNT_SOFTDEP)
705 				sp->f_eflags = STATFS_SOFTUPD;
706 #endif
707 			error = (copyout_statfs(sp, sfsp, p));
708 			if (error) {
709 				vfs_unbusy(mp);
710 				return (error);
711 			}
712 			sfsp++;
713 		}
714 		count++;
715 		vfs_unbusy(mp);
716 	}
717 
718 	if (sfsp && count > maxcount)
719 		*retval = maxcount;
720 	else
721 		*retval = count;
722 
723 	return (0);
724 }
725 
726 /*
727  * Change current working directory to a given file descriptor.
728  */
729 int
730 sys_fchdir(struct proc *p, void *v, register_t *retval)
731 {
732 	struct sys_fchdir_args /* {
733 		syscallarg(int) fd;
734 	} */ *uap = v;
735 	struct filedesc *fdp = p->p_fd;
736 	struct vnode *vp, *tdp, *old_cdir;
737 	struct mount *mp;
738 	struct file *fp;
739 	int error;
740 
741 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
742 		return (EBADF);
743 	vp = fp->f_data;
744 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
745 		FRELE(fp, p);
746 		return (ENOTDIR);
747 	}
748 	vref(vp);
749 	FRELE(fp, p);
750 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
751 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
752 
753 	while (!error && (mp = vp->v_mountedhere) != NULL) {
754 		if (vfs_busy(mp, VB_READ|VB_WAIT))
755 			continue;
756 		error = VFS_ROOT(mp, &tdp);
757 		vfs_unbusy(mp);
758 		if (error)
759 			break;
760 		vput(vp);
761 		vp = tdp;
762 	}
763 	if (error) {
764 		vput(vp);
765 		return (error);
766 	}
767 	VOP_UNLOCK(vp);
768 	old_cdir = fdp->fd_cdir;
769 	fdp->fd_cdir = vp;
770 	vrele(old_cdir);
771 	return (0);
772 }
773 
774 /*
775  * Change current working directory (``.'').
776  */
777 int
778 sys_chdir(struct proc *p, void *v, register_t *retval)
779 {
780 	struct sys_chdir_args /* {
781 		syscallarg(const char *) path;
782 	} */ *uap = v;
783 	struct filedesc *fdp = p->p_fd;
784 	struct vnode *old_cdir;
785 	int error;
786 	struct nameidata nd;
787 
788 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
789 	    SCARG(uap, path), p);
790 	nd.ni_pledge = PLEDGE_RPATH;
791 	nd.ni_unveil = UNVEIL_READ;
792 	if ((error = change_dir(&nd, p)) != 0)
793 		return (error);
794 	p->p_p->ps_uvpcwd = nd.ni_unveil_match;
795 	old_cdir = fdp->fd_cdir;
796 	fdp->fd_cdir = nd.ni_vp;
797 	vrele(old_cdir);
798 	return (0);
799 }
800 
801 /*
802  * Change notion of root (``/'') directory.
803  */
804 int
805 sys_chroot(struct proc *p, void *v, register_t *retval)
806 {
807 	struct sys_chroot_args /* {
808 		syscallarg(const char *) path;
809 	} */ *uap = v;
810 	struct filedesc *fdp = p->p_fd;
811 	struct vnode *old_cdir, *old_rdir;
812 	int error;
813 	struct nameidata nd;
814 
815 	if ((error = suser(p)) != 0)
816 		return (error);
817 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
818 	    SCARG(uap, path), p);
819 	if ((error = change_dir(&nd, p)) != 0)
820 		return (error);
821 	if (fdp->fd_rdir != NULL) {
822 		/*
823 		 * A chroot() done inside a changed root environment does
824 		 * an automatic chdir to avoid the out-of-tree experience.
825 		 */
826 		vref(nd.ni_vp);
827 		old_rdir = fdp->fd_rdir;
828 		old_cdir = fdp->fd_cdir;
829 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
830 		vrele(old_rdir);
831 		vrele(old_cdir);
832 	} else
833 		fdp->fd_rdir = nd.ni_vp;
834 	return (0);
835 }
836 
837 /*
838  * Common routine for chroot and chdir.
839  */
840 static int
841 change_dir(struct nameidata *ndp, struct proc *p)
842 {
843 	struct vnode *vp;
844 	int error;
845 
846 	if ((error = namei(ndp)) != 0)
847 		return (error);
848 	vp = ndp->ni_vp;
849 	if (vp->v_type != VDIR)
850 		error = ENOTDIR;
851 	else
852 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
853 	if (error)
854 		vput(vp);
855 	else
856 		VOP_UNLOCK(vp);
857 	return (error);
858 }
859 
860 int
861 sys___realpath(struct proc *p, void *v, register_t *retval)
862 {
863 	struct sys___realpath_args /* {
864 		syscallarg(const char *) pathname;
865 		syscallarg(char *) resolved;
866 	} */ *uap = v;
867 	char *pathname, *c;
868 	char *rpbuf;
869 	struct nameidata nd;
870 	size_t pathlen;
871 	int error = 0;
872 
873 	if (SCARG(uap, pathname) == NULL)
874 		return (EINVAL);
875 
876 	pathname = pool_get(&namei_pool, PR_WAITOK);
877 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
878 
879 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
880 	    &pathlen)))
881 		goto end;
882 
883 	if (pathlen == 1) { /* empty string "" */
884 		error = ENOENT;
885 		goto end;
886 	}
887 	if (pathlen < 2) {
888 		error = EINVAL;
889 		goto end;
890 	}
891 
892 	/* Get cwd for relative path if needed, prepend to rpbuf */
893 	rpbuf[0] = '\0';
894 	if (pathname[0] != '/') {
895 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
896 		char *cwdbuf, *bp;
897 
898 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
899 
900 		/* vfs_getcwd_common fills this in backwards */
901 		bp = &cwdbuf[cwdlen - 1];
902 		*bp = '\0';
903 
904 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
905 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
906 
907 		if (error) {
908 			free(cwdbuf, M_TEMP, cwdlen);
909 			goto end;
910 		}
911 
912 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
913 			free(cwdbuf, M_TEMP, cwdlen);
914 			error = ENAMETOOLONG;
915 			goto end;
916 		}
917 
918 		free(cwdbuf, M_TEMP, cwdlen);
919 	}
920 
921 	/* find root "/" or "//" */
922 	for (c = pathname; *c != '\0'; c++) {
923 		if (*c != '/')
924 			break;
925 	}
926 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME | REALPATH,
927 	    UIO_SYSSPACE, pathname, p);
928 
929 	nd.ni_cnd.cn_rpbuf = rpbuf;
930 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
931 
932 	nd.ni_pledge = PLEDGE_RPATH;
933 	nd.ni_unveil = UNVEIL_READ;
934 	if ((error = namei(&nd)) != 0)
935 		goto end;
936 
937 	/* release lock and reference from namei */
938 	if (nd.ni_vp) {
939 		VOP_UNLOCK(nd.ni_vp);
940 		vrele(nd.ni_vp);
941 	}
942 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
943 	    MAXPATHLEN, NULL);
944 
945 #ifdef KTRACE
946 	if (KTRPOINT(p, KTR_NAMEI))
947 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
948 #endif
949 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
950 end:
951 	pool_put(&namei_pool, rpbuf);
952 	pool_put(&namei_pool, pathname);
953 	return (error);
954 }
955 
956 int
957 sys_unveil(struct proc *p, void *v, register_t *retval)
958 {
959 	struct sys_unveil_args /* {
960 		syscallarg(const char *) path;
961 		syscallarg(const char *) permissions;
962 	} */ *uap = v;
963 	struct process *pr = p->p_p;
964 	char *pathname, *c;
965 	struct nameidata nd;
966 	size_t pathlen;
967 	char permissions[5];
968 	int error, allow;
969 
970 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
971 		pr->ps_uvdone = 1;
972 		return (0);
973 	}
974 
975 	if (pr->ps_uvdone != 0)
976 		return EPERM;
977 
978 	error = copyinstr(SCARG(uap, permissions), permissions,
979 	    sizeof(permissions), NULL);
980 	if (error)
981 		return (error);
982 	pathname = pool_get(&namei_pool, PR_WAITOK);
983 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
984 	if (error)
985 		goto end;
986 
987 #ifdef KTRACE
988 	if (KTRPOINT(p, KTR_STRUCT))
989 		ktrstruct(p, "unveil", permissions, strlen(permissions));
990 #endif
991 	if (pathlen < 2) {
992 		error = EINVAL;
993 		goto end;
994 	}
995 
996 	/* find root "/" or "//" */
997 	for (c = pathname; *c != '\0'; c++) {
998 		if (*c != '/')
999 			break;
1000 	}
1001 	if (*c == '\0')
1002 		/* root directory */
1003 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
1004 		    UIO_SYSSPACE, pathname, p);
1005 	else
1006 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
1007 		    UIO_SYSSPACE, pathname, p);
1008 
1009 	nd.ni_pledge = PLEDGE_UNVEIL;
1010 	if ((error = namei(&nd)) != 0)
1011 		goto ndfree;
1012 
1013 	/*
1014 	 * XXX Any access to the file or directory will allow us to
1015 	 * pledge path it
1016 	 */
1017 	allow = ((nd.ni_vp &&
1018 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1019 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1020 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1021 	    (nd.ni_dvp &&
1022 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1023 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1024 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1025 
1026 	/* release lock from namei, but keep ref */
1027 	if (nd.ni_vp)
1028 		VOP_UNLOCK(nd.ni_vp);
1029 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1030 		VOP_UNLOCK(nd.ni_dvp);
1031 
1032 	if (allow)
1033 		error = unveil_add(p, &nd, permissions);
1034 	else
1035 		error = EPERM;
1036 
1037 	/* release vref from namei, but not vref from unveil_add */
1038 	if (nd.ni_vp)
1039 		vrele(nd.ni_vp);
1040 	if (nd.ni_dvp)
1041 		vrele(nd.ni_dvp);
1042 
1043 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1044 ndfree:
1045 	unveil_free_traversed_vnodes(&nd);
1046 end:
1047 	pool_put(&namei_pool, pathname);
1048 
1049 	return (error);
1050 }
1051 
1052 /*
1053  * Check permissions, allocate an open file structure,
1054  * and call the device open routine if any.
1055  */
1056 int
1057 sys_open(struct proc *p, void *v, register_t *retval)
1058 {
1059 	struct sys_open_args /* {
1060 		syscallarg(const char *) path;
1061 		syscallarg(int) flags;
1062 		syscallarg(mode_t) mode;
1063 	} */ *uap = v;
1064 
1065 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1066 	    SCARG(uap, mode), retval));
1067 }
1068 
1069 int
1070 sys_openat(struct proc *p, void *v, register_t *retval)
1071 {
1072 	struct sys_openat_args /* {
1073 		syscallarg(int) fd;
1074 		syscallarg(const char *) path;
1075 		syscallarg(int) flags;
1076 		syscallarg(mode_t) mode;
1077 	} */ *uap = v;
1078 
1079 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1080 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1081 }
1082 
/*
 * Common worker for sys_open() and sys_openat().
 *
 * Allocates a file structure and descriptor slot, opens the vnode named
 * by path (relative to fd) with vn_open(), optionally takes the advisory
 * lock requested by O_EXLOCK/O_SHLOCK, and finally installs the file in
 * the descriptor table.  On success *retval holds the new descriptor.
 *
 * Locking note: every jump to `out' is made with the descriptor table
 * lock held, because `out' unconditionally releases it.
 *
 * Returns 0 on success or an errno value.
 */
int
doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int flags, cloexec, cmode;
	int type, indx, error, localtrunc = 0;
	struct flock lf;
	struct nameidata nd;
	uint64_t ni_pledge = 0;
	u_char ni_unveil = 0;

	/* Lock-on-open is subject to the same pledge check as flock. */
	if (oflags & (O_EXLOCK | O_SHLOCK)) {
		error = pledge_flock(p);
		if (error != 0)
			return (error);
	}

	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0)
		goto out;
	fdpunlock(fdp);

	/* Derive the pledge and unveil requirements from the open mode. */
	flags = FFLAGS(oflags);
	if (flags & FREAD) {
		ni_pledge |= PLEDGE_RPATH;
		ni_unveil |= UNVEIL_READ;
	}
	if (flags & FWRITE) {
		ni_pledge |= PLEDGE_WPATH;
		ni_unveil |= UNVEIL_WRITE;
	}
	if (oflags & O_CREAT) {
		ni_pledge |= PLEDGE_CPATH;
		ni_unveil |= UNVEIL_CREATE;
	}

	/* Apply the umask; pledged processes are limited to ACCESSPERMS. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	if ((p->p_p->ps_flags & PS_PLEDGE))
		cmode &= ACCESSPERMS;
	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
	nd.ni_pledge = ni_pledge;
	nd.ni_unveil = ni_unveil;
	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * When a lock is requested together with O_TRUNC, the truncation
	 * is withheld from vn_open() and carried out further down, after
	 * the lock has been obtained.
	 */
	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
		localtrunc = 1;
		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
	}
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		fdplock(fdp);
		/* ENODEV with p_dupfd set: try duplicating that descriptor. */
		if (error == ENODEV &&
		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			dupfdopen(p, indx, flags)) == 0) {
			closef(fp, p);
			*retval = indx;
			goto out;
		}
		/* An interrupted open is reported as EINTR, not restarted. */
		if (error == ERESTART)
			error = EINTR;
		fdremove(fdp, indx);
		closef(fp, p);
		goto out;
	}
	p->p_dupfd = 0;
	vp = nd.ni_vp;
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* A zero start and length describe a whole-file lock. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped around the advisory lock request. */
		VOP_UNLOCK(vp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			fdplock(fdp);
			/* closef will vn_close the file for us. */
			fdremove(fdp, indx);
			closef(fp, p);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
	}
	if (localtrunc) {
		/* Deferred O_TRUNC: validate, then set the size to zero. */
		if ((fp->f_flag & FWRITE) == 0)
			error = EACCES;
		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
			error = EROFS;
		else if (vp->v_type == VDIR)
			error = EISDIR;
		else if ((error = vn_writechk(vp)) == 0) {
			VATTR_NULL(&vattr);
			vattr.va_size = 0;
			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
		}
		if (error) {
			VOP_UNLOCK(vp);
			fdplock(fdp);
			/* closef will close the file for us. */
			fdremove(fdp, indx);
			closef(fp, p);
			goto out;
		}
	}
	VOP_UNLOCK(vp);
	*retval = indx;
	/* Publish the file in the reserved slot and drop our reference. */
	fdplock(fdp);
	fdinsert(fdp, indx, cloexec, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	return (error);
}
1211 
1212 /*
1213  * Open a new created file (in /tmp) suitable for mmaping.
1214  */
1215 int
1216 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1217 {
1218 	struct sys___tmpfd_args /* {
1219 		syscallarg(int) flags;
1220 	} */ *uap = v;
1221 	struct filedesc *fdp = p->p_fd;
1222 	struct file *fp;
1223 	struct vnode *vp;
1224 	int oflags = SCARG(uap, flags);
1225 	int flags, cloexec, cmode;
1226 	int indx, error;
1227 	unsigned int i;
1228 	struct nameidata nd;
1229 	char path[64];
1230 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1231 
1232 	/* most flags are hardwired */
1233 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1234 
1235 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1236 
1237 	fdplock(fdp);
1238 	if ((error = falloc(p, &fp, &indx)) != 0)
1239 		goto out;
1240 	fdpunlock(fdp);
1241 
1242 	flags = FFLAGS(oflags);
1243 
1244 	arc4random_buf(path, sizeof(path));
1245 	memcpy(path, "/tmp/", 5);
1246 	for (i = 5; i < sizeof(path) - 1; i++)
1247 		path[i] = letters[(unsigned char)path[i] & 63];
1248 	path[sizeof(path)-1] = 0;
1249 
1250 	cmode = 0600;
1251 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1252 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1253 		fdplock(fdp);
1254 		if (error == ERESTART)
1255 			error = EINTR;
1256 		fdremove(fdp, indx);
1257 		closef(fp, p);
1258 		goto out;
1259 	}
1260 	vp = nd.ni_vp;
1261 	fp->f_flag = flags & FMASK;
1262 	fp->f_type = DTYPE_VNODE;
1263 	fp->f_ops = &vnops;
1264 	fp->f_data = vp;
1265 	VOP_UNLOCK(vp);
1266 	*retval = indx;
1267 	fdplock(fdp);
1268 	fdinsert(fdp, indx, cloexec, fp);
1269 	FRELE(fp, p);
1270 
1271 	/* unlink it */
1272 	/* XXX
1273 	 * there is a wee race here, although it is mostly inconsequential.
1274 	 * perhaps someday we can create a file like object without a name...
1275 	 */
1276 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1277 	    AT_FDCWD, path, p);
1278 	if ((error = namei(&nd)) != 0) {
1279 		printf("can't unlink temp file! %d\n", error);
1280 		error = 0;
1281 	} else {
1282 		vp = nd.ni_vp;
1283 		uvm_vnp_uncache(vp);
1284 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1285 		if (error) {
1286 			printf("error removing vop: %d\n", error);
1287 			error = 0;
1288 		}
1289 	}
1290 
1291 out:
1292 	fdpunlock(fdp);
1293 	return (error);
1294 }
1295 
1296 /*
1297  * Get file handle system call
1298  */
1299 int
1300 sys_getfh(struct proc *p, void *v, register_t *retval)
1301 {
1302 	struct sys_getfh_args /* {
1303 		syscallarg(const char *) fname;
1304 		syscallarg(fhandle_t *) fhp;
1305 	} */ *uap = v;
1306 	struct vnode *vp;
1307 	fhandle_t fh;
1308 	int error;
1309 	struct nameidata nd;
1310 
1311 	/*
1312 	 * Must be super user
1313 	 */
1314 	error = suser(p);
1315 	if (error)
1316 		return (error);
1317 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1318 	    SCARG(uap, fname), p);
1319 	error = namei(&nd);
1320 	if (error)
1321 		return (error);
1322 	vp = nd.ni_vp;
1323 	memset(&fh, 0, sizeof(fh));
1324 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1325 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1326 	vput(vp);
1327 	if (error)
1328 		return (error);
1329 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1330 	return (error);
1331 }
1332 
1333 /*
1334  * Open a file given a file handle.
1335  *
1336  * Check permissions, allocate an open file structure,
1337  * and call the device open routine if any.
 *
 * Restricted to the superuser.  Requests containing O_CREAT, or with
 * neither FREAD nor FWRITE set, are rejected with EINVAL.  On success
 * the new descriptor index is stored in *retval and 0 is returned.
1338  */
1339 int
1340 sys_fhopen(struct proc *p, void *v, register_t *retval)
1341 {
1342 	struct sys_fhopen_args /* {
1343 		syscallarg(const fhandle_t *) fhp;
1344 		syscallarg(int) flags;
1345 	} */ *uap = v;
1346 	struct filedesc *fdp = p->p_fd;
1347 	struct file *fp;
1348 	struct vnode *vp = NULL;
1349 	struct mount *mp;
1350 	struct ucred *cred = p->p_ucred;
1351 	int flags, cloexec;
1352 	int type, indx, error=0;
1353 	struct flock lf;
1354 	struct vattr va;
1355 	fhandle_t fh;
1356 
1357 	/*
1358 	 * Must be super user
1359 	 */
1360 	if ((error = suser(p)))
1361 		return (error);
1362 
1363 	flags = FFLAGS(SCARG(uap, flags));
1364 	if ((flags & (FREAD | FWRITE)) == 0)
1365 		return (EINVAL);
1366 	if ((flags & O_CREAT))
1367 		return (EINVAL);
1368 
1369 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1370 
	/*
	 * The fd table lock taken here stays held until the function
	 * returns.  If falloc() fails, fp is cleared so that the `bad'
	 * path skips the descriptor cleanup.
	 */
1371 	fdplock(fdp);
1372 	if ((error = falloc(p, &fp, &indx)) != 0) {
1373 		fp = NULL;
1374 		goto bad;
1375 	}
1376 
1377 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1378 		goto bad;
1379 
	/* Resolve the handle: file system id to mount, file id to vnode. */
1380 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1381 		error = ESTALE;
1382 		goto bad;
1383 	}
1384 
1385 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1386 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1387 		goto bad;
1388 	}
1389 
1390 	/* Now do an effective vn_open */
1391 
	/* Sockets cannot be opened this way. */
1392 	if (vp->v_type == VSOCK) {
1393 		error = EOPNOTSUPP;
1394 		goto bad;
1395 	}
1396 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1397 		error = ENOTDIR;
1398 		goto bad;
1399 	}
	/* Access checks matching the requested open mode. */
1400 	if (flags & FREAD) {
1401 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1402 			goto bad;
1403 	}
1404 	if (flags & (FWRITE | O_TRUNC)) {
1405 		if (vp->v_type == VDIR) {
1406 			error = EISDIR;
1407 			goto bad;
1408 		}
1409 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1410 		    (error = vn_writechk(vp)) != 0)
1411 			goto bad;
1412 	}
	/* O_TRUNC: set the file size to zero before opening. */
1413 	if (flags & O_TRUNC) {
1414 		VATTR_NULL(&va);
1415 		va.va_size = 0;
1416 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1417 			goto bad;
1418 	}
1419 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1420 		goto bad;
	/* Account for the new writer on the vnode. */
1421 	if (flags & FWRITE)
1422 		vp->v_writecount++;
1423 
1424 	/* done with modified vn_open, now finish what sys_open does. */
1425 
1426 	fp->f_flag = flags & FMASK;
1427 	fp->f_type = DTYPE_VNODE;
1428 	fp->f_ops = &vnops;
1429 	fp->f_data = vp;
	/*
	 * O_EXLOCK/O_SHLOCK: request an flock-style advisory lock with
	 * l_start 0 and l_len 0; the vnode is unlocked around
	 * VOP_ADVLOCK() and relocked afterwards.
	 */
1430 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1431 		lf.l_whence = SEEK_SET;
1432 		lf.l_start = 0;
1433 		lf.l_len = 0;
1434 		if (flags & O_EXLOCK)
1435 			lf.l_type = F_WRLCK;
1436 		else
1437 			lf.l_type = F_RDLCK;
1438 		type = F_FLOCK;
1439 		if ((flags & FNONBLOCK) == 0)
1440 			type |= F_WAIT;
1441 		VOP_UNLOCK(vp);
1442 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1443 		if (error) {
1444 			vp = NULL;	/* closef will vn_close the file */
1445 			goto bad;
1446 		}
1447 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1448 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1449 	}
	/* Success: publish the descriptor and release the file via FRELE(). */
1450 	VOP_UNLOCK(vp);
1451 	*retval = indx;
1452 	fdinsert(fdp, indx, cloexec, fp);
1453 	fdpunlock(fdp);
1454 	FRELE(fp, p);
1455 	return (0);
1456 
	/*
	 * Error exit.  With a file allocated, give back the descriptor
	 * slot and close the file; vp is vput() here only if it is still
	 * non-NULL (the advisory-lock failure path clears it first).
	 */
1457 bad:
1458 	if (fp) {
1459 		fdremove(fdp, indx);
1460 		closef(fp, p);
1461 		if (vp != NULL)
1462 			vput(vp);
1463 	}
1464 	fdpunlock(fdp);
1465 	return (error);
1466 }
1467 
1468 int
1469 sys_fhstat(struct proc *p, void *v, register_t *retval)
1470 {
1471 	struct sys_fhstat_args /* {
1472 		syscallarg(const fhandle_t *) fhp;
1473 		syscallarg(struct stat *) sb;
1474 	} */ *uap = v;
1475 	struct stat sb;
1476 	int error;
1477 	fhandle_t fh;
1478 	struct mount *mp;
1479 	struct vnode *vp;
1480 
1481 	/*
1482 	 * Must be super user
1483 	 */
1484 	if ((error = suser(p)))
1485 		return (error);
1486 
1487 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1488 		return (error);
1489 
1490 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1491 		return (ESTALE);
1492 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1493 		return (error);
1494 	error = vn_stat(vp, &sb, p);
1495 	vput(vp);
1496 	if (error)
1497 		return (error);
1498 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1499 	return (error);
1500 }
1501 
1502 int
1503 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1504 {
1505 	struct sys_fhstatfs_args /* {
1506 		syscallarg(const fhandle_t *) fhp;
1507 		syscallarg(struct statfs *) buf;
1508 	} */ *uap = v;
1509 	struct statfs *sp;
1510 	fhandle_t fh;
1511 	struct mount *mp;
1512 	struct vnode *vp;
1513 	int error;
1514 
1515 	/*
1516 	 * Must be super user
1517 	 */
1518 	if ((error = suser(p)))
1519 		return (error);
1520 
1521 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1522 		return (error);
1523 
1524 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1525 		return (ESTALE);
1526 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1527 		return (error);
1528 	mp = vp->v_mount;
1529 	sp = &mp->mnt_stat;
1530 	vput(vp);
1531 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1532 		return (error);
1533 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1534 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1535 }
1536 
1537 /*
1538  * Create a special file or named pipe.
1539  */
1540 int
1541 sys_mknod(struct proc *p, void *v, register_t *retval)
1542 {
1543 	struct sys_mknod_args /* {
1544 		syscallarg(const char *) path;
1545 		syscallarg(mode_t) mode;
1546 		syscallarg(int) dev;
1547 	} */ *uap = v;
1548 
1549 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1550 	    SCARG(uap, dev)));
1551 }
1552 
1553 int
1554 sys_mknodat(struct proc *p, void *v, register_t *retval)
1555 {
1556 	struct sys_mknodat_args /* {
1557 		syscallarg(int) fd;
1558 		syscallarg(const char *) path;
1559 		syscallarg(mode_t) mode;
1560 		syscallarg(dev_t) dev;
1561 	} */ *uap = v;
1562 
1563 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1564 	    SCARG(uap, mode), SCARG(uap, dev)));
1565 }
1566 
1567 int
1568 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1569 {
1570 	struct vnode *vp;
1571 	struct vattr vattr;
1572 	int error;
1573 	struct nameidata nd;
1574 
1575 	if (dev == VNOVAL)
1576 		return (EINVAL);
1577 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1578 	nd.ni_pledge = PLEDGE_DPATH;
1579 	nd.ni_unveil = UNVEIL_CREATE;
1580 	if ((error = namei(&nd)) != 0)
1581 		return (error);
1582 	vp = nd.ni_vp;
1583 	if (!S_ISFIFO(mode) || dev != 0) {
1584 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1585 			goto out;
1586 		if (p->p_fd->fd_rdir) {
1587 			error = EINVAL;
1588 			goto out;
1589 		}
1590 	}
1591 	if (vp != NULL)
1592 		error = EEXIST;
1593 	else {
1594 		VATTR_NULL(&vattr);
1595 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1596 		if ((p->p_p->ps_flags & PS_PLEDGE))
1597 			vattr.va_mode &= ACCESSPERMS;
1598 		vattr.va_rdev = dev;
1599 
1600 		switch (mode & S_IFMT) {
1601 		case S_IFMT:	/* used by badsect to flag bad sectors */
1602 			vattr.va_type = VBAD;
1603 			break;
1604 		case S_IFCHR:
1605 			vattr.va_type = VCHR;
1606 			break;
1607 		case S_IFBLK:
1608 			vattr.va_type = VBLK;
1609 			break;
1610 		case S_IFIFO:
1611 #ifndef FIFO
1612 			error = EOPNOTSUPP;
1613 			break;
1614 #else
1615 			if (dev == 0) {
1616 				vattr.va_type = VFIFO;
1617 				break;
1618 			}
1619 			/* FALLTHROUGH */
1620 #endif /* FIFO */
1621 		default:
1622 			error = EINVAL;
1623 			break;
1624 		}
1625 	}
1626 out:
1627 	if (!error) {
1628 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1629 		vput(nd.ni_dvp);
1630 	} else {
1631 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1632 		if (nd.ni_dvp == vp)
1633 			vrele(nd.ni_dvp);
1634 		else
1635 			vput(nd.ni_dvp);
1636 		if (vp)
1637 			vrele(vp);
1638 	}
1639 	return (error);
1640 }
1641 
1642 /*
1643  * Create a named pipe.
1644  */
1645 int
1646 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1647 {
1648 	struct sys_mkfifo_args /* {
1649 		syscallarg(const char *) path;
1650 		syscallarg(mode_t) mode;
1651 	} */ *uap = v;
1652 
1653 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1654 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1655 }
1656 
1657 int
1658 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1659 {
1660 	struct sys_mkfifoat_args /* {
1661 		syscallarg(int) fd;
1662 		syscallarg(const char *) path;
1663 		syscallarg(mode_t) mode;
1664 	} */ *uap = v;
1665 
1666 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1667 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1668 }
1669 
1670 /*
1671  * Make a hard file link.
1672  */
1673 int
1674 sys_link(struct proc *p, void *v, register_t *retval)
1675 {
1676 	struct sys_link_args /* {
1677 		syscallarg(const char *) path;
1678 		syscallarg(const char *) link;
1679 	} */ *uap = v;
1680 
1681 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1682 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1683 }
1684 
1685 int
1686 sys_linkat(struct proc *p, void *v, register_t *retval)
1687 {
1688 	struct sys_linkat_args /* {
1689 		syscallarg(int) fd1;
1690 		syscallarg(const char *) path1;
1691 		syscallarg(int) fd2;
1692 		syscallarg(const char *) path2;
1693 		syscallarg(int) flag;
1694 	} */ *uap = v;
1695 
1696 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1697 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1698 }
1699 
1700 int
1701 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1702     const char *path2, int flag)
1703 {
1704 	struct vnode *vp;
1705 	struct nameidata nd;
1706 	int error, follow;
1707 	int flags;
1708 
1709 	if (flag & ~AT_SYMLINK_FOLLOW)
1710 		return (EINVAL);
1711 
1712 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1713 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1714 	nd.ni_pledge = PLEDGE_RPATH;
1715 	nd.ni_unveil = UNVEIL_READ;
1716 	if ((error = namei(&nd)) != 0)
1717 		return (error);
1718 	vp = nd.ni_vp;
1719 
1720 	flags = LOCKPARENT;
1721 	if (vp->v_type == VDIR) {
1722 		flags |= STRIPSLASHES;
1723 	}
1724 
1725 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1726 	nd.ni_pledge = PLEDGE_CPATH;
1727 	nd.ni_unveil = UNVEIL_CREATE;
1728 	if ((error = namei(&nd)) != 0)
1729 		goto out;
1730 	if (nd.ni_vp) {
1731 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1732 		if (nd.ni_dvp == nd.ni_vp)
1733 			vrele(nd.ni_dvp);
1734 		else
1735 			vput(nd.ni_dvp);
1736 		vrele(nd.ni_vp);
1737 		error = EEXIST;
1738 		goto out;
1739 	}
1740 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1741 out:
1742 	vrele(vp);
1743 	return (error);
1744 }
1745 
1746 /*
1747  * Make a symbolic link.
1748  */
1749 int
1750 sys_symlink(struct proc *p, void *v, register_t *retval)
1751 {
1752 	struct sys_symlink_args /* {
1753 		syscallarg(const char *) path;
1754 		syscallarg(const char *) link;
1755 	} */ *uap = v;
1756 
1757 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1758 }
1759 
1760 int
1761 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1762 {
1763 	struct sys_symlinkat_args /* {
1764 		syscallarg(const char *) path;
1765 		syscallarg(int) fd;
1766 		syscallarg(const char *) link;
1767 	} */ *uap = v;
1768 
1769 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1770 	    SCARG(uap, link)));
1771 }
1772 
1773 int
1774 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1775 {
1776 	struct vattr vattr;
1777 	char *path;
1778 	int error;
1779 	struct nameidata nd;
1780 
1781 	path = pool_get(&namei_pool, PR_WAITOK);
1782 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1783 	if (error)
1784 		goto out;
1785 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1786 	nd.ni_pledge = PLEDGE_CPATH;
1787 	nd.ni_unveil = UNVEIL_CREATE;
1788 	if ((error = namei(&nd)) != 0)
1789 		goto out;
1790 	if (nd.ni_vp) {
1791 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1792 		if (nd.ni_dvp == nd.ni_vp)
1793 			vrele(nd.ni_dvp);
1794 		else
1795 			vput(nd.ni_dvp);
1796 		vrele(nd.ni_vp);
1797 		error = EEXIST;
1798 		goto out;
1799 	}
1800 	VATTR_NULL(&vattr);
1801 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1802 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1803 out:
1804 	pool_put(&namei_pool, path);
1805 	return (error);
1806 }
1807 
1808 /*
1809  * Delete a name from the filesystem.
1810  */
1811 int
1812 sys_unlink(struct proc *p, void *v, register_t *retval)
1813 {
1814 	struct sys_unlink_args /* {
1815 		syscallarg(const char *) path;
1816 	} */ *uap = v;
1817 
1818 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1819 }
1820 
1821 int
1822 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1823 {
1824 	struct sys_unlinkat_args /* {
1825 		syscallarg(int) fd;
1826 		syscallarg(const char *) path;
1827 		syscallarg(int) flag;
1828 	} */ *uap = v;
1829 
1830 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1831 	    SCARG(uap, flag)));
1832 }
1833 
1834 int
1835 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1836 {
1837 	struct vnode *vp;
1838 	int error;
1839 	struct nameidata nd;
1840 
1841 	if (flag & ~AT_REMOVEDIR)
1842 		return (EINVAL);
1843 
1844 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1845 	    fd, path, p);
1846 	nd.ni_pledge = PLEDGE_CPATH;
1847 	nd.ni_unveil = UNVEIL_CREATE;
1848 	if ((error = namei(&nd)) != 0)
1849 		return (error);
1850 	vp = nd.ni_vp;
1851 
1852 	if (flag & AT_REMOVEDIR) {
1853 		if (vp->v_type != VDIR) {
1854 			error = ENOTDIR;
1855 			goto out;
1856 		}
1857 		/*
1858 		 * No rmdir "." please.
1859 		 */
1860 		if (nd.ni_dvp == vp) {
1861 			error = EINVAL;
1862 			goto out;
1863 		}
1864 		/*
1865 		 * A mounted on directory cannot be deleted.
1866 		 */
1867 		if (vp->v_mountedhere != NULL) {
1868 			error = EBUSY;
1869 			goto out;
1870 		}
1871 	}
1872 
1873 	/*
1874 	 * The root of a mounted filesystem cannot be deleted.
1875 	 */
1876 	if (vp->v_flag & VROOT)
1877 		error = EBUSY;
1878 out:
1879 	if (!error) {
1880 		if (flag & AT_REMOVEDIR) {
1881 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1882 		} else {
1883 			(void)uvm_vnp_uncache(vp);
1884 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1885 		}
1886 	} else {
1887 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1888 		if (nd.ni_dvp == vp)
1889 			vrele(nd.ni_dvp);
1890 		else
1891 			vput(nd.ni_dvp);
1892 		vput(vp);
1893 	}
1894 	return (error);
1895 }
1896 
1897 /*
1898  * Reposition read/write file offset.
1899  */
1900 int
1901 sys_lseek(struct proc *p, void *v, register_t *retval)
1902 {
1903 	struct sys_lseek_args /* {
1904 		syscallarg(int) fd;
1905 		syscallarg(int) pad;
1906 		syscallarg(off_t) offset;
1907 		syscallarg(int) whence;
1908 	} */ *uap = v;
1909 	struct filedesc *fdp = p->p_fd;
1910 	struct file *fp;
1911 	off_t offset;
1912 	int error;
1913 
1914 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1915 		return (EBADF);
1916 	if (fp->f_ops->fo_seek == NULL) {
1917 		error = ESPIPE;
1918 		goto bad;
1919 	}
1920 	offset = SCARG(uap, offset);
1921 
1922 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1923 	if (error)
1924 		goto bad;
1925 
1926 	*(off_t *)retval = offset;
1927 	mtx_enter(&fp->f_mtx);
1928 	fp->f_seek++;
1929 	mtx_leave(&fp->f_mtx);
1930 	error = 0;
1931  bad:
1932 	FRELE(fp, p);
1933 	return (error);
1934 }
1935 
1936 /*
1937  * Check access permissions.
1938  */
1939 int
1940 sys_access(struct proc *p, void *v, register_t *retval)
1941 {
1942 	struct sys_access_args /* {
1943 		syscallarg(const char *) path;
1944 		syscallarg(int) amode;
1945 	} */ *uap = v;
1946 
1947 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1948 	    SCARG(uap, amode), 0));
1949 }
1950 
1951 int
1952 sys_faccessat(struct proc *p, void *v, register_t *retval)
1953 {
1954 	struct sys_faccessat_args /* {
1955 		syscallarg(int) fd;
1956 		syscallarg(const char *) path;
1957 		syscallarg(int) amode;
1958 		syscallarg(int) flag;
1959 	} */ *uap = v;
1960 
1961 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1962 	    SCARG(uap, amode), SCARG(uap, flag)));
1963 }
1964 
1965 int
1966 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1967 {
1968 	struct vnode *vp;
1969 	struct ucred *newcred, *oldcred;
1970 	struct nameidata nd;
1971 	int error;
1972 
1973 	if (amode & ~(R_OK | W_OK | X_OK))
1974 		return (EINVAL);
1975 	if (flag & ~AT_EACCESS)
1976 		return (EINVAL);
1977 
1978 	newcred = NULL;
1979 	oldcred = p->p_ucred;
1980 
1981 	/*
1982 	 * If access as real ids was requested and they really differ,
1983 	 * give the thread new creds with them reset
1984 	 */
1985 	if ((flag & AT_EACCESS) == 0 &&
1986 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1987 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1988 		p->p_ucred = newcred = crdup(oldcred);
1989 		newcred->cr_uid = newcred->cr_ruid;
1990 		newcred->cr_gid = newcred->cr_rgid;
1991 	}
1992 
1993 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1994 	nd.ni_pledge = PLEDGE_RPATH;
1995 	nd.ni_unveil = UNVEIL_READ;
1996 	if ((error = namei(&nd)) != 0)
1997 		goto out;
1998 	vp = nd.ni_vp;
1999 
2000 	/* Flags == 0 means only check for existence. */
2001 	if (amode) {
2002 		int vflags = 0;
2003 
2004 		if (amode & R_OK)
2005 			vflags |= VREAD;
2006 		if (amode & W_OK)
2007 			vflags |= VWRITE;
2008 		if (amode & X_OK)
2009 			vflags |= VEXEC;
2010 
2011 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2012 		if (!error && (vflags & VWRITE))
2013 			error = vn_writechk(vp);
2014 	}
2015 	vput(vp);
2016 out:
2017 	if (newcred != NULL) {
2018 		p->p_ucred = oldcred;
2019 		crfree(newcred);
2020 	}
2021 	return (error);
2022 }
2023 
2024 /*
2025  * Get file status; this version follows links.
2026  */
2027 int
2028 sys_stat(struct proc *p, void *v, register_t *retval)
2029 {
2030 	struct sys_stat_args /* {
2031 		syscallarg(const char *) path;
2032 		syscallarg(struct stat *) ub;
2033 	} */ *uap = v;
2034 
2035 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2036 }
2037 
2038 int
2039 sys_fstatat(struct proc *p, void *v, register_t *retval)
2040 {
2041 	struct sys_fstatat_args /* {
2042 		syscallarg(int) fd;
2043 		syscallarg(const char *) path;
2044 		syscallarg(struct stat *) buf;
2045 		syscallarg(int) flag;
2046 	} */ *uap = v;
2047 
2048 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2049 	    SCARG(uap, buf), SCARG(uap, flag)));
2050 }
2051 
2052 int
2053 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2054 {
2055 	struct stat sb;
2056 	int error, follow;
2057 	struct nameidata nd;
2058 
2059 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2060 		return (EINVAL);
2061 
2062 
2063 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2064 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2065 	nd.ni_pledge = PLEDGE_RPATH;
2066 	nd.ni_unveil = UNVEIL_READ;
2067 	if ((error = namei(&nd)) != 0)
2068 		return (error);
2069 	error = vn_stat(nd.ni_vp, &sb, p);
2070 	vput(nd.ni_vp);
2071 	if (error)
2072 		return (error);
2073 	/* Don't let non-root see generation numbers (for NFS security) */
2074 	if (suser(p))
2075 		sb.st_gen = 0;
2076 	error = copyout(&sb, buf, sizeof(sb));
2077 #ifdef KTRACE
2078 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2079 		ktrstat(p, &sb);
2080 #endif
2081 	return (error);
2082 }
2083 
2084 /*
2085  * Get file status; this version does not follow links.
2086  */
2087 int
2088 sys_lstat(struct proc *p, void *v, register_t *retval)
2089 {
2090 	struct sys_lstat_args /* {
2091 		syscallarg(const char *) path;
2092 		syscallarg(struct stat *) ub;
2093 	} */ *uap = v;
2094 
2095 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2096 	    AT_SYMLINK_NOFOLLOW));
2097 }
2098 
2099 /*
2100  * Get configurable pathname variables.
2101  */
2102 int
2103 sys_pathconf(struct proc *p, void *v, register_t *retval)
2104 {
2105 	struct sys_pathconf_args /* {
2106 		syscallarg(const char *) path;
2107 		syscallarg(int) name;
2108 	} */ *uap = v;
2109 	int error;
2110 	struct nameidata nd;
2111 
2112 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2113 	    SCARG(uap, path), p);
2114 	nd.ni_pledge = PLEDGE_RPATH;
2115 	nd.ni_unveil = UNVEIL_READ;
2116 	if ((error = namei(&nd)) != 0)
2117 		return (error);
2118 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2119 	vput(nd.ni_vp);
2120 	return (error);
2121 }
2122 
2123 /*
2124  * Return target name of a symbolic link.
2125  */
2126 int
2127 sys_readlink(struct proc *p, void *v, register_t *retval)
2128 {
2129 	struct sys_readlink_args /* {
2130 		syscallarg(const char *) path;
2131 		syscallarg(char *) buf;
2132 		syscallarg(size_t) count;
2133 	} */ *uap = v;
2134 
2135 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2136 	    SCARG(uap, count), retval));
2137 }
2138 
2139 int
2140 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2141 {
2142 	struct sys_readlinkat_args /* {
2143 		syscallarg(int) fd;
2144 		syscallarg(const char *) path;
2145 		syscallarg(char *) buf;
2146 		syscallarg(size_t) count;
2147 	} */ *uap = v;
2148 
2149 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2150 	    SCARG(uap, buf), SCARG(uap, count), retval));
2151 }
2152 
2153 int
2154 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2155     size_t count, register_t *retval)
2156 {
2157 	struct vnode *vp;
2158 	struct iovec aiov;
2159 	struct uio auio;
2160 	int error;
2161 	struct nameidata nd;
2162 
2163 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2164 	nd.ni_pledge = PLEDGE_RPATH;
2165 	nd.ni_unveil = UNVEIL_READ;
2166 	if ((error = namei(&nd)) != 0)
2167 		return (error);
2168 	vp = nd.ni_vp;
2169 	if (vp->v_type != VLNK)
2170 		error = EINVAL;
2171 	else {
2172 		aiov.iov_base = buf;
2173 		aiov.iov_len = count;
2174 		auio.uio_iov = &aiov;
2175 		auio.uio_iovcnt = 1;
2176 		auio.uio_offset = 0;
2177 		auio.uio_rw = UIO_READ;
2178 		auio.uio_segflg = UIO_USERSPACE;
2179 		auio.uio_procp = p;
2180 		auio.uio_resid = count;
2181 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2182 		*retval = count - auio.uio_resid;
2183 	}
2184 	vput(vp);
2185 	return (error);
2186 }
2187 
2188 /*
2189  * Change flags of a file given a path name.
2190  */
2191 int
2192 sys_chflags(struct proc *p, void *v, register_t *retval)
2193 {
2194 	struct sys_chflags_args /* {
2195 		syscallarg(const char *) path;
2196 		syscallarg(u_int) flags;
2197 	} */ *uap = v;
2198 
2199 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2200 	    SCARG(uap, flags), 0));
2201 }
2202 
2203 int
2204 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2205 {
2206 	struct sys_chflagsat_args /* {
2207 		syscallarg(int) fd;
2208 		syscallarg(const char *) path;
2209 		syscallarg(u_int) flags;
2210 		syscallarg(int) atflags;
2211 	} */ *uap = v;
2212 
2213 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2214 	    SCARG(uap, flags), SCARG(uap, atflags)));
2215 }
2216 
2217 int
2218 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2219 {
2220 	struct nameidata nd;
2221 	int error, follow;
2222 
2223 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2224 		return (EINVAL);
2225 
2226 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2227 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2228 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2229 	nd.ni_unveil = UNVEIL_WRITE;
2230 	if ((error = namei(&nd)) != 0)
2231 		return (error);
2232 	return (dovchflags(p, nd.ni_vp, flags));
2233 }
2234 
2235 /*
2236  * Change flags of a file given a file descriptor.
2237  */
2238 int
2239 sys_fchflags(struct proc *p, void *v, register_t *retval)
2240 {
2241 	struct sys_fchflags_args /* {
2242 		syscallarg(int) fd;
2243 		syscallarg(u_int) flags;
2244 	} */ *uap = v;
2245 	struct file *fp;
2246 	struct vnode *vp;
2247 	int error;
2248 
2249 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2250 		return (error);
2251 	vp = fp->f_data;
2252 	vref(vp);
2253 	FRELE(fp, p);
2254 	return (dovchflags(p, vp, SCARG(uap, flags)));
2255 }
2256 
2257 int
2258 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2259 {
2260 	struct vattr vattr;
2261 	int error;
2262 
2263 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2264 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2265 		error = EROFS;
2266 	else if (flags == VNOVAL)
2267 		error = EINVAL;
2268 	else {
2269 		if (suser(p)) {
2270 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2271 			    != 0)
2272 				goto out;
2273 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2274 				error = EINVAL;
2275 				goto out;
2276 			}
2277 		}
2278 		VATTR_NULL(&vattr);
2279 		vattr.va_flags = flags;
2280 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2281 	}
2282 out:
2283 	vput(vp);
2284 	return (error);
2285 }
2286 
2287 /*
2288  * Change mode of a file given path name.
2289  */
2290 int
2291 sys_chmod(struct proc *p, void *v, register_t *retval)
2292 {
2293 	struct sys_chmod_args /* {
2294 		syscallarg(const char *) path;
2295 		syscallarg(mode_t) mode;
2296 	} */ *uap = v;
2297 
2298 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2299 }
2300 
2301 int
2302 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2303 {
2304 	struct sys_fchmodat_args /* {
2305 		syscallarg(int) fd;
2306 		syscallarg(const char *) path;
2307 		syscallarg(mode_t) mode;
2308 		syscallarg(int) flag;
2309 	} */ *uap = v;
2310 
2311 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2312 	    SCARG(uap, mode), SCARG(uap, flag)));
2313 }
2314 
2315 int
2316 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2317 {
2318 	struct vnode *vp;
2319 	struct vattr vattr;
2320 	int error, follow;
2321 	struct nameidata nd;
2322 
2323 	if (mode & ~(S_IFMT | ALLPERMS))
2324 		return (EINVAL);
2325 	if ((p->p_p->ps_flags & PS_PLEDGE))
2326 		mode &= ACCESSPERMS;
2327 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2328 		return (EINVAL);
2329 
2330 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2331 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2332 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2333 	nd.ni_unveil = UNVEIL_WRITE;
2334 	if ((error = namei(&nd)) != 0)
2335 		return (error);
2336 	vp = nd.ni_vp;
2337 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2338 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2339 		error = EROFS;
2340 	else {
2341 		VATTR_NULL(&vattr);
2342 		vattr.va_mode = mode & ALLPERMS;
2343 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2344 	}
2345 	vput(vp);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * Change mode of a file given a file descriptor.
2351  */
2352 int
2353 sys_fchmod(struct proc *p, void *v, register_t *retval)
2354 {
2355 	struct sys_fchmod_args /* {
2356 		syscallarg(int) fd;
2357 		syscallarg(mode_t) mode;
2358 	} */ *uap = v;
2359 	struct vattr vattr;
2360 	struct vnode *vp;
2361 	struct file *fp;
2362 	mode_t mode = SCARG(uap, mode);
2363 	int error;
2364 
2365 	if (mode & ~(S_IFMT | ALLPERMS))
2366 		return (EINVAL);
2367 	if ((p->p_p->ps_flags & PS_PLEDGE))
2368 		mode &= ACCESSPERMS;
2369 
2370 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2371 		return (error);
2372 	vp = fp->f_data;
2373 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2374 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2375 		error = EROFS;
2376 	else {
2377 		VATTR_NULL(&vattr);
2378 		vattr.va_mode = mode & ALLPERMS;
2379 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2380 	}
2381 	VOP_UNLOCK(vp);
2382 	FRELE(fp, p);
2383 	return (error);
2384 }
2385 
2386 /*
2387  * Set ownership given a path name.
2388  */
2389 int
2390 sys_chown(struct proc *p, void *v, register_t *retval)
2391 {
2392 	struct sys_chown_args /* {
2393 		syscallarg(const char *) path;
2394 		syscallarg(uid_t) uid;
2395 		syscallarg(gid_t) gid;
2396 	} */ *uap = v;
2397 
2398 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2399 	    SCARG(uap, gid), 0));
2400 }
2401 
2402 int
2403 sys_fchownat(struct proc *p, void *v, register_t *retval)
2404 {
2405 	struct sys_fchownat_args /* {
2406 		syscallarg(int) fd;
2407 		syscallarg(const char *) path;
2408 		syscallarg(uid_t) uid;
2409 		syscallarg(gid_t) gid;
2410 		syscallarg(int) flag;
2411 	} */ *uap = v;
2412 
2413 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2414 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2415 }
2416 
2417 int
2418 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2419     int flag)
2420 {
2421 	struct vnode *vp;
2422 	struct vattr vattr;
2423 	int error, follow;
2424 	struct nameidata nd;
2425 	mode_t mode;
2426 
2427 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2428 		return (EINVAL);
2429 
2430 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2431 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2432 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2433 	nd.ni_unveil = UNVEIL_WRITE;
2434 	if ((error = namei(&nd)) != 0)
2435 		return (error);
2436 	vp = nd.ni_vp;
2437 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2438 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2439 		error = EROFS;
2440 	else {
2441 		if ((error = pledge_chown(p, uid, gid)))
2442 			goto out;
2443 		if ((uid != -1 || gid != -1) &&
2444 		    !vnoperm(vp) &&
2445 		    (suser(p) || suid_clear)) {
2446 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2447 			if (error)
2448 				goto out;
2449 			mode = vattr.va_mode & ~(VSUID | VSGID);
2450 			if (mode == vattr.va_mode)
2451 				mode = VNOVAL;
2452 		} else
2453 			mode = VNOVAL;
2454 		VATTR_NULL(&vattr);
2455 		vattr.va_uid = uid;
2456 		vattr.va_gid = gid;
2457 		vattr.va_mode = mode;
2458 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2459 	}
2460 out:
2461 	vput(vp);
2462 	return (error);
2463 }
2464 
2465 /*
2466  * Set ownership given a path name, without following links.
2467  */
2468 int
2469 sys_lchown(struct proc *p, void *v, register_t *retval)
2470 {
2471 	struct sys_lchown_args /* {
2472 		syscallarg(const char *) path;
2473 		syscallarg(uid_t) uid;
2474 		syscallarg(gid_t) gid;
2475 	} */ *uap = v;
2476 	struct vnode *vp;
2477 	struct vattr vattr;
2478 	int error;
2479 	struct nameidata nd;
2480 	mode_t mode;
2481 	uid_t uid = SCARG(uap, uid);
2482 	gid_t gid = SCARG(uap, gid);
2483 
2484 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2485 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2486 	nd.ni_unveil = UNVEIL_WRITE;
2487 	if ((error = namei(&nd)) != 0)
2488 		return (error);
2489 	vp = nd.ni_vp;
2490 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2491 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2492 		error = EROFS;
2493 	else {
2494 		if ((error = pledge_chown(p, uid, gid)))
2495 			goto out;
2496 		if ((uid != -1 || gid != -1) &&
2497 		    !vnoperm(vp) &&
2498 		    (suser(p) || suid_clear)) {
2499 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2500 			if (error)
2501 				goto out;
2502 			mode = vattr.va_mode & ~(VSUID | VSGID);
2503 			if (mode == vattr.va_mode)
2504 				mode = VNOVAL;
2505 		} else
2506 			mode = VNOVAL;
2507 		VATTR_NULL(&vattr);
2508 		vattr.va_uid = uid;
2509 		vattr.va_gid = gid;
2510 		vattr.va_mode = mode;
2511 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2512 	}
2513 out:
2514 	vput(vp);
2515 	return (error);
2516 }
2517 
2518 /*
2519  * Set ownership given a file descriptor.
2520  */
2521 int
2522 sys_fchown(struct proc *p, void *v, register_t *retval)
2523 {
2524 	struct sys_fchown_args /* {
2525 		syscallarg(int) fd;
2526 		syscallarg(uid_t) uid;
2527 		syscallarg(gid_t) gid;
2528 	} */ *uap = v;
2529 	struct vnode *vp;
2530 	struct vattr vattr;
2531 	int error;
2532 	struct file *fp;
2533 	mode_t mode;
2534 	uid_t uid = SCARG(uap, uid);
2535 	gid_t gid = SCARG(uap, gid);
2536 
2537 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2538 		return (error);
2539 	vp = fp->f_data;
2540 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2541 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2542 		error = EROFS;
2543 	else {
2544 		if ((error = pledge_chown(p, uid, gid)))
2545 			goto out;
2546 		if ((uid != -1 || gid != -1) &&
2547 		    !vnoperm(vp) &&
2548 		    (suser(p) || suid_clear)) {
2549 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2550 			if (error)
2551 				goto out;
2552 			mode = vattr.va_mode & ~(VSUID | VSGID);
2553 			if (mode == vattr.va_mode)
2554 				mode = VNOVAL;
2555 		} else
2556 			mode = VNOVAL;
2557 		VATTR_NULL(&vattr);
2558 		vattr.va_uid = uid;
2559 		vattr.va_gid = gid;
2560 		vattr.va_mode = mode;
2561 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2562 	}
2563 out:
2564 	VOP_UNLOCK(vp);
2565 	FRELE(fp, p);
2566 	return (error);
2567 }
2568 
2569 /*
2570  * Set the access and modification times given a path name.
2571  */
2572 int
2573 sys_utimes(struct proc *p, void *v, register_t *retval)
2574 {
2575 	struct sys_utimes_args /* {
2576 		syscallarg(const char *) path;
2577 		syscallarg(const struct timeval *) tptr;
2578 	} */ *uap = v;
2579 
2580 	struct timespec ts[2];
2581 	struct timeval tv[2];
2582 	const struct timeval *tvp;
2583 	int error;
2584 
2585 	tvp = SCARG(uap, tptr);
2586 	if (tvp != NULL) {
2587 		error = copyin(tvp, tv, sizeof(tv));
2588 		if (error)
2589 			return (error);
2590 #ifdef KTRACE
2591 		if (KTRPOINT(p, KTR_STRUCT))
2592 			ktrabstimeval(p, &tv);
2593 #endif
2594 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2595 			return (EINVAL);
2596 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2597 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2598 	} else
2599 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2600 
2601 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2602 }
2603 
2604 int
2605 sys_utimensat(struct proc *p, void *v, register_t *retval)
2606 {
2607 	struct sys_utimensat_args /* {
2608 		syscallarg(int) fd;
2609 		syscallarg(const char *) path;
2610 		syscallarg(const struct timespec *) times;
2611 		syscallarg(int) flag;
2612 	} */ *uap = v;
2613 
2614 	struct timespec ts[2];
2615 	const struct timespec *tsp;
2616 	int error, i;
2617 
2618 	tsp = SCARG(uap, times);
2619 	if (tsp != NULL) {
2620 		error = copyin(tsp, ts, sizeof(ts));
2621 		if (error)
2622 			return (error);
2623 		for (i = 0; i < nitems(ts); i++) {
2624 			if (ts[i].tv_nsec == UTIME_NOW)
2625 				continue;
2626 			if (ts[i].tv_nsec == UTIME_OMIT)
2627 				continue;
2628 #ifdef KTRACE
2629 			if (KTRPOINT(p, KTR_STRUCT))
2630 				ktrabstimespec(p, &ts[i]);
2631 #endif
2632 			if (!timespecisvalid(&ts[i]))
2633 				return (EINVAL);
2634 		}
2635 	} else
2636 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2637 
2638 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2639 	    SCARG(uap, flag)));
2640 }
2641 
2642 int
2643 doutimensat(struct proc *p, int fd, const char *path,
2644     struct timespec ts[2], int flag)
2645 {
2646 	struct vnode *vp;
2647 	int error, follow;
2648 	struct nameidata nd;
2649 
2650 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2651 		return (EINVAL);
2652 
2653 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2654 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2655 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2656 	nd.ni_unveil = UNVEIL_WRITE;
2657 	if ((error = namei(&nd)) != 0)
2658 		return (error);
2659 	vp = nd.ni_vp;
2660 
2661 	return (dovutimens(p, vp, ts));
2662 }
2663 
2664 int
2665 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2666 {
2667 	struct vattr vattr;
2668 	struct timespec now;
2669 	int error;
2670 
2671 #ifdef KTRACE
2672 	/* if they're both UTIME_NOW, then don't report either */
2673 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2674 	    KTRPOINT(p, KTR_STRUCT)) {
2675 		ktrabstimespec(p, &ts[0]);
2676 		ktrabstimespec(p, &ts[1]);
2677 	}
2678 #endif
2679 
2680 	VATTR_NULL(&vattr);
2681 
2682 	/*  make sure ctime is updated even if neither mtime nor atime is */
2683 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2684 
2685 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2686 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2687 			vattr.va_vaflags |= VA_UTIMES_NULL;
2688 
2689 		getnanotime(&now);
2690 		if (ts[0].tv_nsec == UTIME_NOW)
2691 			ts[0] = now;
2692 		if (ts[1].tv_nsec == UTIME_NOW)
2693 			ts[1] = now;
2694 	}
2695 
2696 	if (ts[0].tv_nsec != UTIME_OMIT)
2697 		vattr.va_atime = ts[0];
2698 	if (ts[1].tv_nsec != UTIME_OMIT)
2699 		vattr.va_mtime = ts[1];
2700 
2701 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2702 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2703 		error = EROFS;
2704 	else
2705 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2706 	vput(vp);
2707 	return (error);
2708 }
2709 
2710 /*
2711  * Set the access and modification times given a file descriptor.
2712  */
2713 int
2714 sys_futimes(struct proc *p, void *v, register_t *retval)
2715 {
2716 	struct sys_futimes_args /* {
2717 		syscallarg(int) fd;
2718 		syscallarg(const struct timeval *) tptr;
2719 	} */ *uap = v;
2720 	struct timeval tv[2];
2721 	struct timespec ts[2];
2722 	const struct timeval *tvp;
2723 	int error;
2724 
2725 	tvp = SCARG(uap, tptr);
2726 	if (tvp != NULL) {
2727 		error = copyin(tvp, tv, sizeof(tv));
2728 		if (error)
2729 			return (error);
2730 #ifdef KTRACE
2731 		if (KTRPOINT(p, KTR_STRUCT)) {
2732 			ktrabstimeval(p, &tv[0]);
2733 			ktrabstimeval(p, &tv[1]);
2734 		}
2735 #endif
2736 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2737 			return (EINVAL);
2738 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2739 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2740 	} else
2741 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2742 
2743 	return (dofutimens(p, SCARG(uap, fd), ts));
2744 }
2745 
2746 int
2747 sys_futimens(struct proc *p, void *v, register_t *retval)
2748 {
2749 	struct sys_futimens_args /* {
2750 		syscallarg(int) fd;
2751 		syscallarg(const struct timespec *) times;
2752 	} */ *uap = v;
2753 	struct timespec ts[2];
2754 	const struct timespec *tsp;
2755 	int error, i;
2756 
2757 	tsp = SCARG(uap, times);
2758 	if (tsp != NULL) {
2759 		error = copyin(tsp, ts, sizeof(ts));
2760 		if (error)
2761 			return (error);
2762 		for (i = 0; i < nitems(ts); i++) {
2763 			if (ts[i].tv_nsec == UTIME_NOW)
2764 				continue;
2765 			if (ts[i].tv_nsec == UTIME_OMIT)
2766 				continue;
2767 #ifdef KTRACE
2768 			if (KTRPOINT(p, KTR_STRUCT))
2769 				ktrabstimespec(p, &ts[i]);
2770 #endif
2771 			if (!timespecisvalid(&ts[i]))
2772 				return (EINVAL);
2773 		}
2774 	} else
2775 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2776 
2777 	return (dofutimens(p, SCARG(uap, fd), ts));
2778 }
2779 
2780 int
2781 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2782 {
2783 	struct file *fp;
2784 	struct vnode *vp;
2785 	int error;
2786 
2787 	if ((error = getvnode(p, fd, &fp)) != 0)
2788 		return (error);
2789 	vp = fp->f_data;
2790 	vref(vp);
2791 	FRELE(fp, p);
2792 
2793 	return (dovutimens(p, vp, ts));
2794 }
2795 
2796 /*
2797  * Truncate a file given its path name.
2798  */
2799 int
2800 sys_truncate(struct proc *p, void *v, register_t *retval)
2801 {
2802 	struct sys_truncate_args /* {
2803 		syscallarg(const char *) path;
2804 		syscallarg(int) pad;
2805 		syscallarg(off_t) length;
2806 	} */ *uap = v;
2807 	struct vnode *vp;
2808 	struct vattr vattr;
2809 	int error;
2810 	struct nameidata nd;
2811 
2812 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2813 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2814 	nd.ni_unveil = UNVEIL_WRITE;
2815 	if ((error = namei(&nd)) != 0)
2816 		return (error);
2817 	vp = nd.ni_vp;
2818 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2819 	if (vp->v_type == VDIR)
2820 		error = EISDIR;
2821 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2822 	    (error = vn_writechk(vp)) == 0) {
2823 		VATTR_NULL(&vattr);
2824 		vattr.va_size = SCARG(uap, length);
2825 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2826 	}
2827 	vput(vp);
2828 	return (error);
2829 }
2830 
2831 /*
2832  * Truncate a file given a file descriptor.
2833  */
2834 int
2835 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2836 {
2837 	struct sys_ftruncate_args /* {
2838 		syscallarg(int) fd;
2839 		syscallarg(int) pad;
2840 		syscallarg(off_t) length;
2841 	} */ *uap = v;
2842 	struct vattr vattr;
2843 	struct vnode *vp;
2844 	struct file *fp;
2845 	off_t len;
2846 	int error;
2847 
2848 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2849 		return (error);
2850 	len = SCARG(uap, length);
2851 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2852 		error = EINVAL;
2853 		goto bad;
2854 	}
2855 	vp = fp->f_data;
2856 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2857 	if (vp->v_type == VDIR)
2858 		error = EISDIR;
2859 	else if ((error = vn_writechk(vp)) == 0) {
2860 		VATTR_NULL(&vattr);
2861 		vattr.va_size = len;
2862 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2863 	}
2864 	VOP_UNLOCK(vp);
2865 bad:
2866 	FRELE(fp, p);
2867 	return (error);
2868 }
2869 
2870 /*
2871  * Sync an open file.
2872  */
2873 int
2874 sys_fsync(struct proc *p, void *v, register_t *retval)
2875 {
2876 	struct sys_fsync_args /* {
2877 		syscallarg(int) fd;
2878 	} */ *uap = v;
2879 	struct vnode *vp;
2880 	struct file *fp;
2881 	int error;
2882 
2883 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2884 		return (error);
2885 	vp = fp->f_data;
2886 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2887 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2888 #ifdef FFS_SOFTUPDATES
2889 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2890 		error = softdep_fsync(vp);
2891 #endif
2892 
2893 	VOP_UNLOCK(vp);
2894 	FRELE(fp, p);
2895 	return (error);
2896 }
2897 
2898 /*
2899  * Rename files.  Source and destination must either both be directories,
2900  * or both not be directories.  If target is a directory, it must be empty.
2901  */
2902 int
2903 sys_rename(struct proc *p, void *v, register_t *retval)
2904 {
2905 	struct sys_rename_args /* {
2906 		syscallarg(const char *) from;
2907 		syscallarg(const char *) to;
2908 	} */ *uap = v;
2909 
2910 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2911 	    SCARG(uap, to)));
2912 }
2913 
2914 int
2915 sys_renameat(struct proc *p, void *v, register_t *retval)
2916 {
2917 	struct sys_renameat_args /* {
2918 		syscallarg(int) fromfd;
2919 		syscallarg(const char *) from;
2920 		syscallarg(int) tofd;
2921 		syscallarg(const char *) to;
2922 	} */ *uap = v;
2923 
2924 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2925 	    SCARG(uap, tofd), SCARG(uap, to)));
2926 }
2927 
/*
 * Common implementation of rename(2) and renameat(2).
 *
 * Looks up the source (DELETE) and the destination (RENAME), rejects
 * directory/non-directory mismatches with ENOTDIR or EISDIR and a
 * source that is the destination's parent directory with EINVAL, and
 * otherwise calls VOP_RENAME().  Internally error == -1 marks "source
 * and destination are the same vnode"; it skips VOP_RENAME() and is
 * turned into a successful return of 0 at the end.
 */
int
dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
    const char *to)
{
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;
	int flags;

	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    fromfd, from, p);
	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;

	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
	/*
	 * rename("foo/", "bar/");  is  OK
	 */
	if (fvp->v_type == VDIR)
		flags |= STRIPSLASHES;

	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
	tond.ni_pledge = PLEDGE_CPATH;
	tond.ni_unveil = UNVEIL_CREATE;
	if ((error = namei(&tond)) != 0) {
		/*
		 * Destination lookup failed: abort the pending source
		 * operation and drop the source references.  Nothing of
		 * tond is released on this path (goto out1 skips it).
		 */
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	if (tvp != NULL) {
		/* An existing target must be of the same kind as the source. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number), there is nothing to rename.  Note that
	 * this assignment overrides an EINVAL set just above.
	 */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		if (tvp) {
			(void)uvm_vnp_uncache(tvp);
		}
		/*
		 * NOTE(review): no vnode is released after this call, so
		 * VOP_RENAME() presumably consumes the dvp/vp references
		 * and locks of both lookups -- confirm against VOP_RENAME.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
	} else {
		/* Failure or no-op: undo both lookups by hand. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		/*
		 * vrele() instead of vput() for the directory when it is
		 * the same vnode as the target; the vput(tvp) below then
		 * handles that vnode.
		 */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* Release the start directory and path buffer of the target lookup. */
	vrele(tond.ni_startdir);
	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
out1:
	/* Same for the source lookup; its start directory may be NULL. */
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
	/* -1 is the internal "same file" marker, reported as success. */
	if (error == -1)
		return (0);
	return (error);
}
3009 
3010 /*
3011  * Make a directory file.
3012  */
3013 int
3014 sys_mkdir(struct proc *p, void *v, register_t *retval)
3015 {
3016 	struct sys_mkdir_args /* {
3017 		syscallarg(const char *) path;
3018 		syscallarg(mode_t) mode;
3019 	} */ *uap = v;
3020 
3021 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3022 }
3023 
3024 int
3025 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3026 {
3027 	struct sys_mkdirat_args /* {
3028 		syscallarg(int) fd;
3029 		syscallarg(const char *) path;
3030 		syscallarg(mode_t) mode;
3031 	} */ *uap = v;
3032 
3033 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3034 	    SCARG(uap, mode)));
3035 }
3036 
3037 int
3038 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3039 {
3040 	struct vnode *vp;
3041 	struct vattr vattr;
3042 	int error;
3043 	struct nameidata nd;
3044 
3045 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3046 	    fd, path, p);
3047 	nd.ni_pledge = PLEDGE_CPATH;
3048 	nd.ni_unveil = UNVEIL_CREATE;
3049 	if ((error = namei(&nd)) != 0)
3050 		return (error);
3051 	vp = nd.ni_vp;
3052 	if (vp != NULL) {
3053 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3054 		if (nd.ni_dvp == vp)
3055 			vrele(nd.ni_dvp);
3056 		else
3057 			vput(nd.ni_dvp);
3058 		vrele(vp);
3059 		return (EEXIST);
3060 	}
3061 	VATTR_NULL(&vattr);
3062 	vattr.va_type = VDIR;
3063 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3064 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3065 	if (!error)
3066 		vput(nd.ni_vp);
3067 	return (error);
3068 }
3069 
3070 /*
3071  * Remove a directory file.
3072  */
3073 int
3074 sys_rmdir(struct proc *p, void *v, register_t *retval)
3075 {
3076 	struct sys_rmdir_args /* {
3077 		syscallarg(const char *) path;
3078 	} */ *uap = v;
3079 
3080 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3081 }
3082 
3083 /*
3084  * Read a block of directory entries in a file system independent format.
3085  */
3086 int
3087 sys_getdents(struct proc *p, void *v, register_t *retval)
3088 {
3089 	struct sys_getdents_args /* {
3090 		syscallarg(int) fd;
3091 		syscallarg(void *) buf;
3092 		syscallarg(size_t) buflen;
3093 	} */ *uap = v;
3094 	struct vnode *vp;
3095 	struct file *fp;
3096 	struct uio auio;
3097 	struct iovec aiov;
3098 	size_t buflen;
3099 	int error, eofflag;
3100 
3101 	buflen = SCARG(uap, buflen);
3102 
3103 	if (buflen > INT_MAX)
3104 		return (EINVAL);
3105 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3106 		return (error);
3107 	if ((fp->f_flag & FREAD) == 0) {
3108 		error = EBADF;
3109 		goto bad;
3110 	}
3111 	vp = fp->f_data;
3112 	if (vp->v_type != VDIR) {
3113 		error = EINVAL;
3114 		goto bad;
3115 	}
3116 
3117 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3118 
3119 	if (fp->f_offset < 0) {
3120 		VOP_UNLOCK(vp);
3121 		error = EINVAL;
3122 		goto bad;
3123 	}
3124 
3125 	aiov.iov_base = SCARG(uap, buf);
3126 	aiov.iov_len = buflen;
3127 	auio.uio_iov = &aiov;
3128 	auio.uio_iovcnt = 1;
3129 	auio.uio_rw = UIO_READ;
3130 	auio.uio_segflg = UIO_USERSPACE;
3131 	auio.uio_procp = p;
3132 	auio.uio_resid = buflen;
3133 	auio.uio_offset = fp->f_offset;
3134 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3135 	mtx_enter(&fp->f_mtx);
3136 	fp->f_offset = auio.uio_offset;
3137 	mtx_leave(&fp->f_mtx);
3138 	VOP_UNLOCK(vp);
3139 	if (error)
3140 		goto bad;
3141 	*retval = buflen - auio.uio_resid;
3142 bad:
3143 	FRELE(fp, p);
3144 	return (error);
3145 }
3146 
3147 /*
3148  * Set the mode mask for creation of filesystem nodes.
3149  */
3150 int
3151 sys_umask(struct proc *p, void *v, register_t *retval)
3152 {
3153 	struct sys_umask_args /* {
3154 		syscallarg(mode_t) newmask;
3155 	} */ *uap = v;
3156 	struct filedesc *fdp = p->p_fd;
3157 
3158 	fdplock(fdp);
3159 	*retval = fdp->fd_cmask;
3160 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3161 	fdpunlock(fdp);
3162 	return (0);
3163 }
3164 
3165 /*
3166  * Void all references to file by ripping underlying filesystem
3167  * away from vnode.
3168  */
3169 int
3170 sys_revoke(struct proc *p, void *v, register_t *retval)
3171 {
3172 	struct sys_revoke_args /* {
3173 		syscallarg(const char *) path;
3174 	} */ *uap = v;
3175 	struct vnode *vp;
3176 	struct vattr vattr;
3177 	int error;
3178 	struct nameidata nd;
3179 
3180 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3181 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3182 	nd.ni_unveil = UNVEIL_READ;
3183 	if ((error = namei(&nd)) != 0)
3184 		return (error);
3185 	vp = nd.ni_vp;
3186 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3187 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3188 		error = ENOTTY;
3189 		goto out;
3190 	}
3191 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3192 		goto out;
3193 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3194 	    (error = suser(p)))
3195 		goto out;
3196 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3197 		VOP_REVOKE(vp, REVOKEALL);
3198 out:
3199 	vrele(vp);
3200 	return (error);
3201 }
3202 
3203 /*
3204  * Convert a user file descriptor to a kernel file entry.
3205  *
3206  * On return *fpp is FREF:ed.
3207  */
3208 int
3209 getvnode(struct proc *p, int fd, struct file **fpp)
3210 {
3211 	struct file *fp;
3212 	struct vnode *vp;
3213 
3214 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3215 		return (EBADF);
3216 
3217 	if (fp->f_type != DTYPE_VNODE) {
3218 		FRELE(fp, p);
3219 		return (EINVAL);
3220 	}
3221 
3222 	vp = fp->f_data;
3223 	if (vp->v_type == VBAD) {
3224 		FRELE(fp, p);
3225 		return (EBADF);
3226 	}
3227 
3228 	*fpp = fp;
3229 
3230 	return (0);
3231 }
3232 
3233 /*
3234  * Positional read system call.
3235  */
3236 int
3237 sys_pread(struct proc *p, void *v, register_t *retval)
3238 {
3239 	struct sys_pread_args /* {
3240 		syscallarg(int) fd;
3241 		syscallarg(void *) buf;
3242 		syscallarg(size_t) nbyte;
3243 		syscallarg(int) pad;
3244 		syscallarg(off_t) offset;
3245 	} */ *uap = v;
3246 	struct iovec iov;
3247 	struct uio auio;
3248 
3249 	iov.iov_base = SCARG(uap, buf);
3250 	iov.iov_len = SCARG(uap, nbyte);
3251 	if (iov.iov_len > SSIZE_MAX)
3252 		return (EINVAL);
3253 
3254 	auio.uio_iov = &iov;
3255 	auio.uio_iovcnt = 1;
3256 	auio.uio_resid = iov.iov_len;
3257 	auio.uio_offset = SCARG(uap, offset);
3258 
3259 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3260 }
3261 
3262 /*
3263  * Positional scatter read system call.
3264  */
3265 int
3266 sys_preadv(struct proc *p, void *v, register_t *retval)
3267 {
3268 	struct sys_preadv_args /* {
3269 		syscallarg(int) fd;
3270 		syscallarg(const struct iovec *) iovp;
3271 		syscallarg(int) iovcnt;
3272 		syscallarg(int) pad;
3273 		syscallarg(off_t) offset;
3274 	} */ *uap = v;
3275 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3276 	int error, iovcnt = SCARG(uap, iovcnt);
3277 	struct uio auio;
3278 	size_t resid;
3279 
3280 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3281 	if (error)
3282 		goto done;
3283 
3284 	auio.uio_iov = iov;
3285 	auio.uio_iovcnt = iovcnt;
3286 	auio.uio_resid = resid;
3287 	auio.uio_offset = SCARG(uap, offset);
3288 
3289 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3290  done:
3291 	iovec_free(iov, iovcnt);
3292 	return (error);
3293 }
3294 
3295 /*
3296  * Positional write system call.
3297  */
3298 int
3299 sys_pwrite(struct proc *p, void *v, register_t *retval)
3300 {
3301 	struct sys_pwrite_args /* {
3302 		syscallarg(int) fd;
3303 		syscallarg(const void *) buf;
3304 		syscallarg(size_t) nbyte;
3305 		syscallarg(int) pad;
3306 		syscallarg(off_t) offset;
3307 	} */ *uap = v;
3308 	struct iovec iov;
3309 	struct uio auio;
3310 
3311 	iov.iov_base = (void *)SCARG(uap, buf);
3312 	iov.iov_len = SCARG(uap, nbyte);
3313 	if (iov.iov_len > SSIZE_MAX)
3314 		return (EINVAL);
3315 
3316 	auio.uio_iov = &iov;
3317 	auio.uio_iovcnt = 1;
3318 	auio.uio_resid = iov.iov_len;
3319 	auio.uio_offset = SCARG(uap, offset);
3320 
3321 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3322 }
3323 
3324 /*
3325  * Positional gather write system call.
3326  */
3327 int
3328 sys_pwritev(struct proc *p, void *v, register_t *retval)
3329 {
3330 	struct sys_pwritev_args /* {
3331 		syscallarg(int) fd;
3332 		syscallarg(const struct iovec *) iovp;
3333 		syscallarg(int) iovcnt;
3334 		syscallarg(int) pad;
3335 		syscallarg(off_t) offset;
3336 	} */ *uap = v;
3337 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3338 	int error, iovcnt = SCARG(uap, iovcnt);
3339 	struct uio auio;
3340 	size_t resid;
3341 
3342 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3343 	if (error)
3344 		goto done;
3345 
3346 	auio.uio_iov = iov;
3347 	auio.uio_iovcnt = iovcnt;
3348 	auio.uio_resid = resid;
3349 	auio.uio_offset = SCARG(uap, offset);
3350 
3351 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3352  done:
3353 	iovec_free(iov, iovcnt);
3354 	return (error);
3355 }
3356