xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 25c4e8bd056e974b28f4a0ffd39d76c190a56013)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.358 2022/07/20 05:56:36 deraadt Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
65 
66 #include <sys/syscallargs.h>
67 
68 extern int suid_clear;
69 
70 static int change_dir(struct nameidata *, struct proc *);
71 
72 void checkdirs(struct vnode *);
73 
74 int copyout_statfs(struct statfs *, void *, struct proc *);
75 
76 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
77 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
78 int dolinkat(struct proc *, int, const char *, int, const char *, int);
79 int dosymlinkat(struct proc *, const char *, int, const char *);
80 int dounlinkat(struct proc *, int, const char *, int);
81 int dofaccessat(struct proc *, int, const char *, int, int);
82 int dofstatat(struct proc *, int, const char *, struct stat *, int);
83 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
84     register_t *);
85 int dochflagsat(struct proc *, int, const char *, u_int, int);
86 int dovchflags(struct proc *, struct vnode *, u_int);
87 int dofchmodat(struct proc *, int, const char *, mode_t, int);
88 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
89 int dorenameat(struct proc *, int, const char *, int, const char *);
90 int domkdirat(struct proc *, int, const char *, mode_t);
91 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
92 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
93 int dofutimens(struct proc *, int, struct timespec [2]);
94 int dounmount_leaf(struct mount *, int, struct proc *);
95 
96 /*
97  * Virtual File System System Calls
98  */
99 
100 /*
101  * Mount a file system.
102  */
103 int
104 sys_mount(struct proc *p, void *v, register_t *retval)
105 {
106 	struct sys_mount_args /* {
107 		syscallarg(const char *) type;
108 		syscallarg(const char *) path;
109 		syscallarg(int) flags;
110 		syscallarg(void *) data;
111 	} */ *uap = v;
112 	struct vnode *vp;
113 	struct mount *mp;
114 	int error, mntflag = 0;
115 	char fstypename[MFSNAMELEN];
116 	char fspath[MNAMELEN];
117 	struct nameidata nd;
118 	struct vfsconf *vfsp;
119 	int flags = SCARG(uap, flags);
120 	void *args = NULL;
121 
122 	if ((error = suser(p)))
123 		return (error);
124 
125 	/*
126 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
127 	 */
128 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
129 	if (error)
130 		return(error);
131 
132 	/*
133 	 * Get vnode to be covered
134 	 */
135 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
136 	if ((error = namei(&nd)) != 0)
137 		goto fail;
138 	vp = nd.ni_vp;
139 	if (flags & MNT_UPDATE) {
140 		if ((vp->v_flag & VROOT) == 0) {
141 			vput(vp);
142 			error = EINVAL;
143 			goto fail;
144 		}
145 		mp = vp->v_mount;
146 		vfsp = mp->mnt_vfc;
147 
148 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
149 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
150 		if (error) {
151 			vput(vp);
152 			goto fail;
153 		}
154 
155 		mntflag = mp->mnt_flag;
156 		/*
157 		 * We only allow the filesystem to be reloaded if it
158 		 * is currently mounted read-only.
159 		 */
160 		if ((flags & MNT_RELOAD) &&
161 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
162 			vput(vp);
163 			error = EOPNOTSUPP;	/* Needs translation */
164 			goto fail;
165 		}
166 
167 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
168 			vput(vp);
169 			goto fail;
170 		}
171 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
172 		goto update;
173 	}
174 	/*
175 	 * Do not allow disabling of permission checks unless exec and access to
176 	 * device files is disabled too.
177 	 */
178 	if ((flags & MNT_NOPERM) &&
179 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
180 		vput(vp);
181 		error = EPERM;
182 		goto fail;
183 	}
184 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
185 		vput(vp);
186 		goto fail;
187 	}
188 	if (vp->v_type != VDIR) {
189 		vput(vp);
190 		goto fail;
191 	}
192 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
193 	if (error) {
194 		vput(vp);
195 		goto fail;
196 	}
197 	vfsp = vfs_byname(fstypename);
198 	if (vfsp == NULL) {
199 		vput(vp);
200 		error = EOPNOTSUPP;
201 		goto fail;
202 	}
203 
204 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
205 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
206 	if (error) {
207 		vput(vp);
208 		goto fail;
209 	}
210 
211 	if (vp->v_mountedhere != NULL) {
212 		vput(vp);
213 		error = EBUSY;
214 		goto fail;
215 	}
216 
217 	/*
218 	 * Allocate and initialize the file system.
219 	 */
220 	mp = vfs_mount_alloc(vp, vfsp);
221 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
222 
223 update:
224 	/* Ensure that the parent mountpoint does not get unmounted. */
225 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
226 	if (error) {
227 		if (mp->mnt_flag & MNT_UPDATE) {
228 			mp->mnt_flag = mntflag;
229 			vfs_unbusy(mp);
230 		} else {
231 			vfs_unbusy(mp);
232 			vfs_mount_free(mp);
233 		}
234 		vput(vp);
235 		goto fail;
236 	}
237 
238 	/*
239 	 * Set the mount level flags.
240 	 */
241 	if (flags & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_RDONLY;
243 	else if (mp->mnt_flag & MNT_RDONLY)
244 		mp->mnt_flag |= MNT_WANTRDWR;
245 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
246 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
247 	    MNT_NOPERM | MNT_FORCE);
248 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
249 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
250 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
251 	/*
252 	 * Mount the filesystem.
253 	 */
254 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
255 	if (!error) {
256 		mp->mnt_stat.f_ctime = gettime();
257 	}
258 	if (mp->mnt_flag & MNT_UPDATE) {
259 		vfs_unbusy(vp->v_mount);
260 		vput(vp);
261 		if (mp->mnt_flag & MNT_WANTRDWR)
262 			mp->mnt_flag &= ~MNT_RDONLY;
263 		mp->mnt_flag &= ~MNT_OP_FLAGS;
264 		if (error)
265 			mp->mnt_flag = mntflag;
266 
267 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
268 			if (mp->mnt_syncer == NULL)
269 				error = vfs_allocate_syncvnode(mp);
270 		} else {
271 			if (mp->mnt_syncer != NULL)
272 				vgone(mp->mnt_syncer);
273 			mp->mnt_syncer = NULL;
274 		}
275 
276 		vfs_unbusy(mp);
277 		goto fail;
278 	}
279 
280 	mp->mnt_flag &= ~MNT_OP_FLAGS;
281 	vp->v_mountedhere = mp;
282 
283 	/*
284 	 * Put the new filesystem on the mount list after root.
285 	 */
286 	cache_purge(vp);
287 	if (!error) {
288 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
289 		checkdirs(vp);
290 		vfs_unbusy(vp->v_mount);
291 		VOP_UNLOCK(vp);
292 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
293 			error = vfs_allocate_syncvnode(mp);
294 		vfs_unbusy(mp);
295 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
296 		if ((error = VFS_START(mp, 0, p)) != 0)
297 			vrele(vp);
298 	} else {
299 		mp->mnt_vnodecovered->v_mountedhere = NULL;
300 		vfs_unbusy(mp);
301 		vfs_mount_free(mp);
302 		vfs_unbusy(vp->v_mount);
303 		vput(vp);
304 	}
305 fail:
306 	if (args)
307 		free(args, M_TEMP, vfsp->vfc_datasize);
308 	return (error);
309 }
310 
311 /*
312  * Scan all active processes to see if any of them have a current
313  * or root directory onto which the new filesystem has just been
314  * mounted. If so, replace them with the new mount point, keeping
315  * track of how many were replaced.  That's the number of references
316  * the old vnode had that we've replaced, so finish by vrele()'ing
317  * it that many times.  This puts off any possible sleeping until
318  * we've finished walking the allprocess list.
319  */
320 void
321 checkdirs(struct vnode *olddp)
322 {
323 	struct filedesc *fdp;
324 	struct vnode *newdp;
325 	struct process *pr;
326 	u_int  free_count = 0;
327 
328 	if (olddp->v_usecount == 1)
329 		return;
330 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
331 		panic("mount: lost mount");
332 	LIST_FOREACH(pr, &allprocess, ps_list) {
333 		fdp = pr->ps_fd;
334 		if (fdp->fd_cdir == olddp) {
335 			free_count++;
336 			vref(newdp);
337 			fdp->fd_cdir = newdp;
338 		}
339 		if (fdp->fd_rdir == olddp) {
340 			free_count++;
341 			vref(newdp);
342 			fdp->fd_rdir = newdp;
343 		}
344 	}
345 	if (rootvnode == olddp) {
346 		free_count++;
347 		vref(newdp);
348 		rootvnode = newdp;
349 	}
350 	while (free_count-- > 0)
351 		vrele(olddp);
352 	vput(newdp);
353 }
354 
355 /*
356  * Unmount a file system.
357  *
358  * Note: unmount takes a path to the vnode mounted on as argument,
359  * not special file (as before).
360  */
361 int
362 sys_unmount(struct proc *p, void *v, register_t *retval)
363 {
364 	struct sys_unmount_args /* {
365 		syscallarg(const char *) path;
366 		syscallarg(int) flags;
367 	} */ *uap = v;
368 	struct vnode *vp;
369 	struct mount *mp;
370 	int error;
371 	struct nameidata nd;
372 
373 	if ((error = suser(p)) != 0)
374 		return (error);
375 
376 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
377 	    SCARG(uap, path), p);
378 	if ((error = namei(&nd)) != 0)
379 		return (error);
380 	vp = nd.ni_vp;
381 	mp = vp->v_mount;
382 
383 	/*
384 	 * Don't allow unmounting the root file system.
385 	 */
386 	if (mp->mnt_flag & MNT_ROOTFS) {
387 		vput(vp);
388 		return (EINVAL);
389 	}
390 
391 	/*
392 	 * Must be the root of the filesystem
393 	 */
394 	if ((vp->v_flag & VROOT) == 0) {
395 		vput(vp);
396 		return (EINVAL);
397 	}
398 	vput(vp);
399 
400 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
401 		return (EBUSY);
402 
403 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
404 }
405 
406 /*
407  * Do the actual file system unmount.
408  */
409 int
410 dounmount(struct mount *mp, int flags, struct proc *p)
411 {
412 	SLIST_HEAD(, mount) mplist;
413 	struct mount *nmp;
414 	int error;
415 
416 	SLIST_INIT(&mplist);
417 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
418 
419 	/*
420 	 * Collect nested mount points. This takes advantage of the mount list
421 	 * being ordered - nested mount points come after their parent.
422 	 */
423 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
424 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
425 			if (mp->mnt_vnodecovered == NULLVP ||
426 			    mp->mnt_vnodecovered->v_mount != nmp)
427 				continue;
428 
429 			if ((flags & MNT_FORCE) == 0) {
430 				error = EBUSY;
431 				goto err;
432 			}
433 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
434 			if (error) {
435 				if ((flags & MNT_DOOMED)) {
436 					/*
437 					 * If the mount point was busy due to
438 					 * being unmounted, it has been removed
439 					 * from the mount list already.
440 					 * Restart the iteration from the last
441 					 * collected busy entry.
442 					 */
443 					mp = SLIST_FIRST(&mplist);
444 					break;
445 				}
446 				goto err;
447 			}
448 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
449 			break;
450 		}
451 	}
452 
453 	/*
454 	 * Nested mount points cannot appear during this loop as mounting
455 	 * requires a read lock for the parent mount point.
456 	 */
457 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
458 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
459 		error = dounmount_leaf(mp, flags, p);
460 		if (error)
461 			goto err;
462 	}
463 	return (0);
464 
465 err:
466 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
467 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
468 		vfs_unbusy(mp);
469 	}
470 	return (error);
471 }
472 
473 int
474 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
475 {
476 	struct vnode *coveredvp;
477 	struct vnode *vp, *nvp;
478 	int error;
479 	int hadsyncer = 0;
480 
481 	mp->mnt_flag &=~ MNT_ASYNC;
482 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
483 	if (mp->mnt_syncer != NULL) {
484 		hadsyncer = 1;
485 		vgone(mp->mnt_syncer);
486 		mp->mnt_syncer = NULL;
487 	}
488 
489 	/*
490 	 * Before calling file system unmount, make sure
491 	 * all unveils to vnodes in here are dropped.
492 	 */
493 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
494 		unveil_removevnode(vp);
495 	}
496 
497 	if (((mp->mnt_flag & MNT_RDONLY) ||
498 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
499 	    (flags & MNT_FORCE))
500 		error = VFS_UNMOUNT(mp, flags, p);
501 
502 	if (error && !(flags & MNT_DOOMED)) {
503 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
504 			(void) vfs_allocate_syncvnode(mp);
505 		vfs_unbusy(mp);
506 		return (error);
507 	}
508 
509 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
510 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
511 		coveredvp->v_mountedhere = NULL;
512 		vrele(coveredvp);
513 	}
514 
515 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
516 		panic("unmount: dangling vnode");
517 
518 	vfs_unbusy(mp);
519 	vfs_mount_free(mp);
520 
521 	return (0);
522 }
523 
524 /*
525  * Sync each mounted filesystem.
526  */
527 int
528 sys_sync(struct proc *p, void *v, register_t *retval)
529 {
530 	struct mount *mp;
531 	int asyncflag;
532 
533 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
534 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
535 			continue;
536 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
537 			asyncflag = mp->mnt_flag & MNT_ASYNC;
538 			mp->mnt_flag &= ~MNT_ASYNC;
539 			uvm_vnp_sync(mp);
540 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
541 			if (asyncflag)
542 				mp->mnt_flag |= MNT_ASYNC;
543 		}
544 		vfs_unbusy(mp);
545 	}
546 
547 	return (0);
548 }
549 
550 /*
551  * Change filesystem quotas.
552  */
553 int
554 sys_quotactl(struct proc *p, void *v, register_t *retval)
555 {
556 	struct sys_quotactl_args /* {
557 		syscallarg(const char *) path;
558 		syscallarg(int) cmd;
559 		syscallarg(int) uid;
560 		syscallarg(char *) arg;
561 	} */ *uap = v;
562 	struct mount *mp;
563 	int error;
564 	struct nameidata nd;
565 
566 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
567 	if ((error = namei(&nd)) != 0)
568 		return (error);
569 	mp = nd.ni_vp->v_mount;
570 	vrele(nd.ni_vp);
571 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
572 	    SCARG(uap, arg), p));
573 }
574 
575 int
576 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
577 {
578 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
579 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
580 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
581 	char *s, *d;
582 	int error;
583 
584 	/* Don't let non-root see filesystem id (for NFS security) */
585 	if (suser(p)) {
586 		fsid_t fsid;
587 
588 		s = (char *)sp;
589 		d = (char *)uaddr;
590 
591 		memset(&fsid, 0, sizeof(fsid));
592 
593 		if ((error = copyout(s, d, co_sz1)) != 0)
594 			return (error);
595 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
596 			return (error);
597 		return (copyout(s + co_off2, d + co_off2, co_sz2));
598 	}
599 
600 	return (copyout(sp, uaddr, sizeof(*sp)));
601 }
602 
603 /*
604  * Get filesystem statistics.
605  */
606 int
607 sys_statfs(struct proc *p, void *v, register_t *retval)
608 {
609 	struct sys_statfs_args /* {
610 		syscallarg(const char *) path;
611 		syscallarg(struct statfs *) buf;
612 	} */ *uap = v;
613 	struct mount *mp;
614 	struct statfs *sp;
615 	int error;
616 	struct nameidata nd;
617 
618 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
619 	    SCARG(uap, path), p);
620 	nd.ni_pledge = PLEDGE_RPATH;
621 	nd.ni_unveil = UNVEIL_READ;
622 	if ((error = namei(&nd)) != 0)
623 		return (error);
624 	mp = nd.ni_vp->v_mount;
625 	sp = &mp->mnt_stat;
626 	vrele(nd.ni_vp);
627 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
628 		return (error);
629 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
630 
631 	return (copyout_statfs(sp, SCARG(uap, buf), p));
632 }
633 
634 /*
635  * Get filesystem statistics.
636  */
637 int
638 sys_fstatfs(struct proc *p, void *v, register_t *retval)
639 {
640 	struct sys_fstatfs_args /* {
641 		syscallarg(int) fd;
642 		syscallarg(struct statfs *) buf;
643 	} */ *uap = v;
644 	struct file *fp;
645 	struct mount *mp;
646 	struct statfs *sp;
647 	int error;
648 
649 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
650 		return (error);
651 	mp = ((struct vnode *)fp->f_data)->v_mount;
652 	if (!mp) {
653 		FRELE(fp, p);
654 		return (ENOENT);
655 	}
656 	sp = &mp->mnt_stat;
657 	error = VFS_STATFS(mp, sp, p);
658 	FRELE(fp, p);
659 	if (error)
660 		return (error);
661 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
662 
663 	return (copyout_statfs(sp, SCARG(uap, buf), p));
664 }
665 
666 /*
667  * Get statistics on all filesystems.
668  */
669 int
670 sys_getfsstat(struct proc *p, void *v, register_t *retval)
671 {
672 	struct sys_getfsstat_args /* {
673 		syscallarg(struct statfs *) buf;
674 		syscallarg(size_t) bufsize;
675 		syscallarg(int) flags;
676 	} */ *uap = v;
677 	struct mount *mp;
678 	struct statfs *sp;
679 	struct statfs *sfsp;
680 	size_t count, maxcount;
681 	int error, flags = SCARG(uap, flags);
682 
683 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
684 	sfsp = SCARG(uap, buf);
685 	count = 0;
686 
687 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
688 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
689 			continue;
690 		if (sfsp && count < maxcount) {
691 			sp = &mp->mnt_stat;
692 
693 			/* Refresh stats unless MNT_NOWAIT is specified */
694 			if (flags != MNT_NOWAIT &&
695 			    flags != MNT_LAZY &&
696 			    (flags == MNT_WAIT ||
697 			    flags == 0) &&
698 			    (error = VFS_STATFS(mp, sp, p))) {
699 				vfs_unbusy(mp);
700 				continue;
701 			}
702 
703 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
704 #if notyet
705 			if (mp->mnt_flag & MNT_SOFTDEP)
706 				sp->f_eflags = STATFS_SOFTUPD;
707 #endif
708 			error = (copyout_statfs(sp, sfsp, p));
709 			if (error) {
710 				vfs_unbusy(mp);
711 				return (error);
712 			}
713 			sfsp++;
714 		}
715 		count++;
716 		vfs_unbusy(mp);
717 	}
718 
719 	if (sfsp && count > maxcount)
720 		*retval = maxcount;
721 	else
722 		*retval = count;
723 
724 	return (0);
725 }
726 
727 /*
728  * Change current working directory to a given file descriptor.
729  */
730 int
731 sys_fchdir(struct proc *p, void *v, register_t *retval)
732 {
733 	struct sys_fchdir_args /* {
734 		syscallarg(int) fd;
735 	} */ *uap = v;
736 	struct filedesc *fdp = p->p_fd;
737 	struct vnode *vp, *tdp, *old_cdir;
738 	struct mount *mp;
739 	struct file *fp;
740 	int error;
741 
742 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
743 		return (EBADF);
744 	vp = fp->f_data;
745 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
746 		FRELE(fp, p);
747 		return (ENOTDIR);
748 	}
749 	vref(vp);
750 	FRELE(fp, p);
751 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
752 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
753 
754 	while (!error && (mp = vp->v_mountedhere) != NULL) {
755 		if (vfs_busy(mp, VB_READ|VB_WAIT))
756 			continue;
757 		error = VFS_ROOT(mp, &tdp);
758 		vfs_unbusy(mp);
759 		if (error)
760 			break;
761 		vput(vp);
762 		vp = tdp;
763 	}
764 	if (error) {
765 		vput(vp);
766 		return (error);
767 	}
768 	VOP_UNLOCK(vp);
769 	old_cdir = fdp->fd_cdir;
770 	fdp->fd_cdir = vp;
771 	vrele(old_cdir);
772 	return (0);
773 }
774 
775 /*
776  * Change current working directory (``.'').
777  */
778 int
779 sys_chdir(struct proc *p, void *v, register_t *retval)
780 {
781 	struct sys_chdir_args /* {
782 		syscallarg(const char *) path;
783 	} */ *uap = v;
784 	struct filedesc *fdp = p->p_fd;
785 	struct vnode *old_cdir;
786 	int error;
787 	struct nameidata nd;
788 
789 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
790 	    SCARG(uap, path), p);
791 	nd.ni_pledge = PLEDGE_RPATH;
792 	nd.ni_unveil = UNVEIL_READ;
793 	if ((error = change_dir(&nd, p)) != 0)
794 		return (error);
795 	old_cdir = fdp->fd_cdir;
796 	fdp->fd_cdir = nd.ni_vp;
797 	vrele(old_cdir);
798 	return (0);
799 }
800 
801 /*
802  * Change notion of root (``/'') directory.
803  */
804 int
805 sys_chroot(struct proc *p, void *v, register_t *retval)
806 {
807 	struct sys_chroot_args /* {
808 		syscallarg(const char *) path;
809 	} */ *uap = v;
810 	struct filedesc *fdp = p->p_fd;
811 	struct vnode *old_cdir, *old_rdir;
812 	int error;
813 	struct nameidata nd;
814 
815 	if ((error = suser(p)) != 0)
816 		return (error);
817 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
818 	    SCARG(uap, path), p);
819 	if ((error = change_dir(&nd, p)) != 0)
820 		return (error);
821 	if (fdp->fd_rdir != NULL) {
822 		/*
823 		 * A chroot() done inside a changed root environment does
824 		 * an automatic chdir to avoid the out-of-tree experience.
825 		 */
826 		vref(nd.ni_vp);
827 		old_rdir = fdp->fd_rdir;
828 		old_cdir = fdp->fd_cdir;
829 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
830 		vrele(old_rdir);
831 		vrele(old_cdir);
832 	} else
833 		fdp->fd_rdir = nd.ni_vp;
834 	atomic_setbits_int(&p->p_p->ps_flags, PS_CHROOT);
835 	return (0);
836 }
837 
838 /*
839  * Common routine for chroot and chdir.
840  */
841 static int
842 change_dir(struct nameidata *ndp, struct proc *p)
843 {
844 	struct vnode *vp;
845 	int error;
846 
847 	if ((error = namei(ndp)) != 0)
848 		return (error);
849 	vp = ndp->ni_vp;
850 	if (vp->v_type != VDIR)
851 		error = ENOTDIR;
852 	else
853 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
854 	if (error)
855 		vput(vp);
856 	else
857 		VOP_UNLOCK(vp);
858 	return (error);
859 }
860 
861 int
862 sys___realpath(struct proc *p, void *v, register_t *retval)
863 {
864 	struct sys___realpath_args /* {
865 		syscallarg(const char *) pathname;
866 		syscallarg(char *) resolved;
867 	} */ *uap = v;
868 	char *pathname;
869 	char *rpbuf;
870 	struct nameidata nd;
871 	size_t pathlen;
872 	int error = 0;
873 
874 	if (SCARG(uap, pathname) == NULL)
875 		return (EINVAL);
876 
877 	pathname = pool_get(&namei_pool, PR_WAITOK);
878 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
879 
880 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
881 	    &pathlen)))
882 		goto end;
883 
884 	if (pathlen == 1) { /* empty string "" */
885 		error = ENOENT;
886 		goto end;
887 	}
888 	if (pathlen < 2) {
889 		error = EINVAL;
890 		goto end;
891 	}
892 
893 	/* Get cwd for relative path if needed, prepend to rpbuf */
894 	rpbuf[0] = '\0';
895 	if (pathname[0] != '/') {
896 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
897 		char *cwdbuf, *bp;
898 
899 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
900 
901 		/* vfs_getcwd_common fills this in backwards */
902 		bp = &cwdbuf[cwdlen - 1];
903 		*bp = '\0';
904 
905 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
906 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
907 
908 		if (error) {
909 			free(cwdbuf, M_TEMP, cwdlen);
910 			goto end;
911 		}
912 
913 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
914 			free(cwdbuf, M_TEMP, cwdlen);
915 			error = ENAMETOOLONG;
916 			goto end;
917 		}
918 
919 		free(cwdbuf, M_TEMP, cwdlen);
920 	}
921 
922 	NDINIT(&nd, LOOKUP, FOLLOW | SAVENAME | REALPATH, UIO_SYSSPACE,
923 	    pathname, p);
924 
925 	nd.ni_cnd.cn_rpbuf = rpbuf;
926 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
927 
928 	nd.ni_pledge = PLEDGE_RPATH;
929 	nd.ni_unveil = UNVEIL_READ;
930 	if ((error = namei(&nd)) != 0)
931 		goto end;
932 
933 	/* release reference from namei */
934 	if (nd.ni_vp)
935 		vrele(nd.ni_vp);
936 
937 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
938 	    MAXPATHLEN, NULL);
939 
940 #ifdef KTRACE
941 	if (KTRPOINT(p, KTR_NAMEI))
942 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
943 #endif
944 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
945 end:
946 	pool_put(&namei_pool, rpbuf);
947 	pool_put(&namei_pool, pathname);
948 	return (error);
949 }
950 
951 int
952 sys_unveil(struct proc *p, void *v, register_t *retval)
953 {
954 	struct sys_unveil_args /* {
955 		syscallarg(const char *) path;
956 		syscallarg(const char *) permissions;
957 	} */ *uap = v;
958 	struct process *pr = p->p_p;
959 	char *pathname, *c;
960 	struct nameidata nd;
961 	size_t pathlen;
962 	char permissions[5];
963 	int error, allow;
964 
965 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
966 		pr->ps_uvdone = 1;
967 		return (0);
968 	}
969 
970 	if (pr->ps_uvdone != 0)
971 		return EPERM;
972 
973 	error = copyinstr(SCARG(uap, permissions), permissions,
974 	    sizeof(permissions), NULL);
975 	if (error)
976 		return (error);
977 	pathname = pool_get(&namei_pool, PR_WAITOK);
978 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
979 	if (error)
980 		goto end;
981 
982 #ifdef KTRACE
983 	if (KTRPOINT(p, KTR_STRUCT))
984 		ktrstruct(p, "unveil", permissions, strlen(permissions));
985 #endif
986 	if (pathlen < 2) {
987 		error = EINVAL;
988 		goto end;
989 	}
990 
991 	/* find root "/" or "//" */
992 	for (c = pathname; *c != '\0'; c++) {
993 		if (*c != '/')
994 			break;
995 	}
996 	if (*c == '\0')
997 		/* root directory */
998 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
999 		    UIO_SYSSPACE, pathname, p);
1000 	else
1001 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
1002 		    UIO_SYSSPACE, pathname, p);
1003 
1004 	nd.ni_pledge = PLEDGE_UNVEIL;
1005 	if ((error = namei(&nd)) != 0)
1006 		goto end;
1007 
1008 	/*
1009 	 * XXX Any access to the file or directory will allow us to
1010 	 * pledge path it
1011 	 */
1012 	allow = ((nd.ni_vp &&
1013 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1014 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1015 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1016 	    (nd.ni_dvp &&
1017 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1018 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1019 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1020 
1021 	/* release lock from namei, but keep ref */
1022 	if (nd.ni_vp)
1023 		VOP_UNLOCK(nd.ni_vp);
1024 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1025 		VOP_UNLOCK(nd.ni_dvp);
1026 
1027 	if (allow)
1028 		error = unveil_add(p, &nd, permissions);
1029 	else
1030 		error = EPERM;
1031 
1032 	/* release vref from namei, but not vref from unveil_add */
1033 	if (nd.ni_vp)
1034 		vrele(nd.ni_vp);
1035 	if (nd.ni_dvp)
1036 		vrele(nd.ni_dvp);
1037 
1038 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1039 end:
1040 	pool_put(&namei_pool, pathname);
1041 
1042 	return (error);
1043 }
1044 
1045 /*
1046  * Check permissions, allocate an open file structure,
1047  * and call the device open routine if any.
1048  */
1049 int
1050 sys_open(struct proc *p, void *v, register_t *retval)
1051 {
1052 	struct sys_open_args /* {
1053 		syscallarg(const char *) path;
1054 		syscallarg(int) flags;
1055 		syscallarg(mode_t) mode;
1056 	} */ *uap = v;
1057 
1058 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1059 	    SCARG(uap, mode), retval));
1060 }
1061 
1062 int
1063 sys_openat(struct proc *p, void *v, register_t *retval)
1064 {
1065 	struct sys_openat_args /* {
1066 		syscallarg(int) fd;
1067 		syscallarg(const char *) path;
1068 		syscallarg(int) flags;
1069 		syscallarg(mode_t) mode;
1070 	} */ *uap = v;
1071 
1072 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1073 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1074 }
1075 
1076 int
1077 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1078     register_t *retval)
1079 {
1080 	struct filedesc *fdp = p->p_fd;
1081 	struct file *fp;
1082 	struct vnode *vp;
1083 	struct vattr vattr;
1084 	int flags, cloexec, cmode;
1085 	int type, indx, error, localtrunc = 0;
1086 	struct flock lf;
1087 	struct nameidata nd;
1088 	uint64_t ni_pledge = 0;
1089 	u_char ni_unveil = 0;
1090 
1091 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1092 		error = pledge_flock(p);
1093 		if (error != 0)
1094 			return (error);
1095 	}
1096 
1097 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1098 
1099 	fdplock(fdp);
1100 	if ((error = falloc(p, &fp, &indx)) != 0) {
1101 		fdpunlock(fdp);
1102 		return (error);
1103 	}
1104 	fdpunlock(fdp);
1105 
1106 	flags = FFLAGS(oflags);
1107 	if (flags & FREAD) {
1108 		ni_pledge |= PLEDGE_RPATH;
1109 		ni_unveil |= UNVEIL_READ;
1110 	}
1111 	if (flags & FWRITE) {
1112 		ni_pledge |= PLEDGE_WPATH;
1113 		ni_unveil |= UNVEIL_WRITE;
1114 	}
1115 	if (oflags & O_CREAT) {
1116 		ni_pledge |= PLEDGE_CPATH;
1117 		ni_unveil |= UNVEIL_CREATE;
1118 	}
1119 
1120 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1121 	if ((p->p_p->ps_flags & PS_PLEDGE))
1122 		cmode &= ACCESSPERMS;
1123 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1124 	nd.ni_pledge = ni_pledge;
1125 	nd.ni_unveil = ni_unveil;
1126 	p->p_dupfd = -1;			/* XXX check for fdopen */
1127 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1128 		localtrunc = 1;
1129 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1130 	}
1131 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1132 		fdplock(fdp);
1133 		if (error == ENODEV &&
1134 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1135 		    (error =
1136 			dupfdopen(p, indx, flags)) == 0) {
1137 			fdpunlock(fdp);
1138 			closef(fp, p);
1139 			*retval = indx;
1140 			return (error);
1141 		}
1142 		if (error == ERESTART)
1143 			error = EINTR;
1144 		fdremove(fdp, indx);
1145 		fdpunlock(fdp);
1146 		closef(fp, p);
1147 		return (error);
1148 	}
1149 	p->p_dupfd = 0;
1150 	vp = nd.ni_vp;
1151 	fp->f_flag = flags & FMASK;
1152 	fp->f_type = DTYPE_VNODE;
1153 	fp->f_ops = &vnops;
1154 	fp->f_data = vp;
1155 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1156 		lf.l_whence = SEEK_SET;
1157 		lf.l_start = 0;
1158 		lf.l_len = 0;
1159 		if (flags & O_EXLOCK)
1160 			lf.l_type = F_WRLCK;
1161 		else
1162 			lf.l_type = F_RDLCK;
1163 		type = F_FLOCK;
1164 		if ((flags & FNONBLOCK) == 0)
1165 			type |= F_WAIT;
1166 		VOP_UNLOCK(vp);
1167 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1168 		if (error) {
1169 			fdplock(fdp);
1170 			/* closef will vn_close the file for us. */
1171 			fdremove(fdp, indx);
1172 			fdpunlock(fdp);
1173 			closef(fp, p);
1174 			return (error);
1175 		}
1176 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1177 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1178 	}
1179 	if (localtrunc) {
1180 		if ((fp->f_flag & FWRITE) == 0)
1181 			error = EACCES;
1182 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1183 			error = EROFS;
1184 		else if (vp->v_type == VDIR)
1185 			error = EISDIR;
1186 		else if ((error = vn_writechk(vp)) == 0) {
1187 			VATTR_NULL(&vattr);
1188 			vattr.va_size = 0;
1189 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1190 		}
1191 		if (error) {
1192 			VOP_UNLOCK(vp);
1193 			fdplock(fdp);
1194 			/* closef will close the file for us. */
1195 			fdremove(fdp, indx);
1196 			fdpunlock(fdp);
1197 			closef(fp, p);
1198 			return (error);
1199 		}
1200 	}
1201 	VOP_UNLOCK(vp);
1202 	*retval = indx;
1203 	fdplock(fdp);
1204 	fdinsert(fdp, indx, cloexec, fp);
1205 	fdpunlock(fdp);
1206 	FRELE(fp, p);
1207 	return (error);
1208 }
1209 
1210 /*
1211  * Open a new created file (in /tmp) suitable for mmaping.
1212  */
1213 int
1214 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1215 {
1216 	struct sys___tmpfd_args /* {
1217 		syscallarg(int) flags;
1218 	} */ *uap = v;
1219 	struct filedesc *fdp = p->p_fd;
1220 	struct file *fp;
1221 	struct vnode *vp;
1222 	int oflags = SCARG(uap, flags);
1223 	int flags, cloexec, cmode;
1224 	int indx, error;
1225 	unsigned int i;
1226 	struct nameidata nd;
1227 	char path[64];
1228 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1229 
1230 	/* most flags are hardwired */
1231 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1232 
1233 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1234 
1235 	fdplock(fdp);
1236 	if ((error = falloc(p, &fp, &indx)) != 0) {
1237 		fdpunlock(fdp);
1238 		return (error);
1239 	}
1240 	fdpunlock(fdp);
1241 
1242 	flags = FFLAGS(oflags);
1243 
1244 	arc4random_buf(path, sizeof(path));
1245 	memcpy(path, "/tmp/", 5);
1246 	for (i = 5; i < sizeof(path) - 1; i++)
1247 		path[i] = letters[(unsigned char)path[i] & 63];
1248 	path[sizeof(path)-1] = 0;
1249 
1250 	cmode = 0600;
1251 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1252 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1253 		if (error == ERESTART)
1254 			error = EINTR;
1255 		fdplock(fdp);
1256 		fdremove(fdp, indx);
1257 		fdpunlock(fdp);
1258 		closef(fp, p);
1259 		return (error);
1260 	}
1261 	vp = nd.ni_vp;
1262 	fp->f_flag = flags & FMASK;
1263 	fp->f_type = DTYPE_VNODE;
1264 	fp->f_ops = &vnops;
1265 	fp->f_data = vp;
1266 	VOP_UNLOCK(vp);
1267 	*retval = indx;
1268 	fdplock(fdp);
1269 	fdinsert(fdp, indx, cloexec, fp);
1270 	fdpunlock(fdp);
1271 	FRELE(fp, p);
1272 
1273 	/* unlink it */
1274 	/* XXX
1275 	 * there is a wee race here, although it is mostly inconsequential.
1276 	 * perhaps someday we can create a file like object without a name...
1277 	 */
1278 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1279 	    AT_FDCWD, path, p);
1280 	if ((error = namei(&nd)) != 0) {
1281 		printf("can't unlink temp file! %d\n", error);
1282 		error = 0;
1283 	} else {
1284 		vp = nd.ni_vp;
1285 		uvm_vnp_uncache(vp);
1286 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1287 		if (error) {
1288 			printf("error removing vop: %d\n", error);
1289 			error = 0;
1290 		}
1291 	}
1292 
1293 	return (error);
1294 }
1295 
1296 /*
1297  * Get file handle system call
1298  */
1299 int
1300 sys_getfh(struct proc *p, void *v, register_t *retval)
1301 {
1302 	struct sys_getfh_args /* {
1303 		syscallarg(const char *) fname;
1304 		syscallarg(fhandle_t *) fhp;
1305 	} */ *uap = v;
1306 	struct vnode *vp;
1307 	fhandle_t fh;
1308 	int error;
1309 	struct nameidata nd;
1310 
1311 	/*
1312 	 * Must be super user
1313 	 */
1314 	error = suser(p);
1315 	if (error)
1316 		return (error);
1317 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1318 	    SCARG(uap, fname), p);
1319 	error = namei(&nd);
1320 	if (error)
1321 		return (error);
1322 	vp = nd.ni_vp;
1323 	memset(&fh, 0, sizeof(fh));
1324 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1325 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1326 	vput(vp);
1327 	if (error)
1328 		return (error);
1329 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1330 	return (error);
1331 }
1332 
1333 /*
1334  * Open a file given a file handle.
1335  *
1336  * Check permissions, allocate an open file structure,
1337  * and call the device open routine if any.
1338  */
1339 int
1340 sys_fhopen(struct proc *p, void *v, register_t *retval)
1341 {
1342 	struct sys_fhopen_args /* {
1343 		syscallarg(const fhandle_t *) fhp;
1344 		syscallarg(int) flags;
1345 	} */ *uap = v;
1346 	struct filedesc *fdp = p->p_fd;
1347 	struct file *fp;
1348 	struct vnode *vp = NULL;
1349 	struct mount *mp;
1350 	struct ucred *cred = p->p_ucred;
1351 	int flags, cloexec;
1352 	int type, indx, error=0;
1353 	struct flock lf;
1354 	struct vattr va;
1355 	fhandle_t fh;
1356 
1357 	/*
1358 	 * Must be super user
1359 	 */
1360 	if ((error = suser(p)))
1361 		return (error);
1362 
1363 	flags = FFLAGS(SCARG(uap, flags));
1364 	if ((flags & (FREAD | FWRITE)) == 0)
1365 		return (EINVAL);
1366 	if ((flags & O_CREAT))
1367 		return (EINVAL);
1368 
1369 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1370 
1371 	fdplock(fdp);
1372 	if ((error = falloc(p, &fp, &indx)) != 0) {
1373 		fdpunlock(fdp);
1374 		fp = NULL;
1375 		goto bad;
1376 	}
1377 	fdpunlock(fdp);
1378 
1379 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1380 		goto bad;
1381 
1382 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1383 		error = ESTALE;
1384 		goto bad;
1385 	}
1386 
1387 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1388 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1389 		goto bad;
1390 	}
1391 
1392 	/* Now do an effective vn_open */
1393 
1394 	if (vp->v_type == VSOCK) {
1395 		error = EOPNOTSUPP;
1396 		goto bad;
1397 	}
1398 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1399 		error = ENOTDIR;
1400 		goto bad;
1401 	}
1402 	if (flags & FREAD) {
1403 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1404 			goto bad;
1405 	}
1406 	if (flags & (FWRITE | O_TRUNC)) {
1407 		if (vp->v_type == VDIR) {
1408 			error = EISDIR;
1409 			goto bad;
1410 		}
1411 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1412 		    (error = vn_writechk(vp)) != 0)
1413 			goto bad;
1414 	}
1415 	if (flags & O_TRUNC) {
1416 		VATTR_NULL(&va);
1417 		va.va_size = 0;
1418 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1419 			goto bad;
1420 	}
1421 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1422 		goto bad;
1423 	if (flags & FWRITE)
1424 		vp->v_writecount++;
1425 
1426 	/* done with modified vn_open, now finish what sys_open does. */
1427 
1428 	fp->f_flag = flags & FMASK;
1429 	fp->f_type = DTYPE_VNODE;
1430 	fp->f_ops = &vnops;
1431 	fp->f_data = vp;
1432 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1433 		lf.l_whence = SEEK_SET;
1434 		lf.l_start = 0;
1435 		lf.l_len = 0;
1436 		if (flags & O_EXLOCK)
1437 			lf.l_type = F_WRLCK;
1438 		else
1439 			lf.l_type = F_RDLCK;
1440 		type = F_FLOCK;
1441 		if ((flags & FNONBLOCK) == 0)
1442 			type |= F_WAIT;
1443 		VOP_UNLOCK(vp);
1444 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1445 		if (error) {
1446 			vp = NULL;	/* closef will vn_close the file */
1447 			goto bad;
1448 		}
1449 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1450 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1451 	}
1452 	VOP_UNLOCK(vp);
1453 	*retval = indx;
1454 	fdplock(fdp);
1455 	fdinsert(fdp, indx, cloexec, fp);
1456 	fdpunlock(fdp);
1457 	FRELE(fp, p);
1458 	return (0);
1459 
1460 bad:
1461 	if (fp) {
1462 		fdplock(fdp);
1463 		fdremove(fdp, indx);
1464 		fdpunlock(fdp);
1465 		closef(fp, p);
1466 		if (vp != NULL)
1467 			vput(vp);
1468 	}
1469 	return (error);
1470 }
1471 
1472 int
1473 sys_fhstat(struct proc *p, void *v, register_t *retval)
1474 {
1475 	struct sys_fhstat_args /* {
1476 		syscallarg(const fhandle_t *) fhp;
1477 		syscallarg(struct stat *) sb;
1478 	} */ *uap = v;
1479 	struct stat sb;
1480 	int error;
1481 	fhandle_t fh;
1482 	struct mount *mp;
1483 	struct vnode *vp;
1484 
1485 	/*
1486 	 * Must be super user
1487 	 */
1488 	if ((error = suser(p)))
1489 		return (error);
1490 
1491 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1492 		return (error);
1493 
1494 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1495 		return (ESTALE);
1496 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1497 		return (error);
1498 	error = vn_stat(vp, &sb, p);
1499 	vput(vp);
1500 	if (error)
1501 		return (error);
1502 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1503 	return (error);
1504 }
1505 
1506 int
1507 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1508 {
1509 	struct sys_fhstatfs_args /* {
1510 		syscallarg(const fhandle_t *) fhp;
1511 		syscallarg(struct statfs *) buf;
1512 	} */ *uap = v;
1513 	struct statfs *sp;
1514 	fhandle_t fh;
1515 	struct mount *mp;
1516 	struct vnode *vp;
1517 	int error;
1518 
1519 	/*
1520 	 * Must be super user
1521 	 */
1522 	if ((error = suser(p)))
1523 		return (error);
1524 
1525 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1526 		return (error);
1527 
1528 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1529 		return (ESTALE);
1530 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1531 		return (error);
1532 	mp = vp->v_mount;
1533 	sp = &mp->mnt_stat;
1534 	vput(vp);
1535 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1536 		return (error);
1537 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1538 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1539 }
1540 
1541 /*
1542  * Create a special file or named pipe.
1543  */
1544 int
1545 sys_mknod(struct proc *p, void *v, register_t *retval)
1546 {
1547 	struct sys_mknod_args /* {
1548 		syscallarg(const char *) path;
1549 		syscallarg(mode_t) mode;
1550 		syscallarg(int) dev;
1551 	} */ *uap = v;
1552 
1553 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1554 	    SCARG(uap, dev)));
1555 }
1556 
1557 int
1558 sys_mknodat(struct proc *p, void *v, register_t *retval)
1559 {
1560 	struct sys_mknodat_args /* {
1561 		syscallarg(int) fd;
1562 		syscallarg(const char *) path;
1563 		syscallarg(mode_t) mode;
1564 		syscallarg(dev_t) dev;
1565 	} */ *uap = v;
1566 
1567 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1568 	    SCARG(uap, mode), SCARG(uap, dev)));
1569 }
1570 
1571 int
1572 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1573 {
1574 	struct vnode *vp;
1575 	struct vattr vattr;
1576 	int error;
1577 	struct nameidata nd;
1578 
1579 	if (dev == VNOVAL)
1580 		return (EINVAL);
1581 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1582 	nd.ni_pledge = PLEDGE_DPATH;
1583 	nd.ni_unveil = UNVEIL_CREATE;
1584 	if ((error = namei(&nd)) != 0)
1585 		return (error);
1586 	vp = nd.ni_vp;
1587 	if (!S_ISFIFO(mode) || dev != 0) {
1588 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1589 			goto out;
1590 		if (p->p_fd->fd_rdir) {
1591 			error = EINVAL;
1592 			goto out;
1593 		}
1594 	}
1595 	if (vp != NULL)
1596 		error = EEXIST;
1597 	else {
1598 		VATTR_NULL(&vattr);
1599 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1600 		if ((p->p_p->ps_flags & PS_PLEDGE))
1601 			vattr.va_mode &= ACCESSPERMS;
1602 		vattr.va_rdev = dev;
1603 
1604 		switch (mode & S_IFMT) {
1605 		case S_IFMT:	/* used by badsect to flag bad sectors */
1606 			vattr.va_type = VBAD;
1607 			break;
1608 		case S_IFCHR:
1609 			vattr.va_type = VCHR;
1610 			break;
1611 		case S_IFBLK:
1612 			vattr.va_type = VBLK;
1613 			break;
1614 		case S_IFIFO:
1615 #ifndef FIFO
1616 			error = EOPNOTSUPP;
1617 			break;
1618 #else
1619 			if (dev == 0) {
1620 				vattr.va_type = VFIFO;
1621 				break;
1622 			}
1623 			/* FALLTHROUGH */
1624 #endif /* FIFO */
1625 		default:
1626 			error = EINVAL;
1627 			break;
1628 		}
1629 	}
1630 out:
1631 	if (!error) {
1632 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1633 		vput(nd.ni_dvp);
1634 	} else {
1635 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1636 		if (nd.ni_dvp == vp)
1637 			vrele(nd.ni_dvp);
1638 		else
1639 			vput(nd.ni_dvp);
1640 		if (vp)
1641 			vrele(vp);
1642 	}
1643 	return (error);
1644 }
1645 
1646 /*
1647  * Create a named pipe.
1648  */
1649 int
1650 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1651 {
1652 	struct sys_mkfifo_args /* {
1653 		syscallarg(const char *) path;
1654 		syscallarg(mode_t) mode;
1655 	} */ *uap = v;
1656 
1657 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1658 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1659 }
1660 
1661 int
1662 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1663 {
1664 	struct sys_mkfifoat_args /* {
1665 		syscallarg(int) fd;
1666 		syscallarg(const char *) path;
1667 		syscallarg(mode_t) mode;
1668 	} */ *uap = v;
1669 
1670 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1671 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1672 }
1673 
1674 /*
1675  * Make a hard file link.
1676  */
1677 int
1678 sys_link(struct proc *p, void *v, register_t *retval)
1679 {
1680 	struct sys_link_args /* {
1681 		syscallarg(const char *) path;
1682 		syscallarg(const char *) link;
1683 	} */ *uap = v;
1684 
1685 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1686 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1687 }
1688 
1689 int
1690 sys_linkat(struct proc *p, void *v, register_t *retval)
1691 {
1692 	struct sys_linkat_args /* {
1693 		syscallarg(int) fd1;
1694 		syscallarg(const char *) path1;
1695 		syscallarg(int) fd2;
1696 		syscallarg(const char *) path2;
1697 		syscallarg(int) flag;
1698 	} */ *uap = v;
1699 
1700 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1701 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1702 }
1703 
1704 int
1705 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1706     const char *path2, int flag)
1707 {
1708 	struct vnode *vp;
1709 	struct nameidata nd;
1710 	int error, follow;
1711 	int flags;
1712 
1713 	if (flag & ~AT_SYMLINK_FOLLOW)
1714 		return (EINVAL);
1715 
1716 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1717 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1718 	nd.ni_pledge = PLEDGE_RPATH;
1719 	nd.ni_unveil = UNVEIL_READ;
1720 	if ((error = namei(&nd)) != 0)
1721 		return (error);
1722 	vp = nd.ni_vp;
1723 
1724 	flags = LOCKPARENT;
1725 	if (vp->v_type == VDIR) {
1726 		flags |= STRIPSLASHES;
1727 	}
1728 
1729 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1730 	nd.ni_pledge = PLEDGE_CPATH;
1731 	nd.ni_unveil = UNVEIL_CREATE;
1732 	if ((error = namei(&nd)) != 0)
1733 		goto out;
1734 	if (nd.ni_vp) {
1735 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1736 		if (nd.ni_dvp == nd.ni_vp)
1737 			vrele(nd.ni_dvp);
1738 		else
1739 			vput(nd.ni_dvp);
1740 		vrele(nd.ni_vp);
1741 		error = EEXIST;
1742 		goto out;
1743 	}
1744 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1745 out:
1746 	vrele(vp);
1747 	return (error);
1748 }
1749 
1750 /*
1751  * Make a symbolic link.
1752  */
1753 int
1754 sys_symlink(struct proc *p, void *v, register_t *retval)
1755 {
1756 	struct sys_symlink_args /* {
1757 		syscallarg(const char *) path;
1758 		syscallarg(const char *) link;
1759 	} */ *uap = v;
1760 
1761 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1762 }
1763 
1764 int
1765 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1766 {
1767 	struct sys_symlinkat_args /* {
1768 		syscallarg(const char *) path;
1769 		syscallarg(int) fd;
1770 		syscallarg(const char *) link;
1771 	} */ *uap = v;
1772 
1773 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1774 	    SCARG(uap, link)));
1775 }
1776 
1777 int
1778 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1779 {
1780 	struct vattr vattr;
1781 	char *path;
1782 	int error;
1783 	struct nameidata nd;
1784 
1785 	path = pool_get(&namei_pool, PR_WAITOK);
1786 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1787 	if (error)
1788 		goto out;
1789 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1790 	nd.ni_pledge = PLEDGE_CPATH;
1791 	nd.ni_unveil = UNVEIL_CREATE;
1792 	if ((error = namei(&nd)) != 0)
1793 		goto out;
1794 	if (nd.ni_vp) {
1795 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1796 		if (nd.ni_dvp == nd.ni_vp)
1797 			vrele(nd.ni_dvp);
1798 		else
1799 			vput(nd.ni_dvp);
1800 		vrele(nd.ni_vp);
1801 		error = EEXIST;
1802 		goto out;
1803 	}
1804 	VATTR_NULL(&vattr);
1805 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1806 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1807 out:
1808 	pool_put(&namei_pool, path);
1809 	return (error);
1810 }
1811 
1812 /*
1813  * Delete a name from the filesystem.
1814  */
1815 int
1816 sys_unlink(struct proc *p, void *v, register_t *retval)
1817 {
1818 	struct sys_unlink_args /* {
1819 		syscallarg(const char *) path;
1820 	} */ *uap = v;
1821 
1822 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1823 }
1824 
1825 int
1826 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1827 {
1828 	struct sys_unlinkat_args /* {
1829 		syscallarg(int) fd;
1830 		syscallarg(const char *) path;
1831 		syscallarg(int) flag;
1832 	} */ *uap = v;
1833 
1834 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1835 	    SCARG(uap, flag)));
1836 }
1837 
1838 int
1839 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1840 {
1841 	struct vnode *vp;
1842 	int error;
1843 	struct nameidata nd;
1844 
1845 	if (flag & ~AT_REMOVEDIR)
1846 		return (EINVAL);
1847 
1848 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1849 	    fd, path, p);
1850 	nd.ni_pledge = PLEDGE_CPATH;
1851 	nd.ni_unveil = UNVEIL_CREATE;
1852 	if ((error = namei(&nd)) != 0)
1853 		return (error);
1854 	vp = nd.ni_vp;
1855 
1856 	if (flag & AT_REMOVEDIR) {
1857 		if (vp->v_type != VDIR) {
1858 			error = ENOTDIR;
1859 			goto out;
1860 		}
1861 		/*
1862 		 * No rmdir "." please.
1863 		 */
1864 		if (nd.ni_dvp == vp) {
1865 			error = EINVAL;
1866 			goto out;
1867 		}
1868 		/*
1869 		 * A mounted on directory cannot be deleted.
1870 		 */
1871 		if (vp->v_mountedhere != NULL) {
1872 			error = EBUSY;
1873 			goto out;
1874 		}
1875 	}
1876 
1877 	/*
1878 	 * The root of a mounted filesystem cannot be deleted.
1879 	 */
1880 	if (vp->v_flag & VROOT)
1881 		error = EBUSY;
1882 out:
1883 	if (!error) {
1884 		if (flag & AT_REMOVEDIR) {
1885 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1886 		} else {
1887 			(void)uvm_vnp_uncache(vp);
1888 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1889 		}
1890 	} else {
1891 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1892 		if (nd.ni_dvp == vp)
1893 			vrele(nd.ni_dvp);
1894 		else
1895 			vput(nd.ni_dvp);
1896 		vput(vp);
1897 	}
1898 	return (error);
1899 }
1900 
1901 /*
1902  * Reposition read/write file offset.
1903  */
1904 int
1905 sys_lseek(struct proc *p, void *v, register_t *retval)
1906 {
1907 	struct sys_lseek_args /* {
1908 		syscallarg(int) fd;
1909 		syscallarg(off_t) offset;
1910 		syscallarg(int) whence;
1911 	} */ *uap = v;
1912 	struct filedesc *fdp = p->p_fd;
1913 	struct file *fp;
1914 	off_t offset;
1915 	int error;
1916 
1917 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1918 		return (EBADF);
1919 	if (fp->f_ops->fo_seek == NULL) {
1920 		error = ESPIPE;
1921 		goto bad;
1922 	}
1923 	offset = SCARG(uap, offset);
1924 
1925 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1926 	if (error)
1927 		goto bad;
1928 
1929 	*(off_t *)retval = offset;
1930 	mtx_enter(&fp->f_mtx);
1931 	fp->f_seek++;
1932 	mtx_leave(&fp->f_mtx);
1933 	error = 0;
1934  bad:
1935 	FRELE(fp, p);
1936 	return (error);
1937 }
1938 
1939 /*
1940  * Check access permissions.
1941  */
1942 int
1943 sys_access(struct proc *p, void *v, register_t *retval)
1944 {
1945 	struct sys_access_args /* {
1946 		syscallarg(const char *) path;
1947 		syscallarg(int) amode;
1948 	} */ *uap = v;
1949 
1950 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1951 	    SCARG(uap, amode), 0));
1952 }
1953 
1954 int
1955 sys_faccessat(struct proc *p, void *v, register_t *retval)
1956 {
1957 	struct sys_faccessat_args /* {
1958 		syscallarg(int) fd;
1959 		syscallarg(const char *) path;
1960 		syscallarg(int) amode;
1961 		syscallarg(int) flag;
1962 	} */ *uap = v;
1963 
1964 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1965 	    SCARG(uap, amode), SCARG(uap, flag)));
1966 }
1967 
1968 int
1969 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1970 {
1971 	struct vnode *vp;
1972 	struct ucred *newcred, *oldcred;
1973 	struct nameidata nd;
1974 	int error;
1975 
1976 	if (amode & ~(R_OK | W_OK | X_OK))
1977 		return (EINVAL);
1978 	if (flag & ~AT_EACCESS)
1979 		return (EINVAL);
1980 
1981 	newcred = NULL;
1982 	oldcred = p->p_ucred;
1983 
1984 	/*
1985 	 * If access as real ids was requested and they really differ,
1986 	 * give the thread new creds with them reset
1987 	 */
1988 	if ((flag & AT_EACCESS) == 0 &&
1989 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1990 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1991 		p->p_ucred = newcred = crdup(oldcred);
1992 		newcred->cr_uid = newcred->cr_ruid;
1993 		newcred->cr_gid = newcred->cr_rgid;
1994 	}
1995 
1996 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1997 	nd.ni_pledge = PLEDGE_RPATH;
1998 	nd.ni_unveil = UNVEIL_READ;
1999 	if ((error = namei(&nd)) != 0)
2000 		goto out;
2001 	vp = nd.ni_vp;
2002 
2003 	/* Flags == 0 means only check for existence. */
2004 	if (amode) {
2005 		int vflags = 0;
2006 
2007 		if (amode & R_OK)
2008 			vflags |= VREAD;
2009 		if (amode & W_OK)
2010 			vflags |= VWRITE;
2011 		if (amode & X_OK)
2012 			vflags |= VEXEC;
2013 
2014 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2015 		if (!error && (vflags & VWRITE))
2016 			error = vn_writechk(vp);
2017 	}
2018 	vput(vp);
2019 out:
2020 	if (newcred != NULL) {
2021 		p->p_ucred = oldcred;
2022 		crfree(newcred);
2023 	}
2024 	return (error);
2025 }
2026 
2027 /*
2028  * Get file status; this version follows links.
2029  */
2030 int
2031 sys_stat(struct proc *p, void *v, register_t *retval)
2032 {
2033 	struct sys_stat_args /* {
2034 		syscallarg(const char *) path;
2035 		syscallarg(struct stat *) ub;
2036 	} */ *uap = v;
2037 
2038 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2039 }
2040 
2041 int
2042 sys_fstatat(struct proc *p, void *v, register_t *retval)
2043 {
2044 	struct sys_fstatat_args /* {
2045 		syscallarg(int) fd;
2046 		syscallarg(const char *) path;
2047 		syscallarg(struct stat *) buf;
2048 		syscallarg(int) flag;
2049 	} */ *uap = v;
2050 
2051 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2052 	    SCARG(uap, buf), SCARG(uap, flag)));
2053 }
2054 
2055 int
2056 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2057 {
2058 	struct stat sb;
2059 	int error, follow;
2060 	struct nameidata nd;
2061 
2062 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2063 		return (EINVAL);
2064 
2065 
2066 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2067 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2068 	nd.ni_pledge = PLEDGE_RPATH;
2069 	nd.ni_unveil = UNVEIL_READ;
2070 	if ((error = namei(&nd)) != 0)
2071 		return (error);
2072 	error = vn_stat(nd.ni_vp, &sb, p);
2073 	vput(nd.ni_vp);
2074 	if (error)
2075 		return (error);
2076 	/* Don't let non-root see generation numbers (for NFS security) */
2077 	if (suser(p))
2078 		sb.st_gen = 0;
2079 	error = copyout(&sb, buf, sizeof(sb));
2080 #ifdef KTRACE
2081 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2082 		ktrstat(p, &sb);
2083 #endif
2084 	return (error);
2085 }
2086 
2087 /*
2088  * Get file status; this version does not follow links.
2089  */
2090 int
2091 sys_lstat(struct proc *p, void *v, register_t *retval)
2092 {
2093 	struct sys_lstat_args /* {
2094 		syscallarg(const char *) path;
2095 		syscallarg(struct stat *) ub;
2096 	} */ *uap = v;
2097 
2098 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2099 	    AT_SYMLINK_NOFOLLOW));
2100 }
2101 
2102 /*
2103  * Get configurable pathname variables.
2104  */
2105 int
2106 sys_pathconf(struct proc *p, void *v, register_t *retval)
2107 {
2108 	struct sys_pathconf_args /* {
2109 		syscallarg(const char *) path;
2110 		syscallarg(int) name;
2111 	} */ *uap = v;
2112 	int error;
2113 	struct nameidata nd;
2114 
2115 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2116 	    SCARG(uap, path), p);
2117 	nd.ni_pledge = PLEDGE_RPATH;
2118 	nd.ni_unveil = UNVEIL_READ;
2119 	if ((error = namei(&nd)) != 0)
2120 		return (error);
2121 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2122 	vput(nd.ni_vp);
2123 	return (error);
2124 }
2125 
2126 /*
2127  * Return target name of a symbolic link.
2128  */
2129 int
2130 sys_readlink(struct proc *p, void *v, register_t *retval)
2131 {
2132 	struct sys_readlink_args /* {
2133 		syscallarg(const char *) path;
2134 		syscallarg(char *) buf;
2135 		syscallarg(size_t) count;
2136 	} */ *uap = v;
2137 
2138 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2139 	    SCARG(uap, count), retval));
2140 }
2141 
2142 int
2143 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2144 {
2145 	struct sys_readlinkat_args /* {
2146 		syscallarg(int) fd;
2147 		syscallarg(const char *) path;
2148 		syscallarg(char *) buf;
2149 		syscallarg(size_t) count;
2150 	} */ *uap = v;
2151 
2152 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2153 	    SCARG(uap, buf), SCARG(uap, count), retval));
2154 }
2155 
2156 int
2157 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2158     size_t count, register_t *retval)
2159 {
2160 	struct vnode *vp;
2161 	struct iovec aiov;
2162 	struct uio auio;
2163 	int error;
2164 	struct nameidata nd;
2165 
2166 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2167 	nd.ni_pledge = PLEDGE_RPATH;
2168 	nd.ni_unveil = UNVEIL_READ;
2169 	if ((error = namei(&nd)) != 0)
2170 		return (error);
2171 	vp = nd.ni_vp;
2172 	if (vp->v_type != VLNK)
2173 		error = EINVAL;
2174 	else {
2175 		aiov.iov_base = buf;
2176 		aiov.iov_len = count;
2177 		auio.uio_iov = &aiov;
2178 		auio.uio_iovcnt = 1;
2179 		auio.uio_offset = 0;
2180 		auio.uio_rw = UIO_READ;
2181 		auio.uio_segflg = UIO_USERSPACE;
2182 		auio.uio_procp = p;
2183 		auio.uio_resid = count;
2184 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2185 		*retval = count - auio.uio_resid;
2186 	}
2187 	vput(vp);
2188 	return (error);
2189 }
2190 
2191 /*
2192  * Change flags of a file given a path name.
2193  */
2194 int
2195 sys_chflags(struct proc *p, void *v, register_t *retval)
2196 {
2197 	struct sys_chflags_args /* {
2198 		syscallarg(const char *) path;
2199 		syscallarg(u_int) flags;
2200 	} */ *uap = v;
2201 
2202 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2203 	    SCARG(uap, flags), 0));
2204 }
2205 
2206 int
2207 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2208 {
2209 	struct sys_chflagsat_args /* {
2210 		syscallarg(int) fd;
2211 		syscallarg(const char *) path;
2212 		syscallarg(u_int) flags;
2213 		syscallarg(int) atflags;
2214 	} */ *uap = v;
2215 
2216 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2217 	    SCARG(uap, flags), SCARG(uap, atflags)));
2218 }
2219 
2220 int
2221 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2222 {
2223 	struct nameidata nd;
2224 	int error, follow;
2225 
2226 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2227 		return (EINVAL);
2228 
2229 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2230 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2231 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2232 	nd.ni_unveil = UNVEIL_WRITE;
2233 	if ((error = namei(&nd)) != 0)
2234 		return (error);
2235 	return (dovchflags(p, nd.ni_vp, flags));
2236 }
2237 
2238 /*
2239  * Change flags of a file given a file descriptor.
2240  */
2241 int
2242 sys_fchflags(struct proc *p, void *v, register_t *retval)
2243 {
2244 	struct sys_fchflags_args /* {
2245 		syscallarg(int) fd;
2246 		syscallarg(u_int) flags;
2247 	} */ *uap = v;
2248 	struct file *fp;
2249 	struct vnode *vp;
2250 	int error;
2251 
2252 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2253 		return (error);
2254 	vp = fp->f_data;
2255 	vref(vp);
2256 	FRELE(fp, p);
2257 	return (dovchflags(p, vp, SCARG(uap, flags)));
2258 }
2259 
2260 int
2261 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2262 {
2263 	struct vattr vattr;
2264 	int error;
2265 
2266 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2267 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2268 		error = EROFS;
2269 	else if (flags == VNOVAL)
2270 		error = EINVAL;
2271 	else {
2272 		if (suser(p)) {
2273 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2274 			    != 0)
2275 				goto out;
2276 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2277 				error = EINVAL;
2278 				goto out;
2279 			}
2280 		}
2281 		VATTR_NULL(&vattr);
2282 		vattr.va_flags = flags;
2283 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2284 	}
2285 out:
2286 	vput(vp);
2287 	return (error);
2288 }
2289 
2290 /*
2291  * Change mode of a file given path name.
2292  */
2293 int
2294 sys_chmod(struct proc *p, void *v, register_t *retval)
2295 {
2296 	struct sys_chmod_args /* {
2297 		syscallarg(const char *) path;
2298 		syscallarg(mode_t) mode;
2299 	} */ *uap = v;
2300 
2301 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2302 }
2303 
2304 int
2305 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2306 {
2307 	struct sys_fchmodat_args /* {
2308 		syscallarg(int) fd;
2309 		syscallarg(const char *) path;
2310 		syscallarg(mode_t) mode;
2311 		syscallarg(int) flag;
2312 	} */ *uap = v;
2313 
2314 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2315 	    SCARG(uap, mode), SCARG(uap, flag)));
2316 }
2317 
2318 int
2319 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2320 {
2321 	struct vnode *vp;
2322 	struct vattr vattr;
2323 	int error, follow;
2324 	struct nameidata nd;
2325 
2326 	if (mode & ~(S_IFMT | ALLPERMS))
2327 		return (EINVAL);
2328 	if ((p->p_p->ps_flags & PS_PLEDGE))
2329 		mode &= ACCESSPERMS;
2330 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2331 		return (EINVAL);
2332 
2333 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2334 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2335 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2336 	nd.ni_unveil = UNVEIL_WRITE;
2337 	if ((error = namei(&nd)) != 0)
2338 		return (error);
2339 	vp = nd.ni_vp;
2340 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2341 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2342 		error = EROFS;
2343 	else {
2344 		VATTR_NULL(&vattr);
2345 		vattr.va_mode = mode & ALLPERMS;
2346 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2347 	}
2348 	vput(vp);
2349 	return (error);
2350 }
2351 
2352 /*
2353  * Change mode of a file given a file descriptor.
2354  */
2355 int
2356 sys_fchmod(struct proc *p, void *v, register_t *retval)
2357 {
2358 	struct sys_fchmod_args /* {
2359 		syscallarg(int) fd;
2360 		syscallarg(mode_t) mode;
2361 	} */ *uap = v;
2362 	struct vattr vattr;
2363 	struct vnode *vp;
2364 	struct file *fp;
2365 	mode_t mode = SCARG(uap, mode);
2366 	int error;
2367 
2368 	if (mode & ~(S_IFMT | ALLPERMS))
2369 		return (EINVAL);
2370 	if ((p->p_p->ps_flags & PS_PLEDGE))
2371 		mode &= ACCESSPERMS;
2372 
2373 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2374 		return (error);
2375 	vp = fp->f_data;
2376 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2377 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2378 		error = EROFS;
2379 	else {
2380 		VATTR_NULL(&vattr);
2381 		vattr.va_mode = mode & ALLPERMS;
2382 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2383 	}
2384 	VOP_UNLOCK(vp);
2385 	FRELE(fp, p);
2386 	return (error);
2387 }
2388 
2389 /*
2390  * Set ownership given a path name.
2391  */
2392 int
2393 sys_chown(struct proc *p, void *v, register_t *retval)
2394 {
2395 	struct sys_chown_args /* {
2396 		syscallarg(const char *) path;
2397 		syscallarg(uid_t) uid;
2398 		syscallarg(gid_t) gid;
2399 	} */ *uap = v;
2400 
2401 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2402 	    SCARG(uap, gid), 0));
2403 }
2404 
2405 int
2406 sys_fchownat(struct proc *p, void *v, register_t *retval)
2407 {
2408 	struct sys_fchownat_args /* {
2409 		syscallarg(int) fd;
2410 		syscallarg(const char *) path;
2411 		syscallarg(uid_t) uid;
2412 		syscallarg(gid_t) gid;
2413 		syscallarg(int) flag;
2414 	} */ *uap = v;
2415 
2416 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2417 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2418 }
2419 
2420 int
2421 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2422     int flag)
2423 {
2424 	struct vnode *vp;
2425 	struct vattr vattr;
2426 	int error, follow;
2427 	struct nameidata nd;
2428 	mode_t mode;
2429 
2430 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2431 		return (EINVAL);
2432 
2433 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2434 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2435 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2436 	nd.ni_unveil = UNVEIL_WRITE;
2437 	if ((error = namei(&nd)) != 0)
2438 		return (error);
2439 	vp = nd.ni_vp;
2440 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2441 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2442 		error = EROFS;
2443 	else {
2444 		if ((error = pledge_chown(p, uid, gid)))
2445 			goto out;
2446 		if ((uid != -1 || gid != -1) &&
2447 		    !vnoperm(vp) &&
2448 		    (suser(p) || suid_clear)) {
2449 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2450 			if (error)
2451 				goto out;
2452 			mode = vattr.va_mode & ~(VSUID | VSGID);
2453 			if (mode == vattr.va_mode)
2454 				mode = VNOVAL;
2455 		} else
2456 			mode = VNOVAL;
2457 		VATTR_NULL(&vattr);
2458 		vattr.va_uid = uid;
2459 		vattr.va_gid = gid;
2460 		vattr.va_mode = mode;
2461 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2462 	}
2463 out:
2464 	vput(vp);
2465 	return (error);
2466 }
2467 
2468 /*
2469  * Set ownership given a path name, without following links.
2470  */
2471 int
2472 sys_lchown(struct proc *p, void *v, register_t *retval)
2473 {
2474 	struct sys_lchown_args /* {
2475 		syscallarg(const char *) path;
2476 		syscallarg(uid_t) uid;
2477 		syscallarg(gid_t) gid;
2478 	} */ *uap = v;
2479 	struct vnode *vp;
2480 	struct vattr vattr;
2481 	int error;
2482 	struct nameidata nd;
2483 	mode_t mode;
2484 	uid_t uid = SCARG(uap, uid);
2485 	gid_t gid = SCARG(uap, gid);
2486 
2487 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2488 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2489 	nd.ni_unveil = UNVEIL_WRITE;
2490 	if ((error = namei(&nd)) != 0)
2491 		return (error);
2492 	vp = nd.ni_vp;
2493 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2494 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2495 		error = EROFS;
2496 	else {
2497 		if ((error = pledge_chown(p, uid, gid)))
2498 			goto out;
2499 		if ((uid != -1 || gid != -1) &&
2500 		    !vnoperm(vp) &&
2501 		    (suser(p) || suid_clear)) {
2502 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2503 			if (error)
2504 				goto out;
2505 			mode = vattr.va_mode & ~(VSUID | VSGID);
2506 			if (mode == vattr.va_mode)
2507 				mode = VNOVAL;
2508 		} else
2509 			mode = VNOVAL;
2510 		VATTR_NULL(&vattr);
2511 		vattr.va_uid = uid;
2512 		vattr.va_gid = gid;
2513 		vattr.va_mode = mode;
2514 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2515 	}
2516 out:
2517 	vput(vp);
2518 	return (error);
2519 }
2520 
2521 /*
2522  * Set ownership given a file descriptor.
2523  */
2524 int
2525 sys_fchown(struct proc *p, void *v, register_t *retval)
2526 {
2527 	struct sys_fchown_args /* {
2528 		syscallarg(int) fd;
2529 		syscallarg(uid_t) uid;
2530 		syscallarg(gid_t) gid;
2531 	} */ *uap = v;
2532 	struct vnode *vp;
2533 	struct vattr vattr;
2534 	int error;
2535 	struct file *fp;
2536 	mode_t mode;
2537 	uid_t uid = SCARG(uap, uid);
2538 	gid_t gid = SCARG(uap, gid);
2539 
2540 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2541 		return (error);
2542 	vp = fp->f_data;
2543 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2544 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2545 		error = EROFS;
2546 	else {
2547 		if ((error = pledge_chown(p, uid, gid)))
2548 			goto out;
2549 		if ((uid != -1 || gid != -1) &&
2550 		    !vnoperm(vp) &&
2551 		    (suser(p) || suid_clear)) {
2552 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2553 			if (error)
2554 				goto out;
2555 			mode = vattr.va_mode & ~(VSUID | VSGID);
2556 			if (mode == vattr.va_mode)
2557 				mode = VNOVAL;
2558 		} else
2559 			mode = VNOVAL;
2560 		VATTR_NULL(&vattr);
2561 		vattr.va_uid = uid;
2562 		vattr.va_gid = gid;
2563 		vattr.va_mode = mode;
2564 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2565 	}
2566 out:
2567 	VOP_UNLOCK(vp);
2568 	FRELE(fp, p);
2569 	return (error);
2570 }
2571 
2572 /*
2573  * Set the access and modification times given a path name.
2574  */
2575 int
2576 sys_utimes(struct proc *p, void *v, register_t *retval)
2577 {
2578 	struct sys_utimes_args /* {
2579 		syscallarg(const char *) path;
2580 		syscallarg(const struct timeval *) tptr;
2581 	} */ *uap = v;
2582 
2583 	struct timespec ts[2];
2584 	struct timeval tv[2];
2585 	const struct timeval *tvp;
2586 	int error;
2587 
2588 	tvp = SCARG(uap, tptr);
2589 	if (tvp != NULL) {
2590 		error = copyin(tvp, tv, sizeof(tv));
2591 		if (error)
2592 			return (error);
2593 #ifdef KTRACE
2594 		if (KTRPOINT(p, KTR_STRUCT))
2595 			ktrabstimeval(p, &tv);
2596 #endif
2597 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2598 			return (EINVAL);
2599 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2600 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2601 	} else
2602 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2603 
2604 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2605 }
2606 
2607 int
2608 sys_utimensat(struct proc *p, void *v, register_t *retval)
2609 {
2610 	struct sys_utimensat_args /* {
2611 		syscallarg(int) fd;
2612 		syscallarg(const char *) path;
2613 		syscallarg(const struct timespec *) times;
2614 		syscallarg(int) flag;
2615 	} */ *uap = v;
2616 
2617 	struct timespec ts[2];
2618 	const struct timespec *tsp;
2619 	int error, i;
2620 
2621 	tsp = SCARG(uap, times);
2622 	if (tsp != NULL) {
2623 		error = copyin(tsp, ts, sizeof(ts));
2624 		if (error)
2625 			return (error);
2626 		for (i = 0; i < nitems(ts); i++) {
2627 			if (ts[i].tv_nsec == UTIME_NOW)
2628 				continue;
2629 			if (ts[i].tv_nsec == UTIME_OMIT)
2630 				continue;
2631 #ifdef KTRACE
2632 			if (KTRPOINT(p, KTR_STRUCT))
2633 				ktrabstimespec(p, &ts[i]);
2634 #endif
2635 			if (!timespecisvalid(&ts[i]))
2636 				return (EINVAL);
2637 		}
2638 	} else
2639 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2640 
2641 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2642 	    SCARG(uap, flag)));
2643 }
2644 
2645 int
2646 doutimensat(struct proc *p, int fd, const char *path,
2647     struct timespec ts[2], int flag)
2648 {
2649 	struct vnode *vp;
2650 	int error, follow;
2651 	struct nameidata nd;
2652 
2653 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2654 		return (EINVAL);
2655 
2656 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2657 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2658 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2659 	nd.ni_unveil = UNVEIL_WRITE;
2660 	if ((error = namei(&nd)) != 0)
2661 		return (error);
2662 	vp = nd.ni_vp;
2663 
2664 	return (dovutimens(p, vp, ts));
2665 }
2666 
2667 int
2668 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2669 {
2670 	struct vattr vattr;
2671 	struct timespec now;
2672 	int error;
2673 
2674 #ifdef KTRACE
2675 	/* if they're both UTIME_NOW, then don't report either */
2676 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2677 	    KTRPOINT(p, KTR_STRUCT)) {
2678 		ktrabstimespec(p, &ts[0]);
2679 		ktrabstimespec(p, &ts[1]);
2680 	}
2681 #endif
2682 
2683 	VATTR_NULL(&vattr);
2684 
2685 	/*  make sure ctime is updated even if neither mtime nor atime is */
2686 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2687 
2688 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2689 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2690 			vattr.va_vaflags |= VA_UTIMES_NULL;
2691 
2692 		getnanotime(&now);
2693 		if (ts[0].tv_nsec == UTIME_NOW)
2694 			ts[0] = now;
2695 		if (ts[1].tv_nsec == UTIME_NOW)
2696 			ts[1] = now;
2697 	}
2698 
2699 	if (ts[0].tv_nsec != UTIME_OMIT)
2700 		vattr.va_atime = ts[0];
2701 	if (ts[1].tv_nsec != UTIME_OMIT)
2702 		vattr.va_mtime = ts[1];
2703 
2704 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2705 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2706 		error = EROFS;
2707 	else
2708 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2709 	vput(vp);
2710 	return (error);
2711 }
2712 
2713 /*
2714  * Set the access and modification times given a file descriptor.
2715  */
2716 int
2717 sys_futimes(struct proc *p, void *v, register_t *retval)
2718 {
2719 	struct sys_futimes_args /* {
2720 		syscallarg(int) fd;
2721 		syscallarg(const struct timeval *) tptr;
2722 	} */ *uap = v;
2723 	struct timeval tv[2];
2724 	struct timespec ts[2];
2725 	const struct timeval *tvp;
2726 	int error;
2727 
2728 	tvp = SCARG(uap, tptr);
2729 	if (tvp != NULL) {
2730 		error = copyin(tvp, tv, sizeof(tv));
2731 		if (error)
2732 			return (error);
2733 #ifdef KTRACE
2734 		if (KTRPOINT(p, KTR_STRUCT)) {
2735 			ktrabstimeval(p, &tv[0]);
2736 			ktrabstimeval(p, &tv[1]);
2737 		}
2738 #endif
2739 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2740 			return (EINVAL);
2741 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2742 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2743 	} else
2744 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2745 
2746 	return (dofutimens(p, SCARG(uap, fd), ts));
2747 }
2748 
2749 int
2750 sys_futimens(struct proc *p, void *v, register_t *retval)
2751 {
2752 	struct sys_futimens_args /* {
2753 		syscallarg(int) fd;
2754 		syscallarg(const struct timespec *) times;
2755 	} */ *uap = v;
2756 	struct timespec ts[2];
2757 	const struct timespec *tsp;
2758 	int error, i;
2759 
2760 	tsp = SCARG(uap, times);
2761 	if (tsp != NULL) {
2762 		error = copyin(tsp, ts, sizeof(ts));
2763 		if (error)
2764 			return (error);
2765 		for (i = 0; i < nitems(ts); i++) {
2766 			if (ts[i].tv_nsec == UTIME_NOW)
2767 				continue;
2768 			if (ts[i].tv_nsec == UTIME_OMIT)
2769 				continue;
2770 #ifdef KTRACE
2771 			if (KTRPOINT(p, KTR_STRUCT))
2772 				ktrabstimespec(p, &ts[i]);
2773 #endif
2774 			if (!timespecisvalid(&ts[i]))
2775 				return (EINVAL);
2776 		}
2777 	} else
2778 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2779 
2780 	return (dofutimens(p, SCARG(uap, fd), ts));
2781 }
2782 
2783 int
2784 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2785 {
2786 	struct file *fp;
2787 	struct vnode *vp;
2788 	int error;
2789 
2790 	if ((error = getvnode(p, fd, &fp)) != 0)
2791 		return (error);
2792 	vp = fp->f_data;
2793 	vref(vp);
2794 	FRELE(fp, p);
2795 
2796 	return (dovutimens(p, vp, ts));
2797 }
2798 
2799 /*
2800  * Truncate a file given a vnode.
2801  */
2802 int
2803 dotruncate(struct proc *p, struct vnode *vp, off_t len)
2804 {
2805 	struct vattr vattr;
2806 	int error;
2807 
2808 	if (len < 0)
2809 		return EINVAL;
2810 	if (vp->v_type == VDIR)
2811 		return EISDIR;
2812 	if ((error = vn_writechk(vp)) != 0)
2813 		return error;
2814 	if (vp->v_type == VREG && len > lim_cur_proc(p, RLIMIT_FSIZE)) {
2815 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2816 			return error;
2817 		if (len > vattr.va_size) {
2818 			/* if extending over the limit, send signal and fail */
2819 			psignal(p, SIGXFSZ);
2820 			return EFBIG;
2821 		}
2822 	}
2823 	VATTR_NULL(&vattr);
2824 	vattr.va_size = len;
2825 	return VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2826 }
2827 
2828 /*
2829  * Truncate a file given its path name.
2830  */
2831 int
2832 sys_truncate(struct proc *p, void *v, register_t *retval)
2833 {
2834 	struct sys_truncate_args /* {
2835 		syscallarg(const char *) path;
2836 		syscallarg(off_t) length;
2837 	} */ *uap = v;
2838 	struct vnode *vp;
2839 	int error;
2840 	struct nameidata nd;
2841 
2842 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2843 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2844 	nd.ni_unveil = UNVEIL_WRITE;
2845 	if ((error = namei(&nd)) != 0)
2846 		return (error);
2847 	vp = nd.ni_vp;
2848 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2849 	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0)
2850 		error = dotruncate(p, vp, SCARG(uap, length));
2851 	vput(vp);
2852 	return (error);
2853 }
2854 
2855 /*
2856  * Truncate a file given a file descriptor.
2857  */
2858 int
2859 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2860 {
2861 	struct sys_ftruncate_args /* {
2862 		syscallarg(int) fd;
2863 		syscallarg(off_t) length;
2864 	} */ *uap = v;
2865 	struct vnode *vp;
2866 	struct file *fp;
2867 	int error;
2868 
2869 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2870 		return (error);
2871 	if ((fp->f_flag & FWRITE) == 0) {
2872 		error = EINVAL;
2873 		goto bad;
2874 	}
2875 	vp = fp->f_data;
2876 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2877 	error = dotruncate(p, vp, SCARG(uap, length));
2878 	VOP_UNLOCK(vp);
2879 bad:
2880 	FRELE(fp, p);
2881 	return (error);
2882 }
2883 
2884 /*
2885  * Sync an open file.
2886  */
2887 int
2888 sys_fsync(struct proc *p, void *v, register_t *retval)
2889 {
2890 	struct sys_fsync_args /* {
2891 		syscallarg(int) fd;
2892 	} */ *uap = v;
2893 	struct vnode *vp;
2894 	struct file *fp;
2895 	int error;
2896 
2897 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2898 		return (error);
2899 	vp = fp->f_data;
2900 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2901 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2902 #ifdef FFS_SOFTUPDATES
2903 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2904 		error = softdep_fsync(vp);
2905 #endif
2906 
2907 	VOP_UNLOCK(vp);
2908 	FRELE(fp, p);
2909 	return (error);
2910 }
2911 
2912 /*
2913  * Rename files.  Source and destination must either both be directories,
2914  * or both not be directories.  If target is a directory, it must be empty.
2915  */
2916 int
2917 sys_rename(struct proc *p, void *v, register_t *retval)
2918 {
2919 	struct sys_rename_args /* {
2920 		syscallarg(const char *) from;
2921 		syscallarg(const char *) to;
2922 	} */ *uap = v;
2923 
2924 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2925 	    SCARG(uap, to)));
2926 }
2927 
2928 int
2929 sys_renameat(struct proc *p, void *v, register_t *retval)
2930 {
2931 	struct sys_renameat_args /* {
2932 		syscallarg(int) fromfd;
2933 		syscallarg(const char *) from;
2934 		syscallarg(int) tofd;
2935 		syscallarg(const char *) to;
2936 	} */ *uap = v;
2937 
2938 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2939 	    SCARG(uap, tofd), SCARG(uap, to)));
2940 }
2941 
2942 int
2943 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2944     const char *to)
2945 {
2946 	struct vnode *tvp, *fvp, *tdvp;
2947 	struct nameidata fromnd, tond;
2948 	int error;
2949 	int flags;
2950 
2951 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2952 	    fromfd, from, p);
2953 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2954 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2955 	if ((error = namei(&fromnd)) != 0)
2956 		return (error);
2957 	fvp = fromnd.ni_vp;
2958 
2959 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2960 	/*
2961 	 * rename("foo/", "bar/");  is  OK
2962 	 */
2963 	if (fvp->v_type == VDIR)
2964 		flags |= STRIPSLASHES;
2965 
2966 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2967 	tond.ni_pledge = PLEDGE_CPATH;
2968 	tond.ni_unveil = UNVEIL_CREATE;
2969 	if ((error = namei(&tond)) != 0) {
2970 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2971 		vrele(fromnd.ni_dvp);
2972 		vrele(fvp);
2973 		goto out1;
2974 	}
2975 	tdvp = tond.ni_dvp;
2976 	tvp = tond.ni_vp;
2977 	if (tvp != NULL) {
2978 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2979 			error = ENOTDIR;
2980 			goto out;
2981 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2982 			error = EISDIR;
2983 			goto out;
2984 		}
2985 	}
2986 	if (fvp == tdvp)
2987 		error = EINVAL;
2988 	/*
2989 	 * If source is the same as the destination (that is the
2990 	 * same inode number)
2991 	 */
2992 	if (fvp == tvp)
2993 		error = -1;
2994 out:
2995 	if (!error) {
2996 		if (tvp) {
2997 			(void)uvm_vnp_uncache(tvp);
2998 		}
2999 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3000 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3001 	} else {
3002 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3003 		if (tdvp == tvp)
3004 			vrele(tdvp);
3005 		else
3006 			vput(tdvp);
3007 		if (tvp)
3008 			vput(tvp);
3009 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3010 		vrele(fromnd.ni_dvp);
3011 		vrele(fvp);
3012 	}
3013 	vrele(tond.ni_startdir);
3014 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
3015 out1:
3016 	if (fromnd.ni_startdir)
3017 		vrele(fromnd.ni_startdir);
3018 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
3019 	if (error == -1)
3020 		return (0);
3021 	return (error);
3022 }
3023 
3024 /*
3025  * Make a directory file.
3026  */
3027 int
3028 sys_mkdir(struct proc *p, void *v, register_t *retval)
3029 {
3030 	struct sys_mkdir_args /* {
3031 		syscallarg(const char *) path;
3032 		syscallarg(mode_t) mode;
3033 	} */ *uap = v;
3034 
3035 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3036 }
3037 
3038 int
3039 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3040 {
3041 	struct sys_mkdirat_args /* {
3042 		syscallarg(int) fd;
3043 		syscallarg(const char *) path;
3044 		syscallarg(mode_t) mode;
3045 	} */ *uap = v;
3046 
3047 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3048 	    SCARG(uap, mode)));
3049 }
3050 
3051 int
3052 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3053 {
3054 	struct vnode *vp;
3055 	struct vattr vattr;
3056 	int error;
3057 	struct nameidata nd;
3058 
3059 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3060 	    fd, path, p);
3061 	nd.ni_pledge = PLEDGE_CPATH;
3062 	nd.ni_unveil = UNVEIL_CREATE;
3063 	if ((error = namei(&nd)) != 0)
3064 		return (error);
3065 	vp = nd.ni_vp;
3066 	if (vp != NULL) {
3067 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3068 		if (nd.ni_dvp == vp)
3069 			vrele(nd.ni_dvp);
3070 		else
3071 			vput(nd.ni_dvp);
3072 		vrele(vp);
3073 		return (EEXIST);
3074 	}
3075 	VATTR_NULL(&vattr);
3076 	vattr.va_type = VDIR;
3077 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3078 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3079 	if (!error)
3080 		vput(nd.ni_vp);
3081 	return (error);
3082 }
3083 
3084 /*
3085  * Remove a directory file.
3086  */
3087 int
3088 sys_rmdir(struct proc *p, void *v, register_t *retval)
3089 {
3090 	struct sys_rmdir_args /* {
3091 		syscallarg(const char *) path;
3092 	} */ *uap = v;
3093 
3094 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3095 }
3096 
3097 /*
3098  * Read a block of directory entries in a file system independent format.
3099  */
3100 int
3101 sys_getdents(struct proc *p, void *v, register_t *retval)
3102 {
3103 	struct sys_getdents_args /* {
3104 		syscallarg(int) fd;
3105 		syscallarg(void *) buf;
3106 		syscallarg(size_t) buflen;
3107 	} */ *uap = v;
3108 	struct vnode *vp;
3109 	struct file *fp;
3110 	struct uio auio;
3111 	struct iovec aiov;
3112 	size_t buflen;
3113 	int error, eofflag;
3114 
3115 	buflen = SCARG(uap, buflen);
3116 
3117 	if (buflen > INT_MAX)
3118 		return (EINVAL);
3119 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3120 		return (error);
3121 	if ((fp->f_flag & FREAD) == 0) {
3122 		error = EBADF;
3123 		goto bad;
3124 	}
3125 	vp = fp->f_data;
3126 	if (vp->v_type != VDIR) {
3127 		error = EINVAL;
3128 		goto bad;
3129 	}
3130 
3131 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3132 
3133 	if (fp->f_offset < 0) {
3134 		VOP_UNLOCK(vp);
3135 		error = EINVAL;
3136 		goto bad;
3137 	}
3138 
3139 	aiov.iov_base = SCARG(uap, buf);
3140 	aiov.iov_len = buflen;
3141 	auio.uio_iov = &aiov;
3142 	auio.uio_iovcnt = 1;
3143 	auio.uio_rw = UIO_READ;
3144 	auio.uio_segflg = UIO_USERSPACE;
3145 	auio.uio_procp = p;
3146 	auio.uio_resid = buflen;
3147 	auio.uio_offset = fp->f_offset;
3148 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3149 	mtx_enter(&fp->f_mtx);
3150 	fp->f_offset = auio.uio_offset;
3151 	mtx_leave(&fp->f_mtx);
3152 	VOP_UNLOCK(vp);
3153 	if (error)
3154 		goto bad;
3155 	*retval = buflen - auio.uio_resid;
3156 bad:
3157 	FRELE(fp, p);
3158 	return (error);
3159 }
3160 
3161 /*
3162  * Set the mode mask for creation of filesystem nodes.
3163  */
3164 int
3165 sys_umask(struct proc *p, void *v, register_t *retval)
3166 {
3167 	struct sys_umask_args /* {
3168 		syscallarg(mode_t) newmask;
3169 	} */ *uap = v;
3170 	struct filedesc *fdp = p->p_fd;
3171 
3172 	fdplock(fdp);
3173 	*retval = fdp->fd_cmask;
3174 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3175 	fdpunlock(fdp);
3176 	return (0);
3177 }
3178 
3179 /*
3180  * Void all references to file by ripping underlying filesystem
3181  * away from vnode.
3182  */
3183 int
3184 sys_revoke(struct proc *p, void *v, register_t *retval)
3185 {
3186 	struct sys_revoke_args /* {
3187 		syscallarg(const char *) path;
3188 	} */ *uap = v;
3189 	struct vnode *vp;
3190 	struct vattr vattr;
3191 	int error;
3192 	struct nameidata nd;
3193 
3194 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3195 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3196 	nd.ni_unveil = UNVEIL_READ;
3197 	if ((error = namei(&nd)) != 0)
3198 		return (error);
3199 	vp = nd.ni_vp;
3200 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3201 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3202 		error = ENOTTY;
3203 		goto out;
3204 	}
3205 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3206 		goto out;
3207 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3208 	    (error = suser(p)))
3209 		goto out;
3210 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3211 		VOP_REVOKE(vp, REVOKEALL);
3212 out:
3213 	vrele(vp);
3214 	return (error);
3215 }
3216 
3217 /*
3218  * Convert a user file descriptor to a kernel file entry.
3219  *
3220  * On return *fpp is FREF:ed.
3221  */
3222 int
3223 getvnode(struct proc *p, int fd, struct file **fpp)
3224 {
3225 	struct file *fp;
3226 	struct vnode *vp;
3227 
3228 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3229 		return (EBADF);
3230 
3231 	if (fp->f_type != DTYPE_VNODE) {
3232 		FRELE(fp, p);
3233 		return (EINVAL);
3234 	}
3235 
3236 	vp = fp->f_data;
3237 	if (vp->v_type == VBAD) {
3238 		FRELE(fp, p);
3239 		return (EBADF);
3240 	}
3241 
3242 	*fpp = fp;
3243 
3244 	return (0);
3245 }
3246 
3247 /*
3248  * Positional read system call.
3249  */
3250 int
3251 sys_pread(struct proc *p, void *v, register_t *retval)
3252 {
3253 	struct sys_pread_args /* {
3254 		syscallarg(int) fd;
3255 		syscallarg(void *) buf;
3256 		syscallarg(size_t) nbyte;
3257 		syscallarg(off_t) offset;
3258 	} */ *uap = v;
3259 	struct iovec iov;
3260 	struct uio auio;
3261 
3262 	iov.iov_base = SCARG(uap, buf);
3263 	iov.iov_len = SCARG(uap, nbyte);
3264 	if (iov.iov_len > SSIZE_MAX)
3265 		return (EINVAL);
3266 
3267 	auio.uio_iov = &iov;
3268 	auio.uio_iovcnt = 1;
3269 	auio.uio_resid = iov.iov_len;
3270 	auio.uio_offset = SCARG(uap, offset);
3271 
3272 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3273 }
3274 
3275 /*
3276  * Positional scatter read system call.
3277  */
3278 int
3279 sys_preadv(struct proc *p, void *v, register_t *retval)
3280 {
3281 	struct sys_preadv_args /* {
3282 		syscallarg(int) fd;
3283 		syscallarg(const struct iovec *) iovp;
3284 		syscallarg(int) iovcnt;
3285 		syscallarg(off_t) offset;
3286 	} */ *uap = v;
3287 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3288 	int error, iovcnt = SCARG(uap, iovcnt);
3289 	struct uio auio;
3290 	size_t resid;
3291 
3292 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3293 	if (error)
3294 		goto done;
3295 
3296 	auio.uio_iov = iov;
3297 	auio.uio_iovcnt = iovcnt;
3298 	auio.uio_resid = resid;
3299 	auio.uio_offset = SCARG(uap, offset);
3300 
3301 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3302  done:
3303 	iovec_free(iov, iovcnt);
3304 	return (error);
3305 }
3306 
3307 /*
3308  * Positional write system call.
3309  */
3310 int
3311 sys_pwrite(struct proc *p, void *v, register_t *retval)
3312 {
3313 	struct sys_pwrite_args /* {
3314 		syscallarg(int) fd;
3315 		syscallarg(const void *) buf;
3316 		syscallarg(size_t) nbyte;
3317 		syscallarg(off_t) offset;
3318 	} */ *uap = v;
3319 	struct iovec iov;
3320 	struct uio auio;
3321 
3322 	iov.iov_base = (void *)SCARG(uap, buf);
3323 	iov.iov_len = SCARG(uap, nbyte);
3324 	if (iov.iov_len > SSIZE_MAX)
3325 		return (EINVAL);
3326 
3327 	auio.uio_iov = &iov;
3328 	auio.uio_iovcnt = 1;
3329 	auio.uio_resid = iov.iov_len;
3330 	auio.uio_offset = SCARG(uap, offset);
3331 
3332 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3333 }
3334 
3335 /*
3336  * Positional gather write system call.
3337  */
3338 int
3339 sys_pwritev(struct proc *p, void *v, register_t *retval)
3340 {
3341 	struct sys_pwritev_args /* {
3342 		syscallarg(int) fd;
3343 		syscallarg(const struct iovec *) iovp;
3344 		syscallarg(int) iovcnt;
3345 		syscallarg(off_t) offset;
3346 	} */ *uap = v;
3347 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3348 	int error, iovcnt = SCARG(uap, iovcnt);
3349 	struct uio auio;
3350 	size_t resid;
3351 
3352 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3353 	if (error)
3354 		goto done;
3355 
3356 	auio.uio_iov = iov;
3357 	auio.uio_iovcnt = iovcnt;
3358 	auio.uio_resid = resid;
3359 	auio.uio_offset = SCARG(uap, offset);
3360 
3361 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3362  done:
3363 	iovec_free(iov, iovcnt);
3364 	return (error);
3365 }
3366