xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.356 2022/02/17 03:12:34 rob Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
66 extern int suid_clear;
67 
68 static int change_dir(struct nameidata *, struct proc *);
69 
70 void checkdirs(struct vnode *);
71 
72 int copyout_statfs(struct statfs *, void *, struct proc *);
73 
74 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
75 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
76 int dolinkat(struct proc *, int, const char *, int, const char *, int);
77 int dosymlinkat(struct proc *, const char *, int, const char *);
78 int dounlinkat(struct proc *, int, const char *, int);
79 int dofaccessat(struct proc *, int, const char *, int, int);
80 int dofstatat(struct proc *, int, const char *, struct stat *, int);
81 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
82     register_t *);
83 int dochflagsat(struct proc *, int, const char *, u_int, int);
84 int dovchflags(struct proc *, struct vnode *, u_int);
85 int dofchmodat(struct proc *, int, const char *, mode_t, int);
86 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
87 int dorenameat(struct proc *, int, const char *, int, const char *);
88 int domkdirat(struct proc *, int, const char *, mode_t);
89 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
90 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
91 int dofutimens(struct proc *, int, struct timespec [2]);
92 int dounmount_leaf(struct mount *, int, struct proc *);
93 
94 /*
95  * Virtual File System System Calls
96  */
97 
98 /*
99  * Mount a file system.
100  */
101 int
102 sys_mount(struct proc *p, void *v, register_t *retval)
103 {
104 	struct sys_mount_args /* {
105 		syscallarg(const char *) type;
106 		syscallarg(const char *) path;
107 		syscallarg(int) flags;
108 		syscallarg(void *) data;
109 	} */ *uap = v;
110 	struct vnode *vp;
111 	struct mount *mp;
112 	int error, mntflag = 0;
113 	char fstypename[MFSNAMELEN];
114 	char fspath[MNAMELEN];
115 	struct nameidata nd;
116 	struct vfsconf *vfsp;
117 	int flags = SCARG(uap, flags);
118 	void *args = NULL;
119 
120 	if ((error = suser(p)))
121 		return (error);
122 
123 	/*
124 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
125 	 */
126 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
127 	if (error)
128 		return(error);
129 
130 	/*
131 	 * Get vnode to be covered
132 	 */
133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
134 	if ((error = namei(&nd)) != 0)
135 		goto fail;
136 	vp = nd.ni_vp;
137 	if (flags & MNT_UPDATE) {
138 		if ((vp->v_flag & VROOT) == 0) {
139 			vput(vp);
140 			error = EINVAL;
141 			goto fail;
142 		}
143 		mp = vp->v_mount;
144 		vfsp = mp->mnt_vfc;
145 
146 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
147 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
148 		if (error) {
149 			vput(vp);
150 			goto fail;
151 		}
152 
153 		mntflag = mp->mnt_flag;
154 		/*
155 		 * We only allow the filesystem to be reloaded if it
156 		 * is currently mounted read-only.
157 		 */
158 		if ((flags & MNT_RELOAD) &&
159 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 			vput(vp);
161 			error = EOPNOTSUPP;	/* Needs translation */
162 			goto fail;
163 		}
164 
165 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
166 			vput(vp);
167 			goto fail;
168 		}
169 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
170 		goto update;
171 	}
172 	/*
173 	 * Do not allow disabling of permission checks unless exec and access to
174 	 * device files is disabled too.
175 	 */
176 	if ((flags & MNT_NOPERM) &&
177 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
178 		vput(vp);
179 		error = EPERM;
180 		goto fail;
181 	}
182 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
183 		vput(vp);
184 		goto fail;
185 	}
186 	if (vp->v_type != VDIR) {
187 		vput(vp);
188 		goto fail;
189 	}
190 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
191 	if (error) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	vfsp = vfs_byname(fstypename);
196 	if (vfsp == NULL) {
197 		vput(vp);
198 		error = EOPNOTSUPP;
199 		goto fail;
200 	}
201 
202 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
203 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
204 	if (error) {
205 		vput(vp);
206 		goto fail;
207 	}
208 
209 	if (vp->v_mountedhere != NULL) {
210 		vput(vp);
211 		error = EBUSY;
212 		goto fail;
213 	}
214 
215 	/*
216 	 * Allocate and initialize the file system.
217 	 */
218 	mp = vfs_mount_alloc(vp, vfsp);
219 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
220 
221 update:
222 	/* Ensure that the parent mountpoint does not get unmounted. */
223 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
224 	if (error) {
225 		if (mp->mnt_flag & MNT_UPDATE) {
226 			mp->mnt_flag = mntflag;
227 			vfs_unbusy(mp);
228 		} else {
229 			vfs_unbusy(mp);
230 			vfs_mount_free(mp);
231 		}
232 		vput(vp);
233 		goto fail;
234 	}
235 
236 	/*
237 	 * Set the mount level flags.
238 	 */
239 	if (flags & MNT_RDONLY)
240 		mp->mnt_flag |= MNT_RDONLY;
241 	else if (mp->mnt_flag & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_WANTRDWR;
243 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
244 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
245 	    MNT_NOPERM | MNT_FORCE);
246 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
247 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
248 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
249 	/*
250 	 * Mount the filesystem.
251 	 */
252 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
253 	if (!error) {
254 		mp->mnt_stat.f_ctime = gettime();
255 	}
256 	if (mp->mnt_flag & MNT_UPDATE) {
257 		vfs_unbusy(vp->v_mount);
258 		vput(vp);
259 		if (mp->mnt_flag & MNT_WANTRDWR)
260 			mp->mnt_flag &= ~MNT_RDONLY;
261 		mp->mnt_flag &= ~MNT_OP_FLAGS;
262 		if (error)
263 			mp->mnt_flag = mntflag;
264 
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
266 			if (mp->mnt_syncer == NULL)
267 				error = vfs_allocate_syncvnode(mp);
268 		} else {
269 			if (mp->mnt_syncer != NULL)
270 				vgone(mp->mnt_syncer);
271 			mp->mnt_syncer = NULL;
272 		}
273 
274 		vfs_unbusy(mp);
275 		goto fail;
276 	}
277 
278 	mp->mnt_flag &= ~MNT_OP_FLAGS;
279 	vp->v_mountedhere = mp;
280 
281 	/*
282 	 * Put the new filesystem on the mount list after root.
283 	 */
284 	cache_purge(vp);
285 	if (!error) {
286 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
287 		checkdirs(vp);
288 		vfs_unbusy(vp->v_mount);
289 		VOP_UNLOCK(vp);
290 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
291 			error = vfs_allocate_syncvnode(mp);
292 		vfs_unbusy(mp);
293 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
294 		if ((error = VFS_START(mp, 0, p)) != 0)
295 			vrele(vp);
296 	} else {
297 		mp->mnt_vnodecovered->v_mountedhere = NULL;
298 		vfs_unbusy(mp);
299 		vfs_mount_free(mp);
300 		vfs_unbusy(vp->v_mount);
301 		vput(vp);
302 	}
303 fail:
304 	if (args)
305 		free(args, M_TEMP, vfsp->vfc_datasize);
306 	return (error);
307 }
308 
309 /*
310  * Scan all active processes to see if any of them have a current
311  * or root directory onto which the new filesystem has just been
312  * mounted. If so, replace them with the new mount point, keeping
313  * track of how many were replaced.  That's the number of references
314  * the old vnode had that we've replaced, so finish by vrele()'ing
315  * it that many times.  This puts off any possible sleeping until
316  * we've finished walking the allprocess list.
317  */
318 void
319 checkdirs(struct vnode *olddp)
320 {
321 	struct filedesc *fdp;
322 	struct vnode *newdp;
323 	struct process *pr;
324 	u_int  free_count = 0;
325 
326 	if (olddp->v_usecount == 1)
327 		return;
328 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
329 		panic("mount: lost mount");
330 	LIST_FOREACH(pr, &allprocess, ps_list) {
331 		fdp = pr->ps_fd;
332 		if (fdp->fd_cdir == olddp) {
333 			free_count++;
334 			vref(newdp);
335 			fdp->fd_cdir = newdp;
336 		}
337 		if (fdp->fd_rdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_rdir = newdp;
341 		}
342 	}
343 	if (rootvnode == olddp) {
344 		free_count++;
345 		vref(newdp);
346 		rootvnode = newdp;
347 	}
348 	while (free_count-- > 0)
349 		vrele(olddp);
350 	vput(newdp);
351 }
352 
353 /*
354  * Unmount a file system.
355  *
356  * Note: unmount takes a path to the vnode mounted on as argument,
357  * not special file (as before).
358  */
359 int
360 sys_unmount(struct proc *p, void *v, register_t *retval)
361 {
362 	struct sys_unmount_args /* {
363 		syscallarg(const char *) path;
364 		syscallarg(int) flags;
365 	} */ *uap = v;
366 	struct vnode *vp;
367 	struct mount *mp;
368 	int error;
369 	struct nameidata nd;
370 
371 	if ((error = suser(p)) != 0)
372 		return (error);
373 
374 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
375 	    SCARG(uap, path), p);
376 	if ((error = namei(&nd)) != 0)
377 		return (error);
378 	vp = nd.ni_vp;
379 	mp = vp->v_mount;
380 
381 	/*
382 	 * Don't allow unmounting the root file system.
383 	 */
384 	if (mp->mnt_flag & MNT_ROOTFS) {
385 		vput(vp);
386 		return (EINVAL);
387 	}
388 
389 	/*
390 	 * Must be the root of the filesystem
391 	 */
392 	if ((vp->v_flag & VROOT) == 0) {
393 		vput(vp);
394 		return (EINVAL);
395 	}
396 	vput(vp);
397 
398 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
399 		return (EBUSY);
400 
401 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
402 }
403 
404 /*
405  * Do the actual file system unmount.
406  */
407 int
408 dounmount(struct mount *mp, int flags, struct proc *p)
409 {
410 	SLIST_HEAD(, mount) mplist;
411 	struct mount *nmp;
412 	int error;
413 
414 	SLIST_INIT(&mplist);
415 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
416 
417 	/*
418 	 * Collect nested mount points. This takes advantage of the mount list
419 	 * being ordered - nested mount points come after their parent.
420 	 */
421 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
422 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
423 			if (mp->mnt_vnodecovered == NULLVP ||
424 			    mp->mnt_vnodecovered->v_mount != nmp)
425 				continue;
426 
427 			if ((flags & MNT_FORCE) == 0) {
428 				error = EBUSY;
429 				goto err;
430 			}
431 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
432 			if (error) {
433 				if ((flags & MNT_DOOMED)) {
434 					/*
435 					 * If the mount point was busy due to
436 					 * being unmounted, it has been removed
437 					 * from the mount list already.
438 					 * Restart the iteration from the last
439 					 * collected busy entry.
440 					 */
441 					mp = SLIST_FIRST(&mplist);
442 					break;
443 				}
444 				goto err;
445 			}
446 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
447 			break;
448 		}
449 	}
450 
451 	/*
452 	 * Nested mount points cannot appear during this loop as mounting
453 	 * requires a read lock for the parent mount point.
454 	 */
455 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
456 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
457 		error = dounmount_leaf(mp, flags, p);
458 		if (error)
459 			goto err;
460 	}
461 	return (0);
462 
463 err:
464 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
465 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
466 		vfs_unbusy(mp);
467 	}
468 	return (error);
469 }
470 
471 int
472 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
473 {
474 	struct vnode *coveredvp;
475 	struct vnode *vp, *nvp;
476 	int error;
477 	int hadsyncer = 0;
478 
479 	mp->mnt_flag &=~ MNT_ASYNC;
480 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
481 	if (mp->mnt_syncer != NULL) {
482 		hadsyncer = 1;
483 		vgone(mp->mnt_syncer);
484 		mp->mnt_syncer = NULL;
485 	}
486 
487 	/*
488 	 * Before calling file system unmount, make sure
489 	 * all unveils to vnodes in here are dropped.
490 	 */
491 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
492 		unveil_removevnode(vp);
493 	}
494 
495 	if (((mp->mnt_flag & MNT_RDONLY) ||
496 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
497 	    (flags & MNT_FORCE))
498 		error = VFS_UNMOUNT(mp, flags, p);
499 
500 	if (error && !(flags & MNT_DOOMED)) {
501 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
502 			(void) vfs_allocate_syncvnode(mp);
503 		vfs_unbusy(mp);
504 		return (error);
505 	}
506 
507 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
508 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
509 		coveredvp->v_mountedhere = NULL;
510 		vrele(coveredvp);
511 	}
512 
513 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
514 		panic("unmount: dangling vnode");
515 
516 	vfs_unbusy(mp);
517 	vfs_mount_free(mp);
518 
519 	return (0);
520 }
521 
522 /*
523  * Sync each mounted filesystem.
524  */
525 int
526 sys_sync(struct proc *p, void *v, register_t *retval)
527 {
528 	struct mount *mp;
529 	int asyncflag;
530 
531 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
532 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
533 			continue;
534 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
535 			asyncflag = mp->mnt_flag & MNT_ASYNC;
536 			mp->mnt_flag &= ~MNT_ASYNC;
537 			uvm_vnp_sync(mp);
538 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
539 			if (asyncflag)
540 				mp->mnt_flag |= MNT_ASYNC;
541 		}
542 		vfs_unbusy(mp);
543 	}
544 
545 	return (0);
546 }
547 
548 /*
549  * Change filesystem quotas.
550  */
551 int
552 sys_quotactl(struct proc *p, void *v, register_t *retval)
553 {
554 	struct sys_quotactl_args /* {
555 		syscallarg(const char *) path;
556 		syscallarg(int) cmd;
557 		syscallarg(int) uid;
558 		syscallarg(char *) arg;
559 	} */ *uap = v;
560 	struct mount *mp;
561 	int error;
562 	struct nameidata nd;
563 
564 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
565 	if ((error = namei(&nd)) != 0)
566 		return (error);
567 	mp = nd.ni_vp->v_mount;
568 	vrele(nd.ni_vp);
569 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
570 	    SCARG(uap, arg), p));
571 }
572 
573 int
574 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
575 {
576 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
577 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
578 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
579 	char *s, *d;
580 	int error;
581 
582 	/* Don't let non-root see filesystem id (for NFS security) */
583 	if (suser(p)) {
584 		fsid_t fsid;
585 
586 		s = (char *)sp;
587 		d = (char *)uaddr;
588 
589 		memset(&fsid, 0, sizeof(fsid));
590 
591 		if ((error = copyout(s, d, co_sz1)) != 0)
592 			return (error);
593 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
594 			return (error);
595 		return (copyout(s + co_off2, d + co_off2, co_sz2));
596 	}
597 
598 	return (copyout(sp, uaddr, sizeof(*sp)));
599 }
600 
601 /*
602  * Get filesystem statistics.
603  */
604 int
605 sys_statfs(struct proc *p, void *v, register_t *retval)
606 {
607 	struct sys_statfs_args /* {
608 		syscallarg(const char *) path;
609 		syscallarg(struct statfs *) buf;
610 	} */ *uap = v;
611 	struct mount *mp;
612 	struct statfs *sp;
613 	int error;
614 	struct nameidata nd;
615 
616 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
617 	    SCARG(uap, path), p);
618 	nd.ni_pledge = PLEDGE_RPATH;
619 	nd.ni_unveil = UNVEIL_READ;
620 	if ((error = namei(&nd)) != 0)
621 		return (error);
622 	mp = nd.ni_vp->v_mount;
623 	sp = &mp->mnt_stat;
624 	vrele(nd.ni_vp);
625 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
626 		return (error);
627 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
628 
629 	return (copyout_statfs(sp, SCARG(uap, buf), p));
630 }
631 
632 /*
633  * Get filesystem statistics.
634  */
635 int
636 sys_fstatfs(struct proc *p, void *v, register_t *retval)
637 {
638 	struct sys_fstatfs_args /* {
639 		syscallarg(int) fd;
640 		syscallarg(struct statfs *) buf;
641 	} */ *uap = v;
642 	struct file *fp;
643 	struct mount *mp;
644 	struct statfs *sp;
645 	int error;
646 
647 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
648 		return (error);
649 	mp = ((struct vnode *)fp->f_data)->v_mount;
650 	if (!mp) {
651 		FRELE(fp, p);
652 		return (ENOENT);
653 	}
654 	sp = &mp->mnt_stat;
655 	error = VFS_STATFS(mp, sp, p);
656 	FRELE(fp, p);
657 	if (error)
658 		return (error);
659 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
660 
661 	return (copyout_statfs(sp, SCARG(uap, buf), p));
662 }
663 
664 /*
665  * Get statistics on all filesystems.
666  */
667 int
668 sys_getfsstat(struct proc *p, void *v, register_t *retval)
669 {
670 	struct sys_getfsstat_args /* {
671 		syscallarg(struct statfs *) buf;
672 		syscallarg(size_t) bufsize;
673 		syscallarg(int) flags;
674 	} */ *uap = v;
675 	struct mount *mp;
676 	struct statfs *sp;
677 	struct statfs *sfsp;
678 	size_t count, maxcount;
679 	int error, flags = SCARG(uap, flags);
680 
681 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
682 	sfsp = SCARG(uap, buf);
683 	count = 0;
684 
685 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
686 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
687 			continue;
688 		if (sfsp && count < maxcount) {
689 			sp = &mp->mnt_stat;
690 
691 			/* Refresh stats unless MNT_NOWAIT is specified */
692 			if (flags != MNT_NOWAIT &&
693 			    flags != MNT_LAZY &&
694 			    (flags == MNT_WAIT ||
695 			    flags == 0) &&
696 			    (error = VFS_STATFS(mp, sp, p))) {
697 				vfs_unbusy(mp);
698 				continue;
699 			}
700 
701 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
702 #if notyet
703 			if (mp->mnt_flag & MNT_SOFTDEP)
704 				sp->f_eflags = STATFS_SOFTUPD;
705 #endif
706 			error = (copyout_statfs(sp, sfsp, p));
707 			if (error) {
708 				vfs_unbusy(mp);
709 				return (error);
710 			}
711 			sfsp++;
712 		}
713 		count++;
714 		vfs_unbusy(mp);
715 	}
716 
717 	if (sfsp && count > maxcount)
718 		*retval = maxcount;
719 	else
720 		*retval = count;
721 
722 	return (0);
723 }
724 
725 /*
726  * Change current working directory to a given file descriptor.
727  */
728 int
729 sys_fchdir(struct proc *p, void *v, register_t *retval)
730 {
731 	struct sys_fchdir_args /* {
732 		syscallarg(int) fd;
733 	} */ *uap = v;
734 	struct filedesc *fdp = p->p_fd;
735 	struct vnode *vp, *tdp, *old_cdir;
736 	struct mount *mp;
737 	struct file *fp;
738 	int error;
739 
740 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
741 		return (EBADF);
742 	vp = fp->f_data;
743 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
744 		FRELE(fp, p);
745 		return (ENOTDIR);
746 	}
747 	vref(vp);
748 	FRELE(fp, p);
749 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
750 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
751 
752 	while (!error && (mp = vp->v_mountedhere) != NULL) {
753 		if (vfs_busy(mp, VB_READ|VB_WAIT))
754 			continue;
755 		error = VFS_ROOT(mp, &tdp);
756 		vfs_unbusy(mp);
757 		if (error)
758 			break;
759 		vput(vp);
760 		vp = tdp;
761 	}
762 	if (error) {
763 		vput(vp);
764 		return (error);
765 	}
766 	VOP_UNLOCK(vp);
767 	old_cdir = fdp->fd_cdir;
768 	fdp->fd_cdir = vp;
769 	vrele(old_cdir);
770 	return (0);
771 }
772 
773 /*
774  * Change current working directory (``.'').
775  */
776 int
777 sys_chdir(struct proc *p, void *v, register_t *retval)
778 {
779 	struct sys_chdir_args /* {
780 		syscallarg(const char *) path;
781 	} */ *uap = v;
782 	struct filedesc *fdp = p->p_fd;
783 	struct vnode *old_cdir;
784 	int error;
785 	struct nameidata nd;
786 
787 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
788 	    SCARG(uap, path), p);
789 	nd.ni_pledge = PLEDGE_RPATH;
790 	nd.ni_unveil = UNVEIL_READ;
791 	if ((error = change_dir(&nd, p)) != 0)
792 		return (error);
793 	old_cdir = fdp->fd_cdir;
794 	fdp->fd_cdir = nd.ni_vp;
795 	vrele(old_cdir);
796 	return (0);
797 }
798 
799 /*
800  * Change notion of root (``/'') directory.
801  */
802 int
803 sys_chroot(struct proc *p, void *v, register_t *retval)
804 {
805 	struct sys_chroot_args /* {
806 		syscallarg(const char *) path;
807 	} */ *uap = v;
808 	struct filedesc *fdp = p->p_fd;
809 	struct vnode *old_cdir, *old_rdir;
810 	int error;
811 	struct nameidata nd;
812 
813 	if ((error = suser(p)) != 0)
814 		return (error);
815 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
816 	    SCARG(uap, path), p);
817 	if ((error = change_dir(&nd, p)) != 0)
818 		return (error);
819 	if (fdp->fd_rdir != NULL) {
820 		/*
821 		 * A chroot() done inside a changed root environment does
822 		 * an automatic chdir to avoid the out-of-tree experience.
823 		 */
824 		vref(nd.ni_vp);
825 		old_rdir = fdp->fd_rdir;
826 		old_cdir = fdp->fd_cdir;
827 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
828 		vrele(old_rdir);
829 		vrele(old_cdir);
830 	} else
831 		fdp->fd_rdir = nd.ni_vp;
832 	atomic_setbits_int(&p->p_p->ps_flags, PS_CHROOT);
833 	return (0);
834 }
835 
836 /*
837  * Common routine for chroot and chdir.
838  */
839 static int
840 change_dir(struct nameidata *ndp, struct proc *p)
841 {
842 	struct vnode *vp;
843 	int error;
844 
845 	if ((error = namei(ndp)) != 0)
846 		return (error);
847 	vp = ndp->ni_vp;
848 	if (vp->v_type != VDIR)
849 		error = ENOTDIR;
850 	else
851 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
852 	if (error)
853 		vput(vp);
854 	else
855 		VOP_UNLOCK(vp);
856 	return (error);
857 }
858 
859 int
860 sys___realpath(struct proc *p, void *v, register_t *retval)
861 {
862 	struct sys___realpath_args /* {
863 		syscallarg(const char *) pathname;
864 		syscallarg(char *) resolved;
865 	} */ *uap = v;
866 	char *pathname;
867 	char *rpbuf;
868 	struct nameidata nd;
869 	size_t pathlen;
870 	int error = 0;
871 
872 	if (SCARG(uap, pathname) == NULL)
873 		return (EINVAL);
874 
875 	pathname = pool_get(&namei_pool, PR_WAITOK);
876 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
877 
878 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
879 	    &pathlen)))
880 		goto end;
881 
882 	if (pathlen == 1) { /* empty string "" */
883 		error = ENOENT;
884 		goto end;
885 	}
886 	if (pathlen < 2) {
887 		error = EINVAL;
888 		goto end;
889 	}
890 
891 	/* Get cwd for relative path if needed, prepend to rpbuf */
892 	rpbuf[0] = '\0';
893 	if (pathname[0] != '/') {
894 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
895 		char *cwdbuf, *bp;
896 
897 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
898 
899 		/* vfs_getcwd_common fills this in backwards */
900 		bp = &cwdbuf[cwdlen - 1];
901 		*bp = '\0';
902 
903 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
904 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
905 
906 		if (error) {
907 			free(cwdbuf, M_TEMP, cwdlen);
908 			goto end;
909 		}
910 
911 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
912 			free(cwdbuf, M_TEMP, cwdlen);
913 			error = ENAMETOOLONG;
914 			goto end;
915 		}
916 
917 		free(cwdbuf, M_TEMP, cwdlen);
918 	}
919 
920 	NDINIT(&nd, LOOKUP, FOLLOW | SAVENAME | REALPATH, UIO_SYSSPACE,
921 	    pathname, p);
922 
923 	nd.ni_cnd.cn_rpbuf = rpbuf;
924 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
925 
926 	nd.ni_pledge = PLEDGE_RPATH;
927 	nd.ni_unveil = UNVEIL_READ;
928 	if ((error = namei(&nd)) != 0)
929 		goto end;
930 
931 	/* release reference from namei */
932 	if (nd.ni_vp)
933 		vrele(nd.ni_vp);
934 
935 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
936 	    MAXPATHLEN, NULL);
937 
938 #ifdef KTRACE
939 	if (KTRPOINT(p, KTR_NAMEI))
940 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
941 #endif
942 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
943 end:
944 	pool_put(&namei_pool, rpbuf);
945 	pool_put(&namei_pool, pathname);
946 	return (error);
947 }
948 
949 int
950 sys_unveil(struct proc *p, void *v, register_t *retval)
951 {
952 	struct sys_unveil_args /* {
953 		syscallarg(const char *) path;
954 		syscallarg(const char *) permissions;
955 	} */ *uap = v;
956 	struct process *pr = p->p_p;
957 	char *pathname, *c;
958 	struct nameidata nd;
959 	size_t pathlen;
960 	char permissions[5];
961 	int error, allow;
962 
963 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
964 		pr->ps_uvdone = 1;
965 		return (0);
966 	}
967 
968 	if (pr->ps_uvdone != 0)
969 		return EPERM;
970 
971 	error = copyinstr(SCARG(uap, permissions), permissions,
972 	    sizeof(permissions), NULL);
973 	if (error)
974 		return (error);
975 	pathname = pool_get(&namei_pool, PR_WAITOK);
976 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
977 	if (error)
978 		goto end;
979 
980 #ifdef KTRACE
981 	if (KTRPOINT(p, KTR_STRUCT))
982 		ktrstruct(p, "unveil", permissions, strlen(permissions));
983 #endif
984 	if (pathlen < 2) {
985 		error = EINVAL;
986 		goto end;
987 	}
988 
989 	/* find root "/" or "//" */
990 	for (c = pathname; *c != '\0'; c++) {
991 		if (*c != '/')
992 			break;
993 	}
994 	if (*c == '\0')
995 		/* root directory */
996 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
997 		    UIO_SYSSPACE, pathname, p);
998 	else
999 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
1000 		    UIO_SYSSPACE, pathname, p);
1001 
1002 	nd.ni_pledge = PLEDGE_UNVEIL;
1003 	if ((error = namei(&nd)) != 0)
1004 		goto end;
1005 
1006 	/*
1007 	 * XXX Any access to the file or directory will allow us to
1008 	 * pledge path it
1009 	 */
1010 	allow = ((nd.ni_vp &&
1011 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1012 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1013 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1014 	    (nd.ni_dvp &&
1015 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1016 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1017 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1018 
1019 	/* release lock from namei, but keep ref */
1020 	if (nd.ni_vp)
1021 		VOP_UNLOCK(nd.ni_vp);
1022 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1023 		VOP_UNLOCK(nd.ni_dvp);
1024 
1025 	if (allow)
1026 		error = unveil_add(p, &nd, permissions);
1027 	else
1028 		error = EPERM;
1029 
1030 	/* release vref from namei, but not vref from unveil_add */
1031 	if (nd.ni_vp)
1032 		vrele(nd.ni_vp);
1033 	if (nd.ni_dvp)
1034 		vrele(nd.ni_dvp);
1035 
1036 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1037 end:
1038 	pool_put(&namei_pool, pathname);
1039 
1040 	return (error);
1041 }
1042 
1043 /*
1044  * Check permissions, allocate an open file structure,
1045  * and call the device open routine if any.
1046  */
1047 int
1048 sys_open(struct proc *p, void *v, register_t *retval)
1049 {
1050 	struct sys_open_args /* {
1051 		syscallarg(const char *) path;
1052 		syscallarg(int) flags;
1053 		syscallarg(mode_t) mode;
1054 	} */ *uap = v;
1055 
1056 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1057 	    SCARG(uap, mode), retval));
1058 }
1059 
1060 int
1061 sys_openat(struct proc *p, void *v, register_t *retval)
1062 {
1063 	struct sys_openat_args /* {
1064 		syscallarg(int) fd;
1065 		syscallarg(const char *) path;
1066 		syscallarg(int) flags;
1067 		syscallarg(mode_t) mode;
1068 	} */ *uap = v;
1069 
1070 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1071 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1072 }
1073 
1074 int
1075 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1076     register_t *retval)
1077 {
1078 	struct filedesc *fdp = p->p_fd;
1079 	struct file *fp;
1080 	struct vnode *vp;
1081 	struct vattr vattr;
1082 	int flags, cloexec, cmode;
1083 	int type, indx, error, localtrunc = 0;
1084 	struct flock lf;
1085 	struct nameidata nd;
1086 	uint64_t ni_pledge = 0;
1087 	u_char ni_unveil = 0;
1088 
1089 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1090 		error = pledge_flock(p);
1091 		if (error != 0)
1092 			return (error);
1093 	}
1094 
1095 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1096 
1097 	fdplock(fdp);
1098 	if ((error = falloc(p, &fp, &indx)) != 0) {
1099 		fdpunlock(fdp);
1100 		return (error);
1101 	}
1102 	fdpunlock(fdp);
1103 
1104 	flags = FFLAGS(oflags);
1105 	if (flags & FREAD) {
1106 		ni_pledge |= PLEDGE_RPATH;
1107 		ni_unveil |= UNVEIL_READ;
1108 	}
1109 	if (flags & FWRITE) {
1110 		ni_pledge |= PLEDGE_WPATH;
1111 		ni_unveil |= UNVEIL_WRITE;
1112 	}
1113 	if (oflags & O_CREAT) {
1114 		ni_pledge |= PLEDGE_CPATH;
1115 		ni_unveil |= UNVEIL_CREATE;
1116 	}
1117 
1118 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1119 	if ((p->p_p->ps_flags & PS_PLEDGE))
1120 		cmode &= ACCESSPERMS;
1121 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1122 	nd.ni_pledge = ni_pledge;
1123 	nd.ni_unveil = ni_unveil;
1124 	p->p_dupfd = -1;			/* XXX check for fdopen */
1125 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1126 		localtrunc = 1;
1127 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1128 	}
1129 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1130 		fdplock(fdp);
1131 		if (error == ENODEV &&
1132 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1133 		    (error =
1134 			dupfdopen(p, indx, flags)) == 0) {
1135 			fdpunlock(fdp);
1136 			closef(fp, p);
1137 			*retval = indx;
1138 			return (error);
1139 		}
1140 		if (error == ERESTART)
1141 			error = EINTR;
1142 		fdremove(fdp, indx);
1143 		fdpunlock(fdp);
1144 		closef(fp, p);
1145 		return (error);
1146 	}
1147 	p->p_dupfd = 0;
1148 	vp = nd.ni_vp;
1149 	fp->f_flag = flags & FMASK;
1150 	fp->f_type = DTYPE_VNODE;
1151 	fp->f_ops = &vnops;
1152 	fp->f_data = vp;
1153 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1154 		lf.l_whence = SEEK_SET;
1155 		lf.l_start = 0;
1156 		lf.l_len = 0;
1157 		if (flags & O_EXLOCK)
1158 			lf.l_type = F_WRLCK;
1159 		else
1160 			lf.l_type = F_RDLCK;
1161 		type = F_FLOCK;
1162 		if ((flags & FNONBLOCK) == 0)
1163 			type |= F_WAIT;
1164 		VOP_UNLOCK(vp);
1165 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1166 		if (error) {
1167 			fdplock(fdp);
1168 			/* closef will vn_close the file for us. */
1169 			fdremove(fdp, indx);
1170 			fdpunlock(fdp);
1171 			closef(fp, p);
1172 			return (error);
1173 		}
1174 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1175 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1176 	}
1177 	if (localtrunc) {
1178 		if ((fp->f_flag & FWRITE) == 0)
1179 			error = EACCES;
1180 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1181 			error = EROFS;
1182 		else if (vp->v_type == VDIR)
1183 			error = EISDIR;
1184 		else if ((error = vn_writechk(vp)) == 0) {
1185 			VATTR_NULL(&vattr);
1186 			vattr.va_size = 0;
1187 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1188 		}
1189 		if (error) {
1190 			VOP_UNLOCK(vp);
1191 			fdplock(fdp);
1192 			/* closef will close the file for us. */
1193 			fdremove(fdp, indx);
1194 			fdpunlock(fdp);
1195 			closef(fp, p);
1196 			return (error);
1197 		}
1198 	}
1199 	VOP_UNLOCK(vp);
1200 	*retval = indx;
1201 	fdplock(fdp);
1202 	fdinsert(fdp, indx, cloexec, fp);
1203 	fdpunlock(fdp);
1204 	FRELE(fp, p);
1205 	return (error);
1206 }
1207 
1208 /*
1209  * Open a new created file (in /tmp) suitable for mmaping.
1210  */
1211 int
1212 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1213 {
1214 	struct sys___tmpfd_args /* {
1215 		syscallarg(int) flags;
1216 	} */ *uap = v;
1217 	struct filedesc *fdp = p->p_fd;
1218 	struct file *fp;
1219 	struct vnode *vp;
1220 	int oflags = SCARG(uap, flags);
1221 	int flags, cloexec, cmode;
1222 	int indx, error;
1223 	unsigned int i;
1224 	struct nameidata nd;
1225 	char path[64];
1226 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1227 
1228 	/* most flags are hardwired */
1229 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1230 
1231 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1232 
1233 	fdplock(fdp);
1234 	if ((error = falloc(p, &fp, &indx)) != 0) {
1235 		fdpunlock(fdp);
1236 		return (error);
1237 	}
1238 	fdpunlock(fdp);
1239 
1240 	flags = FFLAGS(oflags);
1241 
1242 	arc4random_buf(path, sizeof(path));
1243 	memcpy(path, "/tmp/", 5);
1244 	for (i = 5; i < sizeof(path) - 1; i++)
1245 		path[i] = letters[(unsigned char)path[i] & 63];
1246 	path[sizeof(path)-1] = 0;
1247 
1248 	cmode = 0600;
1249 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1250 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1251 		if (error == ERESTART)
1252 			error = EINTR;
1253 		fdplock(fdp);
1254 		fdremove(fdp, indx);
1255 		fdpunlock(fdp);
1256 		closef(fp, p);
1257 		return (error);
1258 	}
1259 	vp = nd.ni_vp;
1260 	fp->f_flag = flags & FMASK;
1261 	fp->f_type = DTYPE_VNODE;
1262 	fp->f_ops = &vnops;
1263 	fp->f_data = vp;
1264 	VOP_UNLOCK(vp);
1265 	*retval = indx;
1266 	fdplock(fdp);
1267 	fdinsert(fdp, indx, cloexec, fp);
1268 	fdpunlock(fdp);
1269 	FRELE(fp, p);
1270 
1271 	/* unlink it */
1272 	/* XXX
1273 	 * there is a wee race here, although it is mostly inconsequential.
1274 	 * perhaps someday we can create a file like object without a name...
1275 	 */
1276 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1277 	    AT_FDCWD, path, p);
1278 	if ((error = namei(&nd)) != 0) {
1279 		printf("can't unlink temp file! %d\n", error);
1280 		error = 0;
1281 	} else {
1282 		vp = nd.ni_vp;
1283 		uvm_vnp_uncache(vp);
1284 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1285 		if (error) {
1286 			printf("error removing vop: %d\n", error);
1287 			error = 0;
1288 		}
1289 	}
1290 
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Get file handle system call
1296  */
1297 int
1298 sys_getfh(struct proc *p, void *v, register_t *retval)
1299 {
1300 	struct sys_getfh_args /* {
1301 		syscallarg(const char *) fname;
1302 		syscallarg(fhandle_t *) fhp;
1303 	} */ *uap = v;
1304 	struct vnode *vp;
1305 	fhandle_t fh;
1306 	int error;
1307 	struct nameidata nd;
1308 
1309 	/*
1310 	 * Must be super user
1311 	 */
1312 	error = suser(p);
1313 	if (error)
1314 		return (error);
1315 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1316 	    SCARG(uap, fname), p);
1317 	error = namei(&nd);
1318 	if (error)
1319 		return (error);
1320 	vp = nd.ni_vp;
1321 	memset(&fh, 0, sizeof(fh));
1322 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1323 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1324 	vput(vp);
1325 	if (error)
1326 		return (error);
1327 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1328 	return (error);
1329 }
1330 
1331 /*
1332  * Open a file given a file handle.
1333  *
1334  * Check permissions, allocate an open file structure,
1335  * and call the device open routine if any.
1336  */
1337 int
1338 sys_fhopen(struct proc *p, void *v, register_t *retval)
1339 {
1340 	struct sys_fhopen_args /* {
1341 		syscallarg(const fhandle_t *) fhp;
1342 		syscallarg(int) flags;
1343 	} */ *uap = v;
1344 	struct filedesc *fdp = p->p_fd;
1345 	struct file *fp;
1346 	struct vnode *vp = NULL;
1347 	struct mount *mp;
1348 	struct ucred *cred = p->p_ucred;
1349 	int flags, cloexec;
1350 	int type, indx, error=0;
1351 	struct flock lf;
1352 	struct vattr va;
1353 	fhandle_t fh;
1354 
1355 	/*
1356 	 * Must be super user
1357 	 */
1358 	if ((error = suser(p)))
1359 		return (error);
1360 
1361 	flags = FFLAGS(SCARG(uap, flags));
1362 	if ((flags & (FREAD | FWRITE)) == 0)
1363 		return (EINVAL);
1364 	if ((flags & O_CREAT))
1365 		return (EINVAL);
1366 
1367 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1368 
1369 	fdplock(fdp);
1370 	if ((error = falloc(p, &fp, &indx)) != 0) {
1371 		fdpunlock(fdp);
1372 		fp = NULL;
1373 		goto bad;
1374 	}
1375 	fdpunlock(fdp);
1376 
1377 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1378 		goto bad;
1379 
1380 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1381 		error = ESTALE;
1382 		goto bad;
1383 	}
1384 
1385 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1386 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1387 		goto bad;
1388 	}
1389 
1390 	/* Now do an effective vn_open */
1391 
1392 	if (vp->v_type == VSOCK) {
1393 		error = EOPNOTSUPP;
1394 		goto bad;
1395 	}
1396 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1397 		error = ENOTDIR;
1398 		goto bad;
1399 	}
1400 	if (flags & FREAD) {
1401 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1402 			goto bad;
1403 	}
1404 	if (flags & (FWRITE | O_TRUNC)) {
1405 		if (vp->v_type == VDIR) {
1406 			error = EISDIR;
1407 			goto bad;
1408 		}
1409 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1410 		    (error = vn_writechk(vp)) != 0)
1411 			goto bad;
1412 	}
1413 	if (flags & O_TRUNC) {
1414 		VATTR_NULL(&va);
1415 		va.va_size = 0;
1416 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1417 			goto bad;
1418 	}
1419 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1420 		goto bad;
1421 	if (flags & FWRITE)
1422 		vp->v_writecount++;
1423 
1424 	/* done with modified vn_open, now finish what sys_open does. */
1425 
1426 	fp->f_flag = flags & FMASK;
1427 	fp->f_type = DTYPE_VNODE;
1428 	fp->f_ops = &vnops;
1429 	fp->f_data = vp;
1430 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1431 		lf.l_whence = SEEK_SET;
1432 		lf.l_start = 0;
1433 		lf.l_len = 0;
1434 		if (flags & O_EXLOCK)
1435 			lf.l_type = F_WRLCK;
1436 		else
1437 			lf.l_type = F_RDLCK;
1438 		type = F_FLOCK;
1439 		if ((flags & FNONBLOCK) == 0)
1440 			type |= F_WAIT;
1441 		VOP_UNLOCK(vp);
1442 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1443 		if (error) {
1444 			vp = NULL;	/* closef will vn_close the file */
1445 			goto bad;
1446 		}
1447 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1448 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1449 	}
1450 	VOP_UNLOCK(vp);
1451 	*retval = indx;
1452 	fdplock(fdp);
1453 	fdinsert(fdp, indx, cloexec, fp);
1454 	fdpunlock(fdp);
1455 	FRELE(fp, p);
1456 	return (0);
1457 
1458 bad:
1459 	if (fp) {
1460 		fdplock(fdp);
1461 		fdremove(fdp, indx);
1462 		fdpunlock(fdp);
1463 		closef(fp, p);
1464 		if (vp != NULL)
1465 			vput(vp);
1466 	}
1467 	return (error);
1468 }
1469 
1470 int
1471 sys_fhstat(struct proc *p, void *v, register_t *retval)
1472 {
1473 	struct sys_fhstat_args /* {
1474 		syscallarg(const fhandle_t *) fhp;
1475 		syscallarg(struct stat *) sb;
1476 	} */ *uap = v;
1477 	struct stat sb;
1478 	int error;
1479 	fhandle_t fh;
1480 	struct mount *mp;
1481 	struct vnode *vp;
1482 
1483 	/*
1484 	 * Must be super user
1485 	 */
1486 	if ((error = suser(p)))
1487 		return (error);
1488 
1489 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1490 		return (error);
1491 
1492 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1493 		return (ESTALE);
1494 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1495 		return (error);
1496 	error = vn_stat(vp, &sb, p);
1497 	vput(vp);
1498 	if (error)
1499 		return (error);
1500 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1501 	return (error);
1502 }
1503 
1504 int
1505 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1506 {
1507 	struct sys_fhstatfs_args /* {
1508 		syscallarg(const fhandle_t *) fhp;
1509 		syscallarg(struct statfs *) buf;
1510 	} */ *uap = v;
1511 	struct statfs *sp;
1512 	fhandle_t fh;
1513 	struct mount *mp;
1514 	struct vnode *vp;
1515 	int error;
1516 
1517 	/*
1518 	 * Must be super user
1519 	 */
1520 	if ((error = suser(p)))
1521 		return (error);
1522 
1523 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1524 		return (error);
1525 
1526 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1527 		return (ESTALE);
1528 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1529 		return (error);
1530 	mp = vp->v_mount;
1531 	sp = &mp->mnt_stat;
1532 	vput(vp);
1533 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1534 		return (error);
1535 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1536 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1537 }
1538 
1539 /*
1540  * Create a special file or named pipe.
1541  */
1542 int
1543 sys_mknod(struct proc *p, void *v, register_t *retval)
1544 {
1545 	struct sys_mknod_args /* {
1546 		syscallarg(const char *) path;
1547 		syscallarg(mode_t) mode;
1548 		syscallarg(int) dev;
1549 	} */ *uap = v;
1550 
1551 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1552 	    SCARG(uap, dev)));
1553 }
1554 
1555 int
1556 sys_mknodat(struct proc *p, void *v, register_t *retval)
1557 {
1558 	struct sys_mknodat_args /* {
1559 		syscallarg(int) fd;
1560 		syscallarg(const char *) path;
1561 		syscallarg(mode_t) mode;
1562 		syscallarg(dev_t) dev;
1563 	} */ *uap = v;
1564 
1565 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1566 	    SCARG(uap, mode), SCARG(uap, dev)));
1567 }
1568 
1569 int
1570 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1571 {
1572 	struct vnode *vp;
1573 	struct vattr vattr;
1574 	int error;
1575 	struct nameidata nd;
1576 
1577 	if (dev == VNOVAL)
1578 		return (EINVAL);
1579 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1580 	nd.ni_pledge = PLEDGE_DPATH;
1581 	nd.ni_unveil = UNVEIL_CREATE;
1582 	if ((error = namei(&nd)) != 0)
1583 		return (error);
1584 	vp = nd.ni_vp;
1585 	if (!S_ISFIFO(mode) || dev != 0) {
1586 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1587 			goto out;
1588 		if (p->p_fd->fd_rdir) {
1589 			error = EINVAL;
1590 			goto out;
1591 		}
1592 	}
1593 	if (vp != NULL)
1594 		error = EEXIST;
1595 	else {
1596 		VATTR_NULL(&vattr);
1597 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1598 		if ((p->p_p->ps_flags & PS_PLEDGE))
1599 			vattr.va_mode &= ACCESSPERMS;
1600 		vattr.va_rdev = dev;
1601 
1602 		switch (mode & S_IFMT) {
1603 		case S_IFMT:	/* used by badsect to flag bad sectors */
1604 			vattr.va_type = VBAD;
1605 			break;
1606 		case S_IFCHR:
1607 			vattr.va_type = VCHR;
1608 			break;
1609 		case S_IFBLK:
1610 			vattr.va_type = VBLK;
1611 			break;
1612 		case S_IFIFO:
1613 #ifndef FIFO
1614 			error = EOPNOTSUPP;
1615 			break;
1616 #else
1617 			if (dev == 0) {
1618 				vattr.va_type = VFIFO;
1619 				break;
1620 			}
1621 			/* FALLTHROUGH */
1622 #endif /* FIFO */
1623 		default:
1624 			error = EINVAL;
1625 			break;
1626 		}
1627 	}
1628 out:
1629 	if (!error) {
1630 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1631 		vput(nd.ni_dvp);
1632 	} else {
1633 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1634 		if (nd.ni_dvp == vp)
1635 			vrele(nd.ni_dvp);
1636 		else
1637 			vput(nd.ni_dvp);
1638 		if (vp)
1639 			vrele(vp);
1640 	}
1641 	return (error);
1642 }
1643 
1644 /*
1645  * Create a named pipe.
1646  */
1647 int
1648 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1649 {
1650 	struct sys_mkfifo_args /* {
1651 		syscallarg(const char *) path;
1652 		syscallarg(mode_t) mode;
1653 	} */ *uap = v;
1654 
1655 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1656 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1657 }
1658 
1659 int
1660 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1661 {
1662 	struct sys_mkfifoat_args /* {
1663 		syscallarg(int) fd;
1664 		syscallarg(const char *) path;
1665 		syscallarg(mode_t) mode;
1666 	} */ *uap = v;
1667 
1668 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1669 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1670 }
1671 
1672 /*
1673  * Make a hard file link.
1674  */
1675 int
1676 sys_link(struct proc *p, void *v, register_t *retval)
1677 {
1678 	struct sys_link_args /* {
1679 		syscallarg(const char *) path;
1680 		syscallarg(const char *) link;
1681 	} */ *uap = v;
1682 
1683 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1684 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1685 }
1686 
1687 int
1688 sys_linkat(struct proc *p, void *v, register_t *retval)
1689 {
1690 	struct sys_linkat_args /* {
1691 		syscallarg(int) fd1;
1692 		syscallarg(const char *) path1;
1693 		syscallarg(int) fd2;
1694 		syscallarg(const char *) path2;
1695 		syscallarg(int) flag;
1696 	} */ *uap = v;
1697 
1698 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1699 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1700 }
1701 
1702 int
1703 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1704     const char *path2, int flag)
1705 {
1706 	struct vnode *vp;
1707 	struct nameidata nd;
1708 	int error, follow;
1709 	int flags;
1710 
1711 	if (flag & ~AT_SYMLINK_FOLLOW)
1712 		return (EINVAL);
1713 
1714 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1715 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1716 	nd.ni_pledge = PLEDGE_RPATH;
1717 	nd.ni_unveil = UNVEIL_READ;
1718 	if ((error = namei(&nd)) != 0)
1719 		return (error);
1720 	vp = nd.ni_vp;
1721 
1722 	flags = LOCKPARENT;
1723 	if (vp->v_type == VDIR) {
1724 		flags |= STRIPSLASHES;
1725 	}
1726 
1727 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1728 	nd.ni_pledge = PLEDGE_CPATH;
1729 	nd.ni_unveil = UNVEIL_CREATE;
1730 	if ((error = namei(&nd)) != 0)
1731 		goto out;
1732 	if (nd.ni_vp) {
1733 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1734 		if (nd.ni_dvp == nd.ni_vp)
1735 			vrele(nd.ni_dvp);
1736 		else
1737 			vput(nd.ni_dvp);
1738 		vrele(nd.ni_vp);
1739 		error = EEXIST;
1740 		goto out;
1741 	}
1742 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1743 out:
1744 	vrele(vp);
1745 	return (error);
1746 }
1747 
1748 /*
1749  * Make a symbolic link.
1750  */
1751 int
1752 sys_symlink(struct proc *p, void *v, register_t *retval)
1753 {
1754 	struct sys_symlink_args /* {
1755 		syscallarg(const char *) path;
1756 		syscallarg(const char *) link;
1757 	} */ *uap = v;
1758 
1759 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1760 }
1761 
1762 int
1763 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1764 {
1765 	struct sys_symlinkat_args /* {
1766 		syscallarg(const char *) path;
1767 		syscallarg(int) fd;
1768 		syscallarg(const char *) link;
1769 	} */ *uap = v;
1770 
1771 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1772 	    SCARG(uap, link)));
1773 }
1774 
1775 int
1776 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1777 {
1778 	struct vattr vattr;
1779 	char *path;
1780 	int error;
1781 	struct nameidata nd;
1782 
1783 	path = pool_get(&namei_pool, PR_WAITOK);
1784 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1785 	if (error)
1786 		goto out;
1787 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1788 	nd.ni_pledge = PLEDGE_CPATH;
1789 	nd.ni_unveil = UNVEIL_CREATE;
1790 	if ((error = namei(&nd)) != 0)
1791 		goto out;
1792 	if (nd.ni_vp) {
1793 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1794 		if (nd.ni_dvp == nd.ni_vp)
1795 			vrele(nd.ni_dvp);
1796 		else
1797 			vput(nd.ni_dvp);
1798 		vrele(nd.ni_vp);
1799 		error = EEXIST;
1800 		goto out;
1801 	}
1802 	VATTR_NULL(&vattr);
1803 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1804 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1805 out:
1806 	pool_put(&namei_pool, path);
1807 	return (error);
1808 }
1809 
1810 /*
1811  * Delete a name from the filesystem.
1812  */
1813 int
1814 sys_unlink(struct proc *p, void *v, register_t *retval)
1815 {
1816 	struct sys_unlink_args /* {
1817 		syscallarg(const char *) path;
1818 	} */ *uap = v;
1819 
1820 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1821 }
1822 
1823 int
1824 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1825 {
1826 	struct sys_unlinkat_args /* {
1827 		syscallarg(int) fd;
1828 		syscallarg(const char *) path;
1829 		syscallarg(int) flag;
1830 	} */ *uap = v;
1831 
1832 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1833 	    SCARG(uap, flag)));
1834 }
1835 
1836 int
1837 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1838 {
1839 	struct vnode *vp;
1840 	int error;
1841 	struct nameidata nd;
1842 
1843 	if (flag & ~AT_REMOVEDIR)
1844 		return (EINVAL);
1845 
1846 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1847 	    fd, path, p);
1848 	nd.ni_pledge = PLEDGE_CPATH;
1849 	nd.ni_unveil = UNVEIL_CREATE;
1850 	if ((error = namei(&nd)) != 0)
1851 		return (error);
1852 	vp = nd.ni_vp;
1853 
1854 	if (flag & AT_REMOVEDIR) {
1855 		if (vp->v_type != VDIR) {
1856 			error = ENOTDIR;
1857 			goto out;
1858 		}
1859 		/*
1860 		 * No rmdir "." please.
1861 		 */
1862 		if (nd.ni_dvp == vp) {
1863 			error = EINVAL;
1864 			goto out;
1865 		}
1866 		/*
1867 		 * A mounted on directory cannot be deleted.
1868 		 */
1869 		if (vp->v_mountedhere != NULL) {
1870 			error = EBUSY;
1871 			goto out;
1872 		}
1873 	}
1874 
1875 	/*
1876 	 * The root of a mounted filesystem cannot be deleted.
1877 	 */
1878 	if (vp->v_flag & VROOT)
1879 		error = EBUSY;
1880 out:
1881 	if (!error) {
1882 		if (flag & AT_REMOVEDIR) {
1883 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1884 		} else {
1885 			(void)uvm_vnp_uncache(vp);
1886 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1887 		}
1888 	} else {
1889 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1890 		if (nd.ni_dvp == vp)
1891 			vrele(nd.ni_dvp);
1892 		else
1893 			vput(nd.ni_dvp);
1894 		vput(vp);
1895 	}
1896 	return (error);
1897 }
1898 
1899 /*
1900  * Reposition read/write file offset.
1901  */
1902 int
1903 sys_lseek(struct proc *p, void *v, register_t *retval)
1904 {
1905 	struct sys_lseek_args /* {
1906 		syscallarg(int) fd;
1907 		syscallarg(off_t) offset;
1908 		syscallarg(int) whence;
1909 	} */ *uap = v;
1910 	struct filedesc *fdp = p->p_fd;
1911 	struct file *fp;
1912 	off_t offset;
1913 	int error;
1914 
1915 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1916 		return (EBADF);
1917 	if (fp->f_ops->fo_seek == NULL) {
1918 		error = ESPIPE;
1919 		goto bad;
1920 	}
1921 	offset = SCARG(uap, offset);
1922 
1923 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1924 	if (error)
1925 		goto bad;
1926 
1927 	*(off_t *)retval = offset;
1928 	mtx_enter(&fp->f_mtx);
1929 	fp->f_seek++;
1930 	mtx_leave(&fp->f_mtx);
1931 	error = 0;
1932  bad:
1933 	FRELE(fp, p);
1934 	return (error);
1935 }
1936 
1937 #if 1
1938 int
1939 sys_pad_lseek(struct proc *p, void *v, register_t *retval)
1940 {
1941 	struct sys_pad_lseek_args *uap = v;
1942 	struct sys_lseek_args unpad;
1943 
1944 	SCARG(&unpad, fd) = SCARG(uap, fd);
1945 	SCARG(&unpad, offset) = SCARG(uap, offset);
1946 	SCARG(&unpad, whence) = SCARG(uap, whence);
1947 	return sys_lseek(p, &unpad, retval);
1948 }
1949 #endif
1950 
1951 /*
1952  * Check access permissions.
1953  */
1954 int
1955 sys_access(struct proc *p, void *v, register_t *retval)
1956 {
1957 	struct sys_access_args /* {
1958 		syscallarg(const char *) path;
1959 		syscallarg(int) amode;
1960 	} */ *uap = v;
1961 
1962 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1963 	    SCARG(uap, amode), 0));
1964 }
1965 
1966 int
1967 sys_faccessat(struct proc *p, void *v, register_t *retval)
1968 {
1969 	struct sys_faccessat_args /* {
1970 		syscallarg(int) fd;
1971 		syscallarg(const char *) path;
1972 		syscallarg(int) amode;
1973 		syscallarg(int) flag;
1974 	} */ *uap = v;
1975 
1976 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1977 	    SCARG(uap, amode), SCARG(uap, flag)));
1978 }
1979 
1980 int
1981 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1982 {
1983 	struct vnode *vp;
1984 	struct ucred *newcred, *oldcred;
1985 	struct nameidata nd;
1986 	int error;
1987 
1988 	if (amode & ~(R_OK | W_OK | X_OK))
1989 		return (EINVAL);
1990 	if (flag & ~AT_EACCESS)
1991 		return (EINVAL);
1992 
1993 	newcred = NULL;
1994 	oldcred = p->p_ucred;
1995 
1996 	/*
1997 	 * If access as real ids was requested and they really differ,
1998 	 * give the thread new creds with them reset
1999 	 */
2000 	if ((flag & AT_EACCESS) == 0 &&
2001 	    (oldcred->cr_uid != oldcred->cr_ruid ||
2002 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
2003 		p->p_ucred = newcred = crdup(oldcred);
2004 		newcred->cr_uid = newcred->cr_ruid;
2005 		newcred->cr_gid = newcred->cr_rgid;
2006 	}
2007 
2008 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2009 	nd.ni_pledge = PLEDGE_RPATH;
2010 	nd.ni_unveil = UNVEIL_READ;
2011 	if ((error = namei(&nd)) != 0)
2012 		goto out;
2013 	vp = nd.ni_vp;
2014 
2015 	/* Flags == 0 means only check for existence. */
2016 	if (amode) {
2017 		int vflags = 0;
2018 
2019 		if (amode & R_OK)
2020 			vflags |= VREAD;
2021 		if (amode & W_OK)
2022 			vflags |= VWRITE;
2023 		if (amode & X_OK)
2024 			vflags |= VEXEC;
2025 
2026 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2027 		if (!error && (vflags & VWRITE))
2028 			error = vn_writechk(vp);
2029 	}
2030 	vput(vp);
2031 out:
2032 	if (newcred != NULL) {
2033 		p->p_ucred = oldcred;
2034 		crfree(newcred);
2035 	}
2036 	return (error);
2037 }
2038 
2039 /*
2040  * Get file status; this version follows links.
2041  */
2042 int
2043 sys_stat(struct proc *p, void *v, register_t *retval)
2044 {
2045 	struct sys_stat_args /* {
2046 		syscallarg(const char *) path;
2047 		syscallarg(struct stat *) ub;
2048 	} */ *uap = v;
2049 
2050 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2051 }
2052 
2053 int
2054 sys_fstatat(struct proc *p, void *v, register_t *retval)
2055 {
2056 	struct sys_fstatat_args /* {
2057 		syscallarg(int) fd;
2058 		syscallarg(const char *) path;
2059 		syscallarg(struct stat *) buf;
2060 		syscallarg(int) flag;
2061 	} */ *uap = v;
2062 
2063 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2064 	    SCARG(uap, buf), SCARG(uap, flag)));
2065 }
2066 
2067 int
2068 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2069 {
2070 	struct stat sb;
2071 	int error, follow;
2072 	struct nameidata nd;
2073 
2074 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2075 		return (EINVAL);
2076 
2077 
2078 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2079 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2080 	nd.ni_pledge = PLEDGE_RPATH;
2081 	nd.ni_unveil = UNVEIL_READ;
2082 	if ((error = namei(&nd)) != 0)
2083 		return (error);
2084 	error = vn_stat(nd.ni_vp, &sb, p);
2085 	vput(nd.ni_vp);
2086 	if (error)
2087 		return (error);
2088 	/* Don't let non-root see generation numbers (for NFS security) */
2089 	if (suser(p))
2090 		sb.st_gen = 0;
2091 	error = copyout(&sb, buf, sizeof(sb));
2092 #ifdef KTRACE
2093 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2094 		ktrstat(p, &sb);
2095 #endif
2096 	return (error);
2097 }
2098 
2099 /*
2100  * Get file status; this version does not follow links.
2101  */
2102 int
2103 sys_lstat(struct proc *p, void *v, register_t *retval)
2104 {
2105 	struct sys_lstat_args /* {
2106 		syscallarg(const char *) path;
2107 		syscallarg(struct stat *) ub;
2108 	} */ *uap = v;
2109 
2110 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2111 	    AT_SYMLINK_NOFOLLOW));
2112 }
2113 
2114 /*
2115  * Get configurable pathname variables.
2116  */
2117 int
2118 sys_pathconf(struct proc *p, void *v, register_t *retval)
2119 {
2120 	struct sys_pathconf_args /* {
2121 		syscallarg(const char *) path;
2122 		syscallarg(int) name;
2123 	} */ *uap = v;
2124 	int error;
2125 	struct nameidata nd;
2126 
2127 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2128 	    SCARG(uap, path), p);
2129 	nd.ni_pledge = PLEDGE_RPATH;
2130 	nd.ni_unveil = UNVEIL_READ;
2131 	if ((error = namei(&nd)) != 0)
2132 		return (error);
2133 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2134 	vput(nd.ni_vp);
2135 	return (error);
2136 }
2137 
2138 /*
2139  * Return target name of a symbolic link.
2140  */
2141 int
2142 sys_readlink(struct proc *p, void *v, register_t *retval)
2143 {
2144 	struct sys_readlink_args /* {
2145 		syscallarg(const char *) path;
2146 		syscallarg(char *) buf;
2147 		syscallarg(size_t) count;
2148 	} */ *uap = v;
2149 
2150 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2151 	    SCARG(uap, count), retval));
2152 }
2153 
2154 int
2155 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2156 {
2157 	struct sys_readlinkat_args /* {
2158 		syscallarg(int) fd;
2159 		syscallarg(const char *) path;
2160 		syscallarg(char *) buf;
2161 		syscallarg(size_t) count;
2162 	} */ *uap = v;
2163 
2164 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2165 	    SCARG(uap, buf), SCARG(uap, count), retval));
2166 }
2167 
2168 int
2169 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2170     size_t count, register_t *retval)
2171 {
2172 	struct vnode *vp;
2173 	struct iovec aiov;
2174 	struct uio auio;
2175 	int error;
2176 	struct nameidata nd;
2177 
2178 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2179 	nd.ni_pledge = PLEDGE_RPATH;
2180 	nd.ni_unveil = UNVEIL_READ;
2181 	if ((error = namei(&nd)) != 0)
2182 		return (error);
2183 	vp = nd.ni_vp;
2184 	if (vp->v_type != VLNK)
2185 		error = EINVAL;
2186 	else {
2187 		aiov.iov_base = buf;
2188 		aiov.iov_len = count;
2189 		auio.uio_iov = &aiov;
2190 		auio.uio_iovcnt = 1;
2191 		auio.uio_offset = 0;
2192 		auio.uio_rw = UIO_READ;
2193 		auio.uio_segflg = UIO_USERSPACE;
2194 		auio.uio_procp = p;
2195 		auio.uio_resid = count;
2196 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2197 		*retval = count - auio.uio_resid;
2198 	}
2199 	vput(vp);
2200 	return (error);
2201 }
2202 
2203 /*
2204  * Change flags of a file given a path name.
2205  */
2206 int
2207 sys_chflags(struct proc *p, void *v, register_t *retval)
2208 {
2209 	struct sys_chflags_args /* {
2210 		syscallarg(const char *) path;
2211 		syscallarg(u_int) flags;
2212 	} */ *uap = v;
2213 
2214 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2215 	    SCARG(uap, flags), 0));
2216 }
2217 
2218 int
2219 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2220 {
2221 	struct sys_chflagsat_args /* {
2222 		syscallarg(int) fd;
2223 		syscallarg(const char *) path;
2224 		syscallarg(u_int) flags;
2225 		syscallarg(int) atflags;
2226 	} */ *uap = v;
2227 
2228 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2229 	    SCARG(uap, flags), SCARG(uap, atflags)));
2230 }
2231 
2232 int
2233 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2234 {
2235 	struct nameidata nd;
2236 	int error, follow;
2237 
2238 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2239 		return (EINVAL);
2240 
2241 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2242 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2243 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2244 	nd.ni_unveil = UNVEIL_WRITE;
2245 	if ((error = namei(&nd)) != 0)
2246 		return (error);
2247 	return (dovchflags(p, nd.ni_vp, flags));
2248 }
2249 
2250 /*
2251  * Change flags of a file given a file descriptor.
2252  */
2253 int
2254 sys_fchflags(struct proc *p, void *v, register_t *retval)
2255 {
2256 	struct sys_fchflags_args /* {
2257 		syscallarg(int) fd;
2258 		syscallarg(u_int) flags;
2259 	} */ *uap = v;
2260 	struct file *fp;
2261 	struct vnode *vp;
2262 	int error;
2263 
2264 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2265 		return (error);
2266 	vp = fp->f_data;
2267 	vref(vp);
2268 	FRELE(fp, p);
2269 	return (dovchflags(p, vp, SCARG(uap, flags)));
2270 }
2271 
2272 int
2273 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2274 {
2275 	struct vattr vattr;
2276 	int error;
2277 
2278 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2279 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2280 		error = EROFS;
2281 	else if (flags == VNOVAL)
2282 		error = EINVAL;
2283 	else {
2284 		if (suser(p)) {
2285 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2286 			    != 0)
2287 				goto out;
2288 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2289 				error = EINVAL;
2290 				goto out;
2291 			}
2292 		}
2293 		VATTR_NULL(&vattr);
2294 		vattr.va_flags = flags;
2295 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2296 	}
2297 out:
2298 	vput(vp);
2299 	return (error);
2300 }
2301 
2302 /*
2303  * Change mode of a file given path name.
2304  */
2305 int
2306 sys_chmod(struct proc *p, void *v, register_t *retval)
2307 {
2308 	struct sys_chmod_args /* {
2309 		syscallarg(const char *) path;
2310 		syscallarg(mode_t) mode;
2311 	} */ *uap = v;
2312 
2313 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2314 }
2315 
2316 int
2317 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2318 {
2319 	struct sys_fchmodat_args /* {
2320 		syscallarg(int) fd;
2321 		syscallarg(const char *) path;
2322 		syscallarg(mode_t) mode;
2323 		syscallarg(int) flag;
2324 	} */ *uap = v;
2325 
2326 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2327 	    SCARG(uap, mode), SCARG(uap, flag)));
2328 }
2329 
2330 int
2331 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2332 {
2333 	struct vnode *vp;
2334 	struct vattr vattr;
2335 	int error, follow;
2336 	struct nameidata nd;
2337 
2338 	if (mode & ~(S_IFMT | ALLPERMS))
2339 		return (EINVAL);
2340 	if ((p->p_p->ps_flags & PS_PLEDGE))
2341 		mode &= ACCESSPERMS;
2342 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2343 		return (EINVAL);
2344 
2345 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2346 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2347 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2348 	nd.ni_unveil = UNVEIL_WRITE;
2349 	if ((error = namei(&nd)) != 0)
2350 		return (error);
2351 	vp = nd.ni_vp;
2352 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2353 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2354 		error = EROFS;
2355 	else {
2356 		VATTR_NULL(&vattr);
2357 		vattr.va_mode = mode & ALLPERMS;
2358 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2359 	}
2360 	vput(vp);
2361 	return (error);
2362 }
2363 
2364 /*
2365  * Change mode of a file given a file descriptor.
2366  */
2367 int
2368 sys_fchmod(struct proc *p, void *v, register_t *retval)
2369 {
2370 	struct sys_fchmod_args /* {
2371 		syscallarg(int) fd;
2372 		syscallarg(mode_t) mode;
2373 	} */ *uap = v;
2374 	struct vattr vattr;
2375 	struct vnode *vp;
2376 	struct file *fp;
2377 	mode_t mode = SCARG(uap, mode);
2378 	int error;
2379 
2380 	if (mode & ~(S_IFMT | ALLPERMS))
2381 		return (EINVAL);
2382 	if ((p->p_p->ps_flags & PS_PLEDGE))
2383 		mode &= ACCESSPERMS;
2384 
2385 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2386 		return (error);
2387 	vp = fp->f_data;
2388 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2389 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2390 		error = EROFS;
2391 	else {
2392 		VATTR_NULL(&vattr);
2393 		vattr.va_mode = mode & ALLPERMS;
2394 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2395 	}
2396 	VOP_UNLOCK(vp);
2397 	FRELE(fp, p);
2398 	return (error);
2399 }
2400 
2401 /*
2402  * Set ownership given a path name.
2403  */
2404 int
2405 sys_chown(struct proc *p, void *v, register_t *retval)
2406 {
2407 	struct sys_chown_args /* {
2408 		syscallarg(const char *) path;
2409 		syscallarg(uid_t) uid;
2410 		syscallarg(gid_t) gid;
2411 	} */ *uap = v;
2412 
2413 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2414 	    SCARG(uap, gid), 0));
2415 }
2416 
2417 int
2418 sys_fchownat(struct proc *p, void *v, register_t *retval)
2419 {
2420 	struct sys_fchownat_args /* {
2421 		syscallarg(int) fd;
2422 		syscallarg(const char *) path;
2423 		syscallarg(uid_t) uid;
2424 		syscallarg(gid_t) gid;
2425 		syscallarg(int) flag;
2426 	} */ *uap = v;
2427 
2428 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2429 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2430 }
2431 
2432 int
2433 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2434     int flag)
2435 {
2436 	struct vnode *vp;
2437 	struct vattr vattr;
2438 	int error, follow;
2439 	struct nameidata nd;
2440 	mode_t mode;
2441 
2442 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2443 		return (EINVAL);
2444 
2445 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2446 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2447 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2448 	nd.ni_unveil = UNVEIL_WRITE;
2449 	if ((error = namei(&nd)) != 0)
2450 		return (error);
2451 	vp = nd.ni_vp;
2452 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2453 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2454 		error = EROFS;
2455 	else {
2456 		if ((error = pledge_chown(p, uid, gid)))
2457 			goto out;
2458 		if ((uid != -1 || gid != -1) &&
2459 		    !vnoperm(vp) &&
2460 		    (suser(p) || suid_clear)) {
2461 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2462 			if (error)
2463 				goto out;
2464 			mode = vattr.va_mode & ~(VSUID | VSGID);
2465 			if (mode == vattr.va_mode)
2466 				mode = VNOVAL;
2467 		} else
2468 			mode = VNOVAL;
2469 		VATTR_NULL(&vattr);
2470 		vattr.va_uid = uid;
2471 		vattr.va_gid = gid;
2472 		vattr.va_mode = mode;
2473 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2474 	}
2475 out:
2476 	vput(vp);
2477 	return (error);
2478 }
2479 
2480 /*
2481  * Set ownership given a path name, without following links.
2482  */
2483 int
2484 sys_lchown(struct proc *p, void *v, register_t *retval)
2485 {
2486 	struct sys_lchown_args /* {
2487 		syscallarg(const char *) path;
2488 		syscallarg(uid_t) uid;
2489 		syscallarg(gid_t) gid;
2490 	} */ *uap = v;
2491 	struct vnode *vp;
2492 	struct vattr vattr;
2493 	int error;
2494 	struct nameidata nd;
2495 	mode_t mode;
2496 	uid_t uid = SCARG(uap, uid);
2497 	gid_t gid = SCARG(uap, gid);
2498 
2499 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2500 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2501 	nd.ni_unveil = UNVEIL_WRITE;
2502 	if ((error = namei(&nd)) != 0)
2503 		return (error);
2504 	vp = nd.ni_vp;
2505 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2506 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2507 		error = EROFS;
2508 	else {
2509 		if ((error = pledge_chown(p, uid, gid)))
2510 			goto out;
2511 		if ((uid != -1 || gid != -1) &&
2512 		    !vnoperm(vp) &&
2513 		    (suser(p) || suid_clear)) {
2514 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2515 			if (error)
2516 				goto out;
2517 			mode = vattr.va_mode & ~(VSUID | VSGID);
2518 			if (mode == vattr.va_mode)
2519 				mode = VNOVAL;
2520 		} else
2521 			mode = VNOVAL;
2522 		VATTR_NULL(&vattr);
2523 		vattr.va_uid = uid;
2524 		vattr.va_gid = gid;
2525 		vattr.va_mode = mode;
2526 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2527 	}
2528 out:
2529 	vput(vp);
2530 	return (error);
2531 }
2532 
2533 /*
2534  * Set ownership given a file descriptor.
2535  */
2536 int
2537 sys_fchown(struct proc *p, void *v, register_t *retval)
2538 {
2539 	struct sys_fchown_args /* {
2540 		syscallarg(int) fd;
2541 		syscallarg(uid_t) uid;
2542 		syscallarg(gid_t) gid;
2543 	} */ *uap = v;
2544 	struct vnode *vp;
2545 	struct vattr vattr;
2546 	int error;
2547 	struct file *fp;
2548 	mode_t mode;
2549 	uid_t uid = SCARG(uap, uid);
2550 	gid_t gid = SCARG(uap, gid);
2551 
2552 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2553 		return (error);
2554 	vp = fp->f_data;
2555 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2556 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2557 		error = EROFS;
2558 	else {
2559 		if ((error = pledge_chown(p, uid, gid)))
2560 			goto out;
2561 		if ((uid != -1 || gid != -1) &&
2562 		    !vnoperm(vp) &&
2563 		    (suser(p) || suid_clear)) {
2564 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2565 			if (error)
2566 				goto out;
2567 			mode = vattr.va_mode & ~(VSUID | VSGID);
2568 			if (mode == vattr.va_mode)
2569 				mode = VNOVAL;
2570 		} else
2571 			mode = VNOVAL;
2572 		VATTR_NULL(&vattr);
2573 		vattr.va_uid = uid;
2574 		vattr.va_gid = gid;
2575 		vattr.va_mode = mode;
2576 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2577 	}
2578 out:
2579 	VOP_UNLOCK(vp);
2580 	FRELE(fp, p);
2581 	return (error);
2582 }
2583 
2584 /*
2585  * Set the access and modification times given a path name.
2586  */
2587 int
2588 sys_utimes(struct proc *p, void *v, register_t *retval)
2589 {
2590 	struct sys_utimes_args /* {
2591 		syscallarg(const char *) path;
2592 		syscallarg(const struct timeval *) tptr;
2593 	} */ *uap = v;
2594 
2595 	struct timespec ts[2];
2596 	struct timeval tv[2];
2597 	const struct timeval *tvp;
2598 	int error;
2599 
2600 	tvp = SCARG(uap, tptr);
2601 	if (tvp != NULL) {
2602 		error = copyin(tvp, tv, sizeof(tv));
2603 		if (error)
2604 			return (error);
2605 #ifdef KTRACE
2606 		if (KTRPOINT(p, KTR_STRUCT))
2607 			ktrabstimeval(p, &tv);
2608 #endif
2609 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2610 			return (EINVAL);
2611 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2612 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2613 	} else
2614 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2615 
2616 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2617 }
2618 
2619 int
2620 sys_utimensat(struct proc *p, void *v, register_t *retval)
2621 {
2622 	struct sys_utimensat_args /* {
2623 		syscallarg(int) fd;
2624 		syscallarg(const char *) path;
2625 		syscallarg(const struct timespec *) times;
2626 		syscallarg(int) flag;
2627 	} */ *uap = v;
2628 
2629 	struct timespec ts[2];
2630 	const struct timespec *tsp;
2631 	int error, i;
2632 
2633 	tsp = SCARG(uap, times);
2634 	if (tsp != NULL) {
2635 		error = copyin(tsp, ts, sizeof(ts));
2636 		if (error)
2637 			return (error);
2638 		for (i = 0; i < nitems(ts); i++) {
2639 			if (ts[i].tv_nsec == UTIME_NOW)
2640 				continue;
2641 			if (ts[i].tv_nsec == UTIME_OMIT)
2642 				continue;
2643 #ifdef KTRACE
2644 			if (KTRPOINT(p, KTR_STRUCT))
2645 				ktrabstimespec(p, &ts[i]);
2646 #endif
2647 			if (!timespecisvalid(&ts[i]))
2648 				return (EINVAL);
2649 		}
2650 	} else
2651 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2652 
2653 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2654 	    SCARG(uap, flag)));
2655 }
2656 
2657 int
2658 doutimensat(struct proc *p, int fd, const char *path,
2659     struct timespec ts[2], int flag)
2660 {
2661 	struct vnode *vp;
2662 	int error, follow;
2663 	struct nameidata nd;
2664 
2665 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2666 		return (EINVAL);
2667 
2668 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2669 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2670 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2671 	nd.ni_unveil = UNVEIL_WRITE;
2672 	if ((error = namei(&nd)) != 0)
2673 		return (error);
2674 	vp = nd.ni_vp;
2675 
2676 	return (dovutimens(p, vp, ts));
2677 }
2678 
2679 int
2680 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2681 {
2682 	struct vattr vattr;
2683 	struct timespec now;
2684 	int error;
2685 
2686 #ifdef KTRACE
2687 	/* if they're both UTIME_NOW, then don't report either */
2688 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2689 	    KTRPOINT(p, KTR_STRUCT)) {
2690 		ktrabstimespec(p, &ts[0]);
2691 		ktrabstimespec(p, &ts[1]);
2692 	}
2693 #endif
2694 
2695 	VATTR_NULL(&vattr);
2696 
2697 	/*  make sure ctime is updated even if neither mtime nor atime is */
2698 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2699 
2700 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2701 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2702 			vattr.va_vaflags |= VA_UTIMES_NULL;
2703 
2704 		getnanotime(&now);
2705 		if (ts[0].tv_nsec == UTIME_NOW)
2706 			ts[0] = now;
2707 		if (ts[1].tv_nsec == UTIME_NOW)
2708 			ts[1] = now;
2709 	}
2710 
2711 	if (ts[0].tv_nsec != UTIME_OMIT)
2712 		vattr.va_atime = ts[0];
2713 	if (ts[1].tv_nsec != UTIME_OMIT)
2714 		vattr.va_mtime = ts[1];
2715 
2716 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2717 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2718 		error = EROFS;
2719 	else
2720 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2721 	vput(vp);
2722 	return (error);
2723 }
2724 
2725 /*
2726  * Set the access and modification times given a file descriptor.
2727  */
2728 int
2729 sys_futimes(struct proc *p, void *v, register_t *retval)
2730 {
2731 	struct sys_futimes_args /* {
2732 		syscallarg(int) fd;
2733 		syscallarg(const struct timeval *) tptr;
2734 	} */ *uap = v;
2735 	struct timeval tv[2];
2736 	struct timespec ts[2];
2737 	const struct timeval *tvp;
2738 	int error;
2739 
2740 	tvp = SCARG(uap, tptr);
2741 	if (tvp != NULL) {
2742 		error = copyin(tvp, tv, sizeof(tv));
2743 		if (error)
2744 			return (error);
2745 #ifdef KTRACE
2746 		if (KTRPOINT(p, KTR_STRUCT)) {
2747 			ktrabstimeval(p, &tv[0]);
2748 			ktrabstimeval(p, &tv[1]);
2749 		}
2750 #endif
2751 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2752 			return (EINVAL);
2753 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2754 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2755 	} else
2756 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2757 
2758 	return (dofutimens(p, SCARG(uap, fd), ts));
2759 }
2760 
2761 int
2762 sys_futimens(struct proc *p, void *v, register_t *retval)
2763 {
2764 	struct sys_futimens_args /* {
2765 		syscallarg(int) fd;
2766 		syscallarg(const struct timespec *) times;
2767 	} */ *uap = v;
2768 	struct timespec ts[2];
2769 	const struct timespec *tsp;
2770 	int error, i;
2771 
2772 	tsp = SCARG(uap, times);
2773 	if (tsp != NULL) {
2774 		error = copyin(tsp, ts, sizeof(ts));
2775 		if (error)
2776 			return (error);
2777 		for (i = 0; i < nitems(ts); i++) {
2778 			if (ts[i].tv_nsec == UTIME_NOW)
2779 				continue;
2780 			if (ts[i].tv_nsec == UTIME_OMIT)
2781 				continue;
2782 #ifdef KTRACE
2783 			if (KTRPOINT(p, KTR_STRUCT))
2784 				ktrabstimespec(p, &ts[i]);
2785 #endif
2786 			if (!timespecisvalid(&ts[i]))
2787 				return (EINVAL);
2788 		}
2789 	} else
2790 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2791 
2792 	return (dofutimens(p, SCARG(uap, fd), ts));
2793 }
2794 
2795 int
2796 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2797 {
2798 	struct file *fp;
2799 	struct vnode *vp;
2800 	int error;
2801 
2802 	if ((error = getvnode(p, fd, &fp)) != 0)
2803 		return (error);
2804 	vp = fp->f_data;
2805 	vref(vp);
2806 	FRELE(fp, p);
2807 
2808 	return (dovutimens(p, vp, ts));
2809 }
2810 
2811 /*
2812  * Truncate a file given its path name.
2813  */
2814 int
2815 sys_truncate(struct proc *p, void *v, register_t *retval)
2816 {
2817 	struct sys_truncate_args /* {
2818 		syscallarg(const char *) path;
2819 		syscallarg(off_t) length;
2820 	} */ *uap = v;
2821 	struct vnode *vp;
2822 	struct vattr vattr;
2823 	int error;
2824 	struct nameidata nd;
2825 
2826 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2827 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2828 	nd.ni_unveil = UNVEIL_WRITE;
2829 	if ((error = namei(&nd)) != 0)
2830 		return (error);
2831 	vp = nd.ni_vp;
2832 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2833 	if (vp->v_type == VDIR)
2834 		error = EISDIR;
2835 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2836 	    (error = vn_writechk(vp)) == 0) {
2837 		VATTR_NULL(&vattr);
2838 		vattr.va_size = SCARG(uap, length);
2839 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2840 	}
2841 	vput(vp);
2842 	return (error);
2843 }
2844 
2845 /*
2846  * Truncate a file given a file descriptor.
2847  */
2848 int
2849 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2850 {
2851 	struct sys_ftruncate_args /* {
2852 		syscallarg(int) fd;
2853 		syscallarg(off_t) length;
2854 	} */ *uap = v;
2855 	struct vattr vattr;
2856 	struct vnode *vp;
2857 	struct file *fp;
2858 	off_t len;
2859 	int error;
2860 
2861 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2862 		return (error);
2863 	len = SCARG(uap, length);
2864 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2865 		error = EINVAL;
2866 		goto bad;
2867 	}
2868 	vp = fp->f_data;
2869 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2870 	if (vp->v_type == VDIR)
2871 		error = EISDIR;
2872 	else if ((error = vn_writechk(vp)) == 0) {
2873 		VATTR_NULL(&vattr);
2874 		vattr.va_size = len;
2875 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2876 	}
2877 	VOP_UNLOCK(vp);
2878 bad:
2879 	FRELE(fp, p);
2880 	return (error);
2881 }
2882 
2883 #if 1
2884 int
2885 sys_pad_truncate(struct proc *p, void *v, register_t *retval)
2886 {
2887 	struct sys_pad_truncate_args *uap = v;
2888 	struct sys_truncate_args unpad;
2889 
2890 	SCARG(&unpad, path) = SCARG(uap, path);
2891 	SCARG(&unpad, length) = SCARG(uap, length);
2892 	return sys_truncate(p, &unpad, retval);
2893 }
2894 
2895 int
2896 sys_pad_ftruncate(struct proc *p, void *v, register_t *retval)
2897 {
2898 	struct sys_pad_ftruncate_args *uap = v;
2899 	struct sys_ftruncate_args unpad;
2900 
2901 	SCARG(&unpad, fd) = SCARG(uap, fd);
2902 	SCARG(&unpad, length) = SCARG(uap, length);
2903 	return sys_ftruncate(p, &unpad, retval);
2904 }
2905 #endif
2906 
2907 /*
2908  * Sync an open file.
2909  */
2910 int
2911 sys_fsync(struct proc *p, void *v, register_t *retval)
2912 {
2913 	struct sys_fsync_args /* {
2914 		syscallarg(int) fd;
2915 	} */ *uap = v;
2916 	struct vnode *vp;
2917 	struct file *fp;
2918 	int error;
2919 
2920 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2921 		return (error);
2922 	vp = fp->f_data;
2923 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2924 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2925 #ifdef FFS_SOFTUPDATES
2926 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2927 		error = softdep_fsync(vp);
2928 #endif
2929 
2930 	VOP_UNLOCK(vp);
2931 	FRELE(fp, p);
2932 	return (error);
2933 }
2934 
2935 /*
2936  * Rename files.  Source and destination must either both be directories,
2937  * or both not be directories.  If target is a directory, it must be empty.
2938  */
2939 int
2940 sys_rename(struct proc *p, void *v, register_t *retval)
2941 {
2942 	struct sys_rename_args /* {
2943 		syscallarg(const char *) from;
2944 		syscallarg(const char *) to;
2945 	} */ *uap = v;
2946 
2947 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2948 	    SCARG(uap, to)));
2949 }
2950 
2951 int
2952 sys_renameat(struct proc *p, void *v, register_t *retval)
2953 {
2954 	struct sys_renameat_args /* {
2955 		syscallarg(int) fromfd;
2956 		syscallarg(const char *) from;
2957 		syscallarg(int) tofd;
2958 		syscallarg(const char *) to;
2959 	} */ *uap = v;
2960 
2961 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2962 	    SCARG(uap, tofd), SCARG(uap, to)));
2963 }
2964 
/*
 * Common back end for rename(2) and renameat(2).
 *
 * Looks up `from' relative to `fromfd' and `to' relative to `tofd',
 * checks that source and target agree on being (or not being)
 * directories, and calls VOP_RENAME().  Returns 0 on success or an
 * errno value.  Renaming a vnode onto itself is reported as success
 * without calling VOP_RENAME().
 */
int
dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
    const char *to)
{
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;
	int flags;

	/* Look up the source, asking for its parent directory as well. */
	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    fromfd, from, p);
	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;

	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
	/*
	 * rename("foo/", "bar/");  is  OK
	 */
	if (fvp->v_type == VDIR)
		flags |= STRIPSLASHES;

	/* Look up the target; parent and leaf are both requested locked. */
	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
	tond.ni_pledge = PLEDGE_CPATH;
	tond.ni_unveil = UNVEIL_CREATE;
	if ((error = namei(&tond)) != 0) {
		/* Only the source lookup needs to be undone on this path. */
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	if (tvp != NULL) {
		/* An existing target must be the same kind as the source. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not also be the target's parent directory. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number)
	 */
	/* -1 is an internal marker, turned into a 0 return value below. */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		if (tvp) {
			(void)uvm_vnp_uncache(tvp);
		}
		/*
		 * NOTE(review): nothing is released after VOP_RENAME() on
		 * this path, so it presumably consumes the vnode references
		 * held by both lookups -- confirm against VOP_RENAME(9).
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
	} else {
		/* Undo both lookups: target side first, then source side. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		/* vrele() instead of vput() when parent and leaf coincide. */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* Release the start directory and path buffer of the target lookup. */
	vrele(tond.ni_startdir);
	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
out1:
	/* Likewise for the source lookup. */
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
	if (error == -1)
		return (0);
	return (error);
}
3046 
3047 /*
3048  * Make a directory file.
3049  */
3050 int
3051 sys_mkdir(struct proc *p, void *v, register_t *retval)
3052 {
3053 	struct sys_mkdir_args /* {
3054 		syscallarg(const char *) path;
3055 		syscallarg(mode_t) mode;
3056 	} */ *uap = v;
3057 
3058 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3059 }
3060 
3061 int
3062 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3063 {
3064 	struct sys_mkdirat_args /* {
3065 		syscallarg(int) fd;
3066 		syscallarg(const char *) path;
3067 		syscallarg(mode_t) mode;
3068 	} */ *uap = v;
3069 
3070 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3071 	    SCARG(uap, mode)));
3072 }
3073 
3074 int
3075 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3076 {
3077 	struct vnode *vp;
3078 	struct vattr vattr;
3079 	int error;
3080 	struct nameidata nd;
3081 
3082 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3083 	    fd, path, p);
3084 	nd.ni_pledge = PLEDGE_CPATH;
3085 	nd.ni_unveil = UNVEIL_CREATE;
3086 	if ((error = namei(&nd)) != 0)
3087 		return (error);
3088 	vp = nd.ni_vp;
3089 	if (vp != NULL) {
3090 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3091 		if (nd.ni_dvp == vp)
3092 			vrele(nd.ni_dvp);
3093 		else
3094 			vput(nd.ni_dvp);
3095 		vrele(vp);
3096 		return (EEXIST);
3097 	}
3098 	VATTR_NULL(&vattr);
3099 	vattr.va_type = VDIR;
3100 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3101 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3102 	if (!error)
3103 		vput(nd.ni_vp);
3104 	return (error);
3105 }
3106 
3107 /*
3108  * Remove a directory file.
3109  */
3110 int
3111 sys_rmdir(struct proc *p, void *v, register_t *retval)
3112 {
3113 	struct sys_rmdir_args /* {
3114 		syscallarg(const char *) path;
3115 	} */ *uap = v;
3116 
3117 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3118 }
3119 
3120 /*
3121  * Read a block of directory entries in a file system independent format.
3122  */
3123 int
3124 sys_getdents(struct proc *p, void *v, register_t *retval)
3125 {
3126 	struct sys_getdents_args /* {
3127 		syscallarg(int) fd;
3128 		syscallarg(void *) buf;
3129 		syscallarg(size_t) buflen;
3130 	} */ *uap = v;
3131 	struct vnode *vp;
3132 	struct file *fp;
3133 	struct uio auio;
3134 	struct iovec aiov;
3135 	size_t buflen;
3136 	int error, eofflag;
3137 
3138 	buflen = SCARG(uap, buflen);
3139 
3140 	if (buflen > INT_MAX)
3141 		return (EINVAL);
3142 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3143 		return (error);
3144 	if ((fp->f_flag & FREAD) == 0) {
3145 		error = EBADF;
3146 		goto bad;
3147 	}
3148 	vp = fp->f_data;
3149 	if (vp->v_type != VDIR) {
3150 		error = EINVAL;
3151 		goto bad;
3152 	}
3153 
3154 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3155 
3156 	if (fp->f_offset < 0) {
3157 		VOP_UNLOCK(vp);
3158 		error = EINVAL;
3159 		goto bad;
3160 	}
3161 
3162 	aiov.iov_base = SCARG(uap, buf);
3163 	aiov.iov_len = buflen;
3164 	auio.uio_iov = &aiov;
3165 	auio.uio_iovcnt = 1;
3166 	auio.uio_rw = UIO_READ;
3167 	auio.uio_segflg = UIO_USERSPACE;
3168 	auio.uio_procp = p;
3169 	auio.uio_resid = buflen;
3170 	auio.uio_offset = fp->f_offset;
3171 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3172 	mtx_enter(&fp->f_mtx);
3173 	fp->f_offset = auio.uio_offset;
3174 	mtx_leave(&fp->f_mtx);
3175 	VOP_UNLOCK(vp);
3176 	if (error)
3177 		goto bad;
3178 	*retval = buflen - auio.uio_resid;
3179 bad:
3180 	FRELE(fp, p);
3181 	return (error);
3182 }
3183 
3184 /*
3185  * Set the mode mask for creation of filesystem nodes.
3186  */
3187 int
3188 sys_umask(struct proc *p, void *v, register_t *retval)
3189 {
3190 	struct sys_umask_args /* {
3191 		syscallarg(mode_t) newmask;
3192 	} */ *uap = v;
3193 	struct filedesc *fdp = p->p_fd;
3194 
3195 	fdplock(fdp);
3196 	*retval = fdp->fd_cmask;
3197 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3198 	fdpunlock(fdp);
3199 	return (0);
3200 }
3201 
3202 /*
3203  * Void all references to file by ripping underlying filesystem
3204  * away from vnode.
3205  */
3206 int
3207 sys_revoke(struct proc *p, void *v, register_t *retval)
3208 {
3209 	struct sys_revoke_args /* {
3210 		syscallarg(const char *) path;
3211 	} */ *uap = v;
3212 	struct vnode *vp;
3213 	struct vattr vattr;
3214 	int error;
3215 	struct nameidata nd;
3216 
3217 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3218 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3219 	nd.ni_unveil = UNVEIL_READ;
3220 	if ((error = namei(&nd)) != 0)
3221 		return (error);
3222 	vp = nd.ni_vp;
3223 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3224 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3225 		error = ENOTTY;
3226 		goto out;
3227 	}
3228 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3229 		goto out;
3230 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3231 	    (error = suser(p)))
3232 		goto out;
3233 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3234 		VOP_REVOKE(vp, REVOKEALL);
3235 out:
3236 	vrele(vp);
3237 	return (error);
3238 }
3239 
3240 /*
3241  * Convert a user file descriptor to a kernel file entry.
3242  *
3243  * On return *fpp is FREF:ed.
3244  */
3245 int
3246 getvnode(struct proc *p, int fd, struct file **fpp)
3247 {
3248 	struct file *fp;
3249 	struct vnode *vp;
3250 
3251 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3252 		return (EBADF);
3253 
3254 	if (fp->f_type != DTYPE_VNODE) {
3255 		FRELE(fp, p);
3256 		return (EINVAL);
3257 	}
3258 
3259 	vp = fp->f_data;
3260 	if (vp->v_type == VBAD) {
3261 		FRELE(fp, p);
3262 		return (EBADF);
3263 	}
3264 
3265 	*fpp = fp;
3266 
3267 	return (0);
3268 }
3269 
3270 /*
3271  * Positional read system call.
3272  */
3273 int
3274 sys_pread(struct proc *p, void *v, register_t *retval)
3275 {
3276 	struct sys_pread_args /* {
3277 		syscallarg(int) fd;
3278 		syscallarg(void *) buf;
3279 		syscallarg(size_t) nbyte;
3280 		syscallarg(off_t) offset;
3281 	} */ *uap = v;
3282 	struct iovec iov;
3283 	struct uio auio;
3284 
3285 	iov.iov_base = SCARG(uap, buf);
3286 	iov.iov_len = SCARG(uap, nbyte);
3287 	if (iov.iov_len > SSIZE_MAX)
3288 		return (EINVAL);
3289 
3290 	auio.uio_iov = &iov;
3291 	auio.uio_iovcnt = 1;
3292 	auio.uio_resid = iov.iov_len;
3293 	auio.uio_offset = SCARG(uap, offset);
3294 
3295 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3296 }
3297 
3298 /*
3299  * Positional scatter read system call.
3300  */
3301 int
3302 sys_preadv(struct proc *p, void *v, register_t *retval)
3303 {
3304 	struct sys_preadv_args /* {
3305 		syscallarg(int) fd;
3306 		syscallarg(const struct iovec *) iovp;
3307 		syscallarg(int) iovcnt;
3308 		syscallarg(off_t) offset;
3309 	} */ *uap = v;
3310 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3311 	int error, iovcnt = SCARG(uap, iovcnt);
3312 	struct uio auio;
3313 	size_t resid;
3314 
3315 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3316 	if (error)
3317 		goto done;
3318 
3319 	auio.uio_iov = iov;
3320 	auio.uio_iovcnt = iovcnt;
3321 	auio.uio_resid = resid;
3322 	auio.uio_offset = SCARG(uap, offset);
3323 
3324 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3325  done:
3326 	iovec_free(iov, iovcnt);
3327 	return (error);
3328 }
3329 
3330 /*
3331  * Positional write system call.
3332  */
3333 int
3334 sys_pwrite(struct proc *p, void *v, register_t *retval)
3335 {
3336 	struct sys_pwrite_args /* {
3337 		syscallarg(int) fd;
3338 		syscallarg(const void *) buf;
3339 		syscallarg(size_t) nbyte;
3340 		syscallarg(off_t) offset;
3341 	} */ *uap = v;
3342 	struct iovec iov;
3343 	struct uio auio;
3344 
3345 	iov.iov_base = (void *)SCARG(uap, buf);
3346 	iov.iov_len = SCARG(uap, nbyte);
3347 	if (iov.iov_len > SSIZE_MAX)
3348 		return (EINVAL);
3349 
3350 	auio.uio_iov = &iov;
3351 	auio.uio_iovcnt = 1;
3352 	auio.uio_resid = iov.iov_len;
3353 	auio.uio_offset = SCARG(uap, offset);
3354 
3355 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3356 }
3357 
3358 /*
3359  * Positional gather write system call.
3360  */
3361 int
3362 sys_pwritev(struct proc *p, void *v, register_t *retval)
3363 {
3364 	struct sys_pwritev_args /* {
3365 		syscallarg(int) fd;
3366 		syscallarg(const struct iovec *) iovp;
3367 		syscallarg(int) iovcnt;
3368 		syscallarg(off_t) offset;
3369 	} */ *uap = v;
3370 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3371 	int error, iovcnt = SCARG(uap, iovcnt);
3372 	struct uio auio;
3373 	size_t resid;
3374 
3375 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3376 	if (error)
3377 		goto done;
3378 
3379 	auio.uio_iov = iov;
3380 	auio.uio_iovcnt = iovcnt;
3381 	auio.uio_resid = resid;
3382 	auio.uio_offset = SCARG(uap, offset);
3383 
3384 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3385  done:
3386 	iovec_free(iov, iovcnt);
3387 	return (error);
3388 }
3389 
3390 #if 1
3391 int
3392 sys_pad_pread(struct proc *p, void *v, register_t *retval)
3393 {
3394 	struct sys_pad_pread_args *uap = v;
3395 	struct sys_pread_args unpad;
3396 
3397 	SCARG(&unpad, fd) = SCARG(uap, fd);
3398 	SCARG(&unpad, buf) = SCARG(uap, buf);
3399 	SCARG(&unpad, nbyte) = SCARG(uap, nbyte);
3400 	SCARG(&unpad, offset) = SCARG(uap, offset);
3401 	return sys_pread(p, &unpad, retval);
3402 }
3403 
3404 int
3405 sys_pad_preadv(struct proc *p, void *v, register_t *retval)
3406 {
3407 	struct sys_pad_preadv_args *uap = v;
3408 	struct sys_preadv_args unpad;
3409 
3410 	SCARG(&unpad, fd) = SCARG(uap, fd);
3411 	SCARG(&unpad, iovp) = SCARG(uap, iovp);
3412 	SCARG(&unpad, iovcnt) = SCARG(uap, iovcnt);
3413 	SCARG(&unpad, offset) = SCARG(uap, offset);
3414 	return sys_preadv(p, &unpad, retval);
3415 }
3416 
3417 int
3418 sys_pad_pwrite(struct proc *p, void *v, register_t *retval)
3419 {
3420 	struct sys_pad_pwrite_args *uap = v;
3421 	struct sys_pwrite_args unpad;
3422 
3423 	SCARG(&unpad, fd) = SCARG(uap, fd);
3424 	SCARG(&unpad, buf) = SCARG(uap, buf);
3425 	SCARG(&unpad, nbyte) = SCARG(uap, nbyte);
3426 	SCARG(&unpad, offset) = SCARG(uap, offset);
3427 	return sys_pwrite(p, &unpad, retval);
3428 }
3429 
3430 int
3431 sys_pad_pwritev(struct proc *p, void *v, register_t *retval)
3432 {
3433 	struct sys_pad_pwritev_args *uap = v;
3434 	struct sys_pwritev_args unpad;
3435 
3436 	SCARG(&unpad, fd) = SCARG(uap, fd);
3437 	SCARG(&unpad, iovp) = SCARG(uap, iovp);
3438 	SCARG(&unpad, iovcnt) = SCARG(uap, iovcnt);
3439 	SCARG(&unpad, offset) = SCARG(uap, offset);
3440 	return sys_pwritev(p, &unpad, retval);
3441 }
3442 #endif
3443