xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.314 2019/03/24 18:14:20 beck Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
66 extern int suid_clear;
67 
68 static int change_dir(struct nameidata *, struct proc *);
69 
70 void checkdirs(struct vnode *);
71 
72 int copyout_statfs(struct statfs *, void *, struct proc *);
73 
74 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
75 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
76 int dolinkat(struct proc *, int, const char *, int, const char *, int);
77 int dosymlinkat(struct proc *, const char *, int, const char *);
78 int dounlinkat(struct proc *, int, const char *, int);
79 int dofaccessat(struct proc *, int, const char *, int, int);
80 int dofstatat(struct proc *, int, const char *, struct stat *, int);
81 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
82     register_t *);
83 int dochflagsat(struct proc *, int, const char *, u_int, int);
84 int dovchflags(struct proc *, struct vnode *, u_int);
85 int dofchmodat(struct proc *, int, const char *, mode_t, int);
86 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
87 int dorenameat(struct proc *, int, const char *, int, const char *);
88 int domkdirat(struct proc *, int, const char *, mode_t);
89 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
90 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
91 int dofutimens(struct proc *, int, struct timespec [2]);
92 int dounmount_leaf(struct mount *, int, struct proc *);
93 int unveil_add(struct proc *, struct nameidata *, const char *);
94 void unveil_removevnode(struct vnode *vp);
95 void unveil_free_traversed_vnodes(struct nameidata *);
96 ssize_t unveil_find_cover(struct vnode *, struct proc *);
97 struct unveil *unveil_lookup(struct vnode *, struct proc *, ssize_t *);
98 
99 /*
100  * Virtual File System System Calls
101  */
102 
103 /*
104  * Mount a file system.
105  */
106 int
107 sys_mount(struct proc *p, void *v, register_t *retval)
108 {
109 	struct sys_mount_args /* {
110 		syscallarg(const char *) type;
111 		syscallarg(const char *) path;
112 		syscallarg(int) flags;
113 		syscallarg(void *) data;
114 	} */ *uap = v;
115 	struct vnode *vp;
116 	struct mount *mp;
117 	int error, mntflag = 0;
118 	char fstypename[MFSNAMELEN];
119 	char fspath[MNAMELEN];
120 	struct nameidata nd;
121 	struct vfsconf *vfsp;
122 	int flags = SCARG(uap, flags);
123 	void *args = NULL;
124 
125 	if ((error = suser(p)))
126 		return (error);
127 
128 	/*
129 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
130 	 */
131 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
132 	if (error)
133 		return(error);
134 
135 	/*
136 	 * Get vnode to be covered
137 	 */
138 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
139 	if ((error = namei(&nd)) != 0)
140 		goto fail;
141 	vp = nd.ni_vp;
142 	if (flags & MNT_UPDATE) {
143 		if ((vp->v_flag & VROOT) == 0) {
144 			vput(vp);
145 			error = EINVAL;
146 			goto fail;
147 		}
148 		mp = vp->v_mount;
149 		vfsp = mp->mnt_vfc;
150 
151 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
152 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
153 		if (error) {
154 			vput(vp);
155 			goto fail;
156 		}
157 
158 		mntflag = mp->mnt_flag;
159 		/*
160 		 * We only allow the filesystem to be reloaded if it
161 		 * is currently mounted read-only.
162 		 */
163 		if ((flags & MNT_RELOAD) &&
164 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
165 			vput(vp);
166 			error = EOPNOTSUPP;	/* Needs translation */
167 			goto fail;
168 		}
169 
170 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
171 			vput(vp);
172 			goto fail;
173 		}
174 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
175 		goto update;
176 	}
177 	/*
178 	 * Do not allow disabling of permission checks unless exec and access to
179 	 * device files is disabled too.
180 	 */
181 	if ((flags & MNT_NOPERM) &&
182 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
183 		vput(vp);
184 		error = EPERM;
185 		goto fail;
186 	}
187 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
188 		vput(vp);
189 		goto fail;
190 	}
191 	if (vp->v_type != VDIR) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
196 	if (error) {
197 		vput(vp);
198 		goto fail;
199 	}
200 	vfsp = vfs_byname(fstypename);
201 	if (vfsp == NULL) {
202 		vput(vp);
203 		error = EOPNOTSUPP;
204 		goto fail;
205 	}
206 
207 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
208 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
209 	if (error) {
210 		vput(vp);
211 		goto fail;
212 	}
213 
214 	if (vp->v_mountedhere != NULL) {
215 		vput(vp);
216 		error = EBUSY;
217 		goto fail;
218 	}
219 
220 	/*
221 	 * Allocate and initialize the file system.
222 	 */
223 	mp = vfs_mount_alloc(vp, vfsp);
224 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
225 
226 update:
227 	/* Ensure that the parent mountpoint does not get unmounted. */
228 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
229 	if (error) {
230 		if (mp->mnt_flag & MNT_UPDATE) {
231 			mp->mnt_flag = mntflag;
232 			vfs_unbusy(mp);
233 		} else {
234 			vfs_unbusy(mp);
235 			vfs_mount_free(mp);
236 		}
237 		vput(vp);
238 		goto fail;
239 	}
240 
241 	/*
242 	 * Set the mount level flags.
243 	 */
244 	if (flags & MNT_RDONLY)
245 		mp->mnt_flag |= MNT_RDONLY;
246 	else if (mp->mnt_flag & MNT_RDONLY)
247 		mp->mnt_flag |= MNT_WANTRDWR;
248 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
249 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
250 	    MNT_NOPERM | MNT_FORCE);
251 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
252 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
253 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
254 	/*
255 	 * Mount the filesystem.
256 	 */
257 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
258 	if (!error) {
259 		mp->mnt_stat.f_ctime = time_second;
260 	}
261 	if (mp->mnt_flag & MNT_UPDATE) {
262 		vfs_unbusy(vp->v_mount);
263 		vput(vp);
264 		if (mp->mnt_flag & MNT_WANTRDWR)
265 			mp->mnt_flag &= ~MNT_RDONLY;
266 		mp->mnt_flag &=~
267 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
268 		if (error)
269 			mp->mnt_flag = mntflag;
270 
271 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
272 			if (mp->mnt_syncer == NULL)
273 				error = vfs_allocate_syncvnode(mp);
274 		} else {
275 			if (mp->mnt_syncer != NULL)
276 				vgone(mp->mnt_syncer);
277 			mp->mnt_syncer = NULL;
278 		}
279 
280 		vfs_unbusy(mp);
281 		goto fail;
282 	}
283 
284 	vp->v_mountedhere = mp;
285 
286 	/*
287 	 * Put the new filesystem on the mount list after root.
288 	 */
289 	cache_purge(vp);
290 	if (!error) {
291 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
292 		checkdirs(vp);
293 		vfs_unbusy(vp->v_mount);
294 		VOP_UNLOCK(vp);
295 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
296 			error = vfs_allocate_syncvnode(mp);
297 		vfs_unbusy(mp);
298 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
299 		if ((error = VFS_START(mp, 0, p)) != 0)
300 			vrele(vp);
301 	} else {
302 		mp->mnt_vnodecovered->v_mountedhere = NULL;
303 		vfs_unbusy(mp);
304 		vfs_mount_free(mp);
305 		vfs_unbusy(vp->v_mount);
306 		vput(vp);
307 	}
308 fail:
309 	if (args)
310 		free(args, M_TEMP, vfsp->vfc_datasize);
311 	return (error);
312 }
313 
314 /*
315  * Scan all active processes to see if any of them have a current
316  * or root directory onto which the new filesystem has just been
317  * mounted. If so, replace them with the new mount point, keeping
318  * track of how many were replaced.  That's the number of references
319  * the old vnode had that we've replaced, so finish by vrele()'ing
320  * it that many times.  This puts off any possible sleeping until
321  * we've finished walking the allprocess list.
322  */
323 void
324 checkdirs(struct vnode *olddp)
325 {
326 	struct filedesc *fdp;
327 	struct vnode *newdp;
328 	struct process *pr;
329 	u_int  free_count = 0;
330 
331 	if (olddp->v_usecount == 1)
332 		return;
333 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
334 		panic("mount: lost mount");
335 	LIST_FOREACH(pr, &allprocess, ps_list) {
336 		fdp = pr->ps_fd;
337 		if (fdp->fd_cdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_cdir = newdp;
341 		}
342 		if (fdp->fd_rdir == olddp) {
343 			free_count++;
344 			vref(newdp);
345 			fdp->fd_rdir = newdp;
346 		}
347 		pr->ps_uvpcwd = NULL;
348 	}
349 	if (rootvnode == olddp) {
350 		free_count++;
351 		vref(newdp);
352 		rootvnode = newdp;
353 	}
354 	while (free_count-- > 0)
355 		vrele(olddp);
356 	vput(newdp);
357 }
358 
359 /*
360  * Unmount a file system.
361  *
362  * Note: unmount takes a path to the vnode mounted on as argument,
363  * not special file (as before).
364  */
365 int
366 sys_unmount(struct proc *p, void *v, register_t *retval)
367 {
368 	struct sys_unmount_args /* {
369 		syscallarg(const char *) path;
370 		syscallarg(int) flags;
371 	} */ *uap = v;
372 	struct vnode *vp;
373 	struct mount *mp;
374 	int error;
375 	struct nameidata nd;
376 
377 	if ((error = suser(p)) != 0)
378 		return (error);
379 
380 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
381 	    SCARG(uap, path), p);
382 	if ((error = namei(&nd)) != 0)
383 		return (error);
384 	vp = nd.ni_vp;
385 	mp = vp->v_mount;
386 
387 	/*
388 	 * Don't allow unmounting the root file system.
389 	 */
390 	if (mp->mnt_flag & MNT_ROOTFS) {
391 		vput(vp);
392 		return (EINVAL);
393 	}
394 
395 	/*
396 	 * Must be the root of the filesystem
397 	 */
398 	if ((vp->v_flag & VROOT) == 0) {
399 		vput(vp);
400 		return (EINVAL);
401 	}
402 	vput(vp);
403 
404 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
405 		return (EBUSY);
406 
407 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
408 }
409 
410 /*
411  * Do the actual file system unmount.
412  */
413 int
414 dounmount(struct mount *mp, int flags, struct proc *p)
415 {
416 	SLIST_HEAD(, mount) mplist;
417 	struct mount *nmp;
418 	int error;
419 
420 	SLIST_INIT(&mplist);
421 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
422 
423 	/*
424 	 * Collect nested mount points. This takes advantage of the mount list
425 	 * being ordered - nested mount points come after their parent.
426 	 */
427 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
428 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
429 			if (mp->mnt_vnodecovered == NULLVP ||
430 			    mp->mnt_vnodecovered->v_mount != nmp)
431 				continue;
432 
433 			if ((flags & MNT_FORCE) == 0) {
434 				error = EBUSY;
435 				goto err;
436 			}
437 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
438 			if (error) {
439 				if ((flags & MNT_DOOMED)) {
440 					/*
441 					 * If the mount point was busy due to
442 					 * being unmounted, it has been removed
443 					 * from the mount list already.
444 					 * Restart the iteration from the last
445 					 * collected busy entry.
446 					 */
447 					mp = SLIST_FIRST(&mplist);
448 					break;
449 				}
450 				goto err;
451 			}
452 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
453 			break;
454 		}
455 	}
456 
457 	/*
458 	 * Nested mount points cannot appear during this loop as mounting
459 	 * requires a read lock for the parent mount point.
460 	 */
461 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
462 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
463 		error = dounmount_leaf(mp, flags, p);
464 		if (error)
465 			goto err;
466 	}
467 	return (0);
468 
469 err:
470 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
471 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
472 		vfs_unbusy(mp);
473 	}
474 	return (error);
475 }
476 
477 int
478 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
479 {
480 	struct vnode *coveredvp;
481 	struct vnode *vp, *nvp;
482 	int error;
483 	int hadsyncer = 0;
484 
485 	mp->mnt_flag &=~ MNT_ASYNC;
486 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
487 	if (mp->mnt_syncer != NULL) {
488 		hadsyncer = 1;
489 		vgone(mp->mnt_syncer);
490 		mp->mnt_syncer = NULL;
491 	}
492 
493 	/*
494 	 * Before calling file system unmount, make sure
495 	 * all unveils to vnodes in here are dropped.
496 	 */
497 	LIST_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
498 		unveil_removevnode(vp);
499 	}
500 
501 	if (((mp->mnt_flag & MNT_RDONLY) ||
502 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
503 	    (flags & MNT_FORCE))
504 		error = VFS_UNMOUNT(mp, flags, p);
505 
506 	if (error && !(flags & MNT_DOOMED)) {
507 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
508 			(void) vfs_allocate_syncvnode(mp);
509 		vfs_unbusy(mp);
510 		return (error);
511 	}
512 
513 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
514 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
515 		coveredvp->v_mountedhere = NULL;
516 		vrele(coveredvp);
517 	}
518 
519 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
520 		panic("unmount: dangling vnode");
521 
522 	vfs_unbusy(mp);
523 	vfs_mount_free(mp);
524 
525 	return (0);
526 }
527 
528 /*
529  * Sync each mounted filesystem.
530  */
531 #ifdef DEBUG
532 int syncprt = 0;
533 struct ctldebug debug0 = { "syncprt", &syncprt };
534 #endif
535 
536 int
537 sys_sync(struct proc *p, void *v, register_t *retval)
538 {
539 	struct mount *mp;
540 	int asyncflag;
541 
542 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
543 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
544 			continue;
545 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
546 			asyncflag = mp->mnt_flag & MNT_ASYNC;
547 			mp->mnt_flag &= ~MNT_ASYNC;
548 			uvm_vnp_sync(mp);
549 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
550 			if (asyncflag)
551 				mp->mnt_flag |= MNT_ASYNC;
552 		}
553 		vfs_unbusy(mp);
554 	}
555 
556 	return (0);
557 }
558 
559 /*
560  * Change filesystem quotas.
561  */
562 int
563 sys_quotactl(struct proc *p, void *v, register_t *retval)
564 {
565 	struct sys_quotactl_args /* {
566 		syscallarg(const char *) path;
567 		syscallarg(int) cmd;
568 		syscallarg(int) uid;
569 		syscallarg(char *) arg;
570 	} */ *uap = v;
571 	struct mount *mp;
572 	int error;
573 	struct nameidata nd;
574 
575 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
576 	if ((error = namei(&nd)) != 0)
577 		return (error);
578 	mp = nd.ni_vp->v_mount;
579 	vrele(nd.ni_vp);
580 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
581 	    SCARG(uap, arg), p));
582 }
583 
584 int
585 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
586 {
587 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
588 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
589 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
590 	char *s, *d;
591 	int error;
592 
593 	/* Don't let non-root see filesystem id (for NFS security) */
594 	if (suser(p)) {
595 		fsid_t fsid;
596 
597 		s = (char *)sp;
598 		d = (char *)uaddr;
599 
600 		memset(&fsid, 0, sizeof(fsid));
601 
602 		if ((error = copyout(s, d, co_sz1)) != 0)
603 			return (error);
604 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
605 			return (error);
606 		return (copyout(s + co_off2, d + co_off2, co_sz2));
607 	}
608 
609 	return (copyout(sp, uaddr, sizeof(*sp)));
610 }
611 
612 /*
613  * Get filesystem statistics.
614  */
615 int
616 sys_statfs(struct proc *p, void *v, register_t *retval)
617 {
618 	struct sys_statfs_args /* {
619 		syscallarg(const char *) path;
620 		syscallarg(struct statfs *) buf;
621 	} */ *uap = v;
622 	struct mount *mp;
623 	struct statfs *sp;
624 	int error;
625 	struct nameidata nd;
626 
627 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
628 	    SCARG(uap, path), p);
629 	nd.ni_pledge = PLEDGE_RPATH;
630 	nd.ni_unveil = UNVEIL_READ;
631 	if ((error = namei(&nd)) != 0)
632 		return (error);
633 	mp = nd.ni_vp->v_mount;
634 	sp = &mp->mnt_stat;
635 	vrele(nd.ni_vp);
636 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
637 		return (error);
638 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
639 
640 	return (copyout_statfs(sp, SCARG(uap, buf), p));
641 }
642 
643 /*
644  * Get filesystem statistics.
645  */
646 int
647 sys_fstatfs(struct proc *p, void *v, register_t *retval)
648 {
649 	struct sys_fstatfs_args /* {
650 		syscallarg(int) fd;
651 		syscallarg(struct statfs *) buf;
652 	} */ *uap = v;
653 	struct file *fp;
654 	struct mount *mp;
655 	struct statfs *sp;
656 	int error;
657 
658 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
659 		return (error);
660 	mp = ((struct vnode *)fp->f_data)->v_mount;
661 	if (!mp) {
662 		FRELE(fp, p);
663 		return (ENOENT);
664 	}
665 	sp = &mp->mnt_stat;
666 	error = VFS_STATFS(mp, sp, p);
667 	FRELE(fp, p);
668 	if (error)
669 		return (error);
670 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
671 
672 	return (copyout_statfs(sp, SCARG(uap, buf), p));
673 }
674 
675 /*
676  * Get statistics on all filesystems.
677  */
678 int
679 sys_getfsstat(struct proc *p, void *v, register_t *retval)
680 {
681 	struct sys_getfsstat_args /* {
682 		syscallarg(struct statfs *) buf;
683 		syscallarg(size_t) bufsize;
684 		syscallarg(int) flags;
685 	} */ *uap = v;
686 	struct mount *mp;
687 	struct statfs *sp;
688 	struct statfs *sfsp;
689 	size_t count, maxcount;
690 	int error, flags = SCARG(uap, flags);
691 
692 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
693 	sfsp = SCARG(uap, buf);
694 	count = 0;
695 
696 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
697 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
698 			continue;
699 		if (sfsp && count < maxcount) {
700 			sp = &mp->mnt_stat;
701 
702 			/* Refresh stats unless MNT_NOWAIT is specified */
703 			if (flags != MNT_NOWAIT &&
704 			    flags != MNT_LAZY &&
705 			    (flags == MNT_WAIT ||
706 			    flags == 0) &&
707 			    (error = VFS_STATFS(mp, sp, p))) {
708 				vfs_unbusy(mp);
709 				continue;
710 			}
711 
712 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
713 #if notyet
714 			if (mp->mnt_flag & MNT_SOFTDEP)
715 				sp->f_eflags = STATFS_SOFTUPD;
716 #endif
717 			error = (copyout_statfs(sp, sfsp, p));
718 			if (error) {
719 				vfs_unbusy(mp);
720 				return (error);
721 			}
722 			sfsp++;
723 		}
724 		count++;
725 		vfs_unbusy(mp);
726 	}
727 
728 	if (sfsp && count > maxcount)
729 		*retval = maxcount;
730 	else
731 		*retval = count;
732 
733 	return (0);
734 }
735 
736 /*
737  * Change current working directory to a given file descriptor.
738  */
739 int
740 sys_fchdir(struct proc *p, void *v, register_t *retval)
741 {
742 	struct sys_fchdir_args /* {
743 		syscallarg(int) fd;
744 	} */ *uap = v;
745 	struct filedesc *fdp = p->p_fd;
746 	struct vnode *vp, *tdp, *old_cdir;
747 	struct mount *mp;
748 	struct file *fp;
749 	int error;
750 
751 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
752 		return (EBADF);
753 	vp = fp->f_data;
754 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
755 		FRELE(fp, p);
756 		return (ENOTDIR);
757 	}
758 	vref(vp);
759 	FRELE(fp, p);
760 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
761 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
762 
763 	while (!error && (mp = vp->v_mountedhere) != NULL) {
764 		if (vfs_busy(mp, VB_READ|VB_WAIT))
765 			continue;
766 		error = VFS_ROOT(mp, &tdp);
767 		vfs_unbusy(mp);
768 		if (error)
769 			break;
770 		vput(vp);
771 		vp = tdp;
772 	}
773 	if (error) {
774 		vput(vp);
775 		return (error);
776 	}
777 	VOP_UNLOCK(vp);
778 	old_cdir = fdp->fd_cdir;
779 	fdp->fd_cdir = vp;
780 	vrele(old_cdir);
781 	return (0);
782 }
783 
784 /*
785  * Change current working directory (``.'').
786  */
787 int
788 sys_chdir(struct proc *p, void *v, register_t *retval)
789 {
790 	struct sys_chdir_args /* {
791 		syscallarg(const char *) path;
792 	} */ *uap = v;
793 	struct filedesc *fdp = p->p_fd;
794 	struct vnode *old_cdir;
795 	int error;
796 	struct nameidata nd;
797 
798 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
799 	    SCARG(uap, path), p);
800 	nd.ni_pledge = PLEDGE_RPATH;
801 	nd.ni_unveil = UNVEIL_READ;
802 	if ((error = change_dir(&nd, p)) != 0)
803 		return (error);
804 	p->p_p->ps_uvpcwd = nd.ni_unveil_match;
805 	old_cdir = fdp->fd_cdir;
806 	fdp->fd_cdir = nd.ni_vp;
807 	vrele(old_cdir);
808 	return (0);
809 }
810 
811 /*
812  * Change notion of root (``/'') directory.
813  */
814 int
815 sys_chroot(struct proc *p, void *v, register_t *retval)
816 {
817 	struct sys_chroot_args /* {
818 		syscallarg(const char *) path;
819 	} */ *uap = v;
820 	struct filedesc *fdp = p->p_fd;
821 	struct vnode *old_cdir, *old_rdir;
822 	int error;
823 	struct nameidata nd;
824 
825 	if ((error = suser(p)) != 0)
826 		return (error);
827 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
828 	    SCARG(uap, path), p);
829 	if ((error = change_dir(&nd, p)) != 0)
830 		return (error);
831 	if (fdp->fd_rdir != NULL) {
832 		/*
833 		 * A chroot() done inside a changed root environment does
834 		 * an automatic chdir to avoid the out-of-tree experience.
835 		 */
836 		vref(nd.ni_vp);
837 		old_rdir = fdp->fd_rdir;
838 		old_cdir = fdp->fd_cdir;
839 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
840 		vrele(old_rdir);
841 		vrele(old_cdir);
842 	} else
843 		fdp->fd_rdir = nd.ni_vp;
844 	return (0);
845 }
846 
847 /*
848  * Common routine for chroot and chdir.
849  */
850 static int
851 change_dir(struct nameidata *ndp, struct proc *p)
852 {
853 	struct vnode *vp;
854 	int error;
855 
856 	if ((error = namei(ndp)) != 0)
857 		return (error);
858 	vp = ndp->ni_vp;
859 	if (vp->v_type != VDIR)
860 		error = ENOTDIR;
861 	else
862 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
863 	if (error)
864 		vput(vp);
865 	else
866 		VOP_UNLOCK(vp);
867 	return (error);
868 }
869 
870 int
871 sys_unveil(struct proc *p, void *v, register_t *retval)
872 {
873 	struct sys_unveil_args /* {
874 		syscallarg(const char *) path;
875 		syscallarg(const char *) permissions;
876 	} */ *uap = v;
877 	char pathname[MAXPATHLEN];
878 	struct nameidata nd;
879 	size_t pathlen;
880 	char permissions[5];
881 	int error, allow;
882 
883 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
884 		p->p_p->ps_uvdone = 1;
885 		return (0);
886 	}
887 
888 	if (p->p_p->ps_uvdone != 0)
889 		return EPERM;
890 
891 	error = copyinstr(SCARG(uap, permissions), permissions,
892 	    sizeof(permissions), NULL);
893 	if (error)
894 		return(error);
895 	error = copyinstr(SCARG(uap, path), pathname, sizeof(pathname), &pathlen);
896 	if (error)
897 		return(error);
898 
899 #ifdef KTRACE
900 	if (KTRPOINT(p, KTR_STRUCT))
901 		ktrstruct(p, "unveil", permissions, strlen(permissions));
902 #endif
903 	if (pathlen < 2)
904 		return EINVAL;
905 
906 	if (pathlen == 2 && pathname[0] == '/')
907 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
908 		    UIO_SYSSPACE, pathname, p);
909 	else
910 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
911 		    UIO_SYSSPACE, pathname, p);
912 
913 	nd.ni_pledge = PLEDGE_UNVEIL;
914 	if ((error = namei(&nd)) != 0)
915 		goto end;
916 
917 	/*
918 	 * XXX Any access to the file or directory will allow us to
919 	 * pledge path it
920 	 */
921 	allow = ((nd.ni_vp &&
922 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
923 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
924 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
925 	    (nd.ni_dvp &&
926 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
927 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
928 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
929 
930 	/* release lock from namei, but keep ref */
931 	if (nd.ni_vp)
932 		VOP_UNLOCK(nd.ni_vp);
933 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
934 		VOP_UNLOCK(nd.ni_dvp);
935 
936 	if (allow) {
937 		error = unveil_add(p, &nd, permissions);
938 		p->p_p->ps_uvpcwd = unveil_lookup(p->p_fd->fd_cdir, p, NULL);
939 		if (p->p_p->ps_uvpcwd == NULL) {
940 			ssize_t i = unveil_find_cover(p->p_fd->fd_cdir, p);
941 			if (i >= 0)
942 				p->p_p->ps_uvpcwd = &p->p_p->ps_uvpaths[i];
943 		}
944 	}
945 	else
946 		error = EPERM;
947 
948 	/* release vref from namei, but not vref from unveil_add */
949 	if (nd.ni_vp)
950 		vrele(nd.ni_vp);
951 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
952 		vrele(nd.ni_dvp);
953 
954 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
955 end:
956 	unveil_free_traversed_vnodes(&nd);
957 
958 	return (error);
959 }
960 
961 /*
962  * Check permissions, allocate an open file structure,
963  * and call the device open routine if any.
964  */
965 int
966 sys_open(struct proc *p, void *v, register_t *retval)
967 {
968 	struct sys_open_args /* {
969 		syscallarg(const char *) path;
970 		syscallarg(int) flags;
971 		syscallarg(mode_t) mode;
972 	} */ *uap = v;
973 
974 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
975 	    SCARG(uap, mode), retval));
976 }
977 
978 int
979 sys_openat(struct proc *p, void *v, register_t *retval)
980 {
981 	struct sys_openat_args /* {
982 		syscallarg(int) fd;
983 		syscallarg(const char *) path;
984 		syscallarg(int) flags;
985 		syscallarg(mode_t) mode;
986 	} */ *uap = v;
987 
988 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
989 	    SCARG(uap, flags), SCARG(uap, mode), retval));
990 }
991 
992 int
993 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
994     register_t *retval)
995 {
996 	struct filedesc *fdp = p->p_fd;
997 	struct file *fp;
998 	struct vnode *vp;
999 	struct vattr vattr;
1000 	int flags, cloexec, cmode;
1001 	int type, indx, error, localtrunc = 0;
1002 	struct flock lf;
1003 	struct nameidata nd;
1004 	uint64_t ni_pledge = 0;
1005 	u_char ni_unveil = 0;
1006 
1007 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1008 		error = pledge_flock(p);
1009 		if (error != 0)
1010 			return (error);
1011 	}
1012 
1013 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1014 
1015 	fdplock(fdp);
1016 	if ((error = falloc(p, &fp, &indx)) != 0)
1017 		goto out;
1018 	fdpunlock(fdp);
1019 
1020 	flags = FFLAGS(oflags);
1021 	if (flags & FREAD) {
1022 		ni_pledge |= PLEDGE_RPATH;
1023 		ni_unveil |= UNVEIL_READ;
1024 	}
1025 	if (flags & FWRITE) {
1026 		ni_pledge |= PLEDGE_WPATH;
1027 		ni_unveil |= UNVEIL_WRITE;
1028 	}
1029 	if (oflags & O_CREAT) {
1030 		ni_pledge |= PLEDGE_CPATH;
1031 		ni_unveil |= UNVEIL_CREATE;
1032 	}
1033 
1034 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1035 	if ((p->p_p->ps_flags & PS_PLEDGE))
1036 		cmode &= ACCESSPERMS;
1037 	NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
1038 	nd.ni_pledge = ni_pledge;
1039 	nd.ni_unveil = ni_unveil;
1040 	p->p_dupfd = -1;			/* XXX check for fdopen */
1041 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1042 		localtrunc = 1;
1043 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1044 	}
1045 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1046 		fdplock(fdp);
1047 		if (error == ENODEV &&
1048 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1049 		    (error =
1050 			dupfdopen(p, indx, flags)) == 0) {
1051 			closef(fp, p);
1052 			*retval = indx;
1053 			goto out;
1054 		}
1055 		if (error == ERESTART)
1056 			error = EINTR;
1057 		fdremove(fdp, indx);
1058 		closef(fp, p);
1059 		goto out;
1060 	}
1061 	p->p_dupfd = 0;
1062 	vp = nd.ni_vp;
1063 	fp->f_flag = flags & FMASK;
1064 	fp->f_type = DTYPE_VNODE;
1065 	fp->f_ops = &vnops;
1066 	fp->f_data = vp;
1067 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1068 		lf.l_whence = SEEK_SET;
1069 		lf.l_start = 0;
1070 		lf.l_len = 0;
1071 		if (flags & O_EXLOCK)
1072 			lf.l_type = F_WRLCK;
1073 		else
1074 			lf.l_type = F_RDLCK;
1075 		type = F_FLOCK;
1076 		if ((flags & FNONBLOCK) == 0)
1077 			type |= F_WAIT;
1078 		VOP_UNLOCK(vp);
1079 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1080 		if (error) {
1081 			fdplock(fdp);
1082 			/* closef will vn_close the file for us. */
1083 			fdremove(fdp, indx);
1084 			closef(fp, p);
1085 			goto out;
1086 		}
1087 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1088 		fp->f_iflags |= FIF_HASLOCK;
1089 	}
1090 	if (localtrunc) {
1091 		if ((fp->f_flag & FWRITE) == 0)
1092 			error = EACCES;
1093 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1094 			error = EROFS;
1095 		else if (vp->v_type == VDIR)
1096 			error = EISDIR;
1097 		else if ((error = vn_writechk(vp)) == 0) {
1098 			VATTR_NULL(&vattr);
1099 			vattr.va_size = 0;
1100 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1101 		}
1102 		if (error) {
1103 			VOP_UNLOCK(vp);
1104 			fdplock(fdp);
1105 			/* closef will close the file for us. */
1106 			fdremove(fdp, indx);
1107 			closef(fp, p);
1108 			goto out;
1109 		}
1110 	}
1111 	VOP_UNLOCK(vp);
1112 	*retval = indx;
1113 	fdplock(fdp);
1114 	fdinsert(fdp, indx, cloexec, fp);
1115 	FRELE(fp, p);
1116 out:
1117 	fdpunlock(fdp);
1118 	return (error);
1119 }
1120 
1121 /*
1122  * Get file handle system call
1123  */
1124 int
1125 sys_getfh(struct proc *p, void *v, register_t *retval)
1126 {
1127 	struct sys_getfh_args /* {
1128 		syscallarg(const char *) fname;
1129 		syscallarg(fhandle_t *) fhp;
1130 	} */ *uap = v;
1131 	struct vnode *vp;
1132 	fhandle_t fh;
1133 	int error;
1134 	struct nameidata nd;
1135 
1136 	/*
1137 	 * Must be super user
1138 	 */
1139 	error = suser(p);
1140 	if (error)
1141 		return (error);
1142 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1143 	    SCARG(uap, fname), p);
1144 	error = namei(&nd);
1145 	if (error)
1146 		return (error);
1147 	vp = nd.ni_vp;
1148 	memset(&fh, 0, sizeof(fh));
1149 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1150 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1151 	vput(vp);
1152 	if (error)
1153 		return (error);
1154 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1155 	return (error);
1156 }
1157 
1158 /*
1159  * Open a file given a file handle.
1160  *
1161  * Check permissions, allocate an open file structure,
1162  * and call the device open routine if any.
1163  */
1164 int
1165 sys_fhopen(struct proc *p, void *v, register_t *retval)
1166 {
1167 	struct sys_fhopen_args /* {
1168 		syscallarg(const fhandle_t *) fhp;
1169 		syscallarg(int) flags;
1170 	} */ *uap = v;
1171 	struct filedesc *fdp = p->p_fd;
1172 	struct file *fp;
1173 	struct vnode *vp = NULL;
1174 	struct mount *mp;
1175 	struct ucred *cred = p->p_ucred;
1176 	int flags, cloexec;
1177 	int type, indx, error=0;
1178 	struct flock lf;
1179 	struct vattr va;
1180 	fhandle_t fh;
1181 
1182 	/*
1183 	 * Must be super user
1184 	 */
1185 	if ((error = suser(p)))
1186 		return (error);
1187 
1188 	flags = FFLAGS(SCARG(uap, flags));
1189 	if ((flags & (FREAD | FWRITE)) == 0)
1190 		return (EINVAL);
1191 	if ((flags & O_CREAT))
1192 		return (EINVAL);
1193 
1194 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1195 
1196 	fdplock(fdp);
1197 	if ((error = falloc(p, &fp, &indx)) != 0) {
1198 		fp = NULL;
1199 		goto bad;
1200 	}
1201 
1202 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1203 		goto bad;
1204 
1205 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1206 		error = ESTALE;
1207 		goto bad;
1208 	}
1209 
1210 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1211 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1212 		goto bad;
1213 	}
1214 
1215 	/* Now do an effective vn_open */
1216 
1217 	if (vp->v_type == VSOCK) {
1218 		error = EOPNOTSUPP;
1219 		goto bad;
1220 	}
1221 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1222 		error = ENOTDIR;
1223 		goto bad;
1224 	}
1225 	if (flags & FREAD) {
1226 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1227 			goto bad;
1228 	}
1229 	if (flags & (FWRITE | O_TRUNC)) {
1230 		if (vp->v_type == VDIR) {
1231 			error = EISDIR;
1232 			goto bad;
1233 		}
1234 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1235 		    (error = vn_writechk(vp)) != 0)
1236 			goto bad;
1237 	}
1238 	if (flags & O_TRUNC) {
1239 		VATTR_NULL(&va);
1240 		va.va_size = 0;
1241 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1242 			goto bad;
1243 	}
1244 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1245 		goto bad;
1246 	if (flags & FWRITE)
1247 		vp->v_writecount++;
1248 
1249 	/* done with modified vn_open, now finish what sys_open does. */
1250 
1251 	fp->f_flag = flags & FMASK;
1252 	fp->f_type = DTYPE_VNODE;
1253 	fp->f_ops = &vnops;
1254 	fp->f_data = vp;
1255 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1256 		lf.l_whence = SEEK_SET;
1257 		lf.l_start = 0;
1258 		lf.l_len = 0;
1259 		if (flags & O_EXLOCK)
1260 			lf.l_type = F_WRLCK;
1261 		else
1262 			lf.l_type = F_RDLCK;
1263 		type = F_FLOCK;
1264 		if ((flags & FNONBLOCK) == 0)
1265 			type |= F_WAIT;
1266 		VOP_UNLOCK(vp);
1267 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1268 		if (error) {
1269 			vp = NULL;	/* closef will vn_close the file */
1270 			goto bad;
1271 		}
1272 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1273 		fp->f_iflags |= FIF_HASLOCK;
1274 	}
1275 	VOP_UNLOCK(vp);
1276 	*retval = indx;
1277 	fdinsert(fdp, indx, cloexec, fp);
1278 	fdpunlock(fdp);
1279 	FRELE(fp, p);
1280 	return (0);
1281 
1282 bad:
1283 	if (fp) {
1284 		fdremove(fdp, indx);
1285 		closef(fp, p);
1286 		if (vp != NULL)
1287 			vput(vp);
1288 	}
1289 	fdpunlock(fdp);
1290 	return (error);
1291 }
1292 
1293 int
1294 sys_fhstat(struct proc *p, void *v, register_t *retval)
1295 {
1296 	struct sys_fhstat_args /* {
1297 		syscallarg(const fhandle_t *) fhp;
1298 		syscallarg(struct stat *) sb;
1299 	} */ *uap = v;
1300 	struct stat sb;
1301 	int error;
1302 	fhandle_t fh;
1303 	struct mount *mp;
1304 	struct vnode *vp;
1305 
1306 	/*
1307 	 * Must be super user
1308 	 */
1309 	if ((error = suser(p)))
1310 		return (error);
1311 
1312 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1313 		return (error);
1314 
1315 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1316 		return (ESTALE);
1317 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1318 		return (error);
1319 	error = vn_stat(vp, &sb, p);
1320 	vput(vp);
1321 	if (error)
1322 		return (error);
1323 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1324 	return (error);
1325 }
1326 
1327 int
1328 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1329 {
1330 	struct sys_fhstatfs_args /* {
1331 		syscallarg(const fhandle_t *) fhp;
1332 		syscallarg(struct statfs *) buf;
1333 	} */ *uap = v;
1334 	struct statfs *sp;
1335 	fhandle_t fh;
1336 	struct mount *mp;
1337 	struct vnode *vp;
1338 	int error;
1339 
1340 	/*
1341 	 * Must be super user
1342 	 */
1343 	if ((error = suser(p)))
1344 		return (error);
1345 
1346 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1347 		return (error);
1348 
1349 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1350 		return (ESTALE);
1351 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1352 		return (error);
1353 	mp = vp->v_mount;
1354 	sp = &mp->mnt_stat;
1355 	vput(vp);
1356 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1357 		return (error);
1358 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1359 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1360 }
1361 
1362 /*
1363  * Create a special file or named pipe.
1364  */
1365 int
1366 sys_mknod(struct proc *p, void *v, register_t *retval)
1367 {
1368 	struct sys_mknod_args /* {
1369 		syscallarg(const char *) path;
1370 		syscallarg(mode_t) mode;
1371 		syscallarg(int) dev;
1372 	} */ *uap = v;
1373 
1374 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1375 	    SCARG(uap, dev)));
1376 }
1377 
1378 int
1379 sys_mknodat(struct proc *p, void *v, register_t *retval)
1380 {
1381 	struct sys_mknodat_args /* {
1382 		syscallarg(int) fd;
1383 		syscallarg(const char *) path;
1384 		syscallarg(mode_t) mode;
1385 		syscallarg(dev_t) dev;
1386 	} */ *uap = v;
1387 
1388 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1389 	    SCARG(uap, mode), SCARG(uap, dev)));
1390 }
1391 
1392 int
1393 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1394 {
1395 	struct vnode *vp;
1396 	struct vattr vattr;
1397 	int error;
1398 	struct nameidata nd;
1399 
1400 	if (dev == VNOVAL)
1401 		return (EINVAL);
1402 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1403 	nd.ni_pledge = PLEDGE_DPATH;
1404 	nd.ni_unveil = UNVEIL_CREATE;
1405 	if ((error = namei(&nd)) != 0)
1406 		return (error);
1407 	vp = nd.ni_vp;
1408 	if (!S_ISFIFO(mode) || dev != 0) {
1409 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1410 			goto out;
1411 		if (p->p_fd->fd_rdir) {
1412 			error = EINVAL;
1413 			goto out;
1414 		}
1415 	}
1416 	if (vp != NULL)
1417 		error = EEXIST;
1418 	else {
1419 		VATTR_NULL(&vattr);
1420 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1421 		if ((p->p_p->ps_flags & PS_PLEDGE))
1422 			vattr.va_mode &= ACCESSPERMS;
1423 		vattr.va_rdev = dev;
1424 
1425 		switch (mode & S_IFMT) {
1426 		case S_IFMT:	/* used by badsect to flag bad sectors */
1427 			vattr.va_type = VBAD;
1428 			break;
1429 		case S_IFCHR:
1430 			vattr.va_type = VCHR;
1431 			break;
1432 		case S_IFBLK:
1433 			vattr.va_type = VBLK;
1434 			break;
1435 		case S_IFIFO:
1436 #ifndef FIFO
1437 			error = EOPNOTSUPP;
1438 			break;
1439 #else
1440 			if (dev == 0) {
1441 				vattr.va_type = VFIFO;
1442 				break;
1443 			}
1444 			/* FALLTHROUGH */
1445 #endif /* FIFO */
1446 		default:
1447 			error = EINVAL;
1448 			break;
1449 		}
1450 	}
1451 out:
1452 	if (!error) {
1453 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1454 		vput(nd.ni_dvp);
1455 	} else {
1456 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1457 		if (nd.ni_dvp == vp)
1458 			vrele(nd.ni_dvp);
1459 		else
1460 			vput(nd.ni_dvp);
1461 		if (vp)
1462 			vrele(vp);
1463 	}
1464 	return (error);
1465 }
1466 
1467 /*
1468  * Create a named pipe.
1469  */
1470 int
1471 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1472 {
1473 	struct sys_mkfifo_args /* {
1474 		syscallarg(const char *) path;
1475 		syscallarg(mode_t) mode;
1476 	} */ *uap = v;
1477 
1478 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1479 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1480 }
1481 
1482 int
1483 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1484 {
1485 	struct sys_mkfifoat_args /* {
1486 		syscallarg(int) fd;
1487 		syscallarg(const char *) path;
1488 		syscallarg(mode_t) mode;
1489 	} */ *uap = v;
1490 
1491 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1492 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1493 }
1494 
1495 /*
1496  * Make a hard file link.
1497  */
1498 int
1499 sys_link(struct proc *p, void *v, register_t *retval)
1500 {
1501 	struct sys_link_args /* {
1502 		syscallarg(const char *) path;
1503 		syscallarg(const char *) link;
1504 	} */ *uap = v;
1505 
1506 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1507 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1508 }
1509 
1510 int
1511 sys_linkat(struct proc *p, void *v, register_t *retval)
1512 {
1513 	struct sys_linkat_args /* {
1514 		syscallarg(int) fd1;
1515 		syscallarg(const char *) path1;
1516 		syscallarg(int) fd2;
1517 		syscallarg(const char *) path2;
1518 		syscallarg(int) flag;
1519 	} */ *uap = v;
1520 
1521 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1522 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1523 }
1524 
1525 int
1526 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1527     const char *path2, int flag)
1528 {
1529 	struct vnode *vp;
1530 	struct nameidata nd;
1531 	int error, follow;
1532 	int flags;
1533 
1534 	if (flag & ~AT_SYMLINK_FOLLOW)
1535 		return (EINVAL);
1536 
1537 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1538 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1539 	nd.ni_pledge = PLEDGE_RPATH;
1540 	nd.ni_unveil = UNVEIL_READ;
1541 	if ((error = namei(&nd)) != 0)
1542 		return (error);
1543 	vp = nd.ni_vp;
1544 
1545 	flags = LOCKPARENT;
1546 	if (vp->v_type == VDIR) {
1547 		flags |= STRIPSLASHES;
1548 	}
1549 
1550 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1551 	nd.ni_pledge = PLEDGE_CPATH;
1552 	nd.ni_unveil = UNVEIL_CREATE;
1553 	if ((error = namei(&nd)) != 0)
1554 		goto out;
1555 	if (nd.ni_vp) {
1556 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1557 		if (nd.ni_dvp == nd.ni_vp)
1558 			vrele(nd.ni_dvp);
1559 		else
1560 			vput(nd.ni_dvp);
1561 		vrele(nd.ni_vp);
1562 		error = EEXIST;
1563 		goto out;
1564 	}
1565 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1566 out:
1567 	vrele(vp);
1568 	return (error);
1569 }
1570 
1571 /*
1572  * Make a symbolic link.
1573  */
1574 int
1575 sys_symlink(struct proc *p, void *v, register_t *retval)
1576 {
1577 	struct sys_symlink_args /* {
1578 		syscallarg(const char *) path;
1579 		syscallarg(const char *) link;
1580 	} */ *uap = v;
1581 
1582 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1583 }
1584 
1585 int
1586 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1587 {
1588 	struct sys_symlinkat_args /* {
1589 		syscallarg(const char *) path;
1590 		syscallarg(int) fd;
1591 		syscallarg(const char *) link;
1592 	} */ *uap = v;
1593 
1594 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1595 	    SCARG(uap, link)));
1596 }
1597 
1598 int
1599 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1600 {
1601 	struct vattr vattr;
1602 	char *path;
1603 	int error;
1604 	struct nameidata nd;
1605 
1606 	path = pool_get(&namei_pool, PR_WAITOK);
1607 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1608 	if (error)
1609 		goto out;
1610 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1611 	nd.ni_pledge = PLEDGE_CPATH;
1612 	nd.ni_unveil = UNVEIL_CREATE;
1613 	if ((error = namei(&nd)) != 0)
1614 		goto out;
1615 	if (nd.ni_vp) {
1616 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1617 		if (nd.ni_dvp == nd.ni_vp)
1618 			vrele(nd.ni_dvp);
1619 		else
1620 			vput(nd.ni_dvp);
1621 		vrele(nd.ni_vp);
1622 		error = EEXIST;
1623 		goto out;
1624 	}
1625 	VATTR_NULL(&vattr);
1626 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1627 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1628 out:
1629 	pool_put(&namei_pool, path);
1630 	return (error);
1631 }
1632 
1633 /*
1634  * Delete a name from the filesystem.
1635  */
1636 int
1637 sys_unlink(struct proc *p, void *v, register_t *retval)
1638 {
1639 	struct sys_unlink_args /* {
1640 		syscallarg(const char *) path;
1641 	} */ *uap = v;
1642 
1643 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1644 }
1645 
1646 int
1647 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1648 {
1649 	struct sys_unlinkat_args /* {
1650 		syscallarg(int) fd;
1651 		syscallarg(const char *) path;
1652 		syscallarg(int) flag;
1653 	} */ *uap = v;
1654 
1655 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1656 	    SCARG(uap, flag)));
1657 }
1658 
1659 int
1660 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1661 {
1662 	struct vnode *vp;
1663 	int error;
1664 	struct nameidata nd;
1665 
1666 	if (flag & ~AT_REMOVEDIR)
1667 		return (EINVAL);
1668 
1669 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1670 	    fd, path, p);
1671 	nd.ni_pledge = PLEDGE_CPATH;
1672 	nd.ni_unveil = UNVEIL_CREATE;
1673 	if ((error = namei(&nd)) != 0)
1674 		return (error);
1675 	vp = nd.ni_vp;
1676 
1677 	if (flag & AT_REMOVEDIR) {
1678 		if (vp->v_type != VDIR) {
1679 			error = ENOTDIR;
1680 			goto out;
1681 		}
1682 		/*
1683 		 * No rmdir "." please.
1684 		 */
1685 		if (nd.ni_dvp == vp) {
1686 			error = EINVAL;
1687 			goto out;
1688 		}
1689 		/*
1690 		 * A mounted on directory cannot be deleted.
1691 		 */
1692 		if (vp->v_mountedhere != NULL) {
1693 			error = EBUSY;
1694 			goto out;
1695 		}
1696 	}
1697 
1698 	/*
1699 	 * The root of a mounted filesystem cannot be deleted.
1700 	 */
1701 	if (vp->v_flag & VROOT)
1702 		error = EBUSY;
1703 out:
1704 	if (!error) {
1705 		if (flag & AT_REMOVEDIR) {
1706 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1707 		} else {
1708 			(void)uvm_vnp_uncache(vp);
1709 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1710 		}
1711 	} else {
1712 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1713 		if (nd.ni_dvp == vp)
1714 			vrele(nd.ni_dvp);
1715 		else
1716 			vput(nd.ni_dvp);
1717 		vput(vp);
1718 	}
1719 	return (error);
1720 }
1721 
1722 /*
1723  * Reposition read/write file offset.
1724  */
1725 int
1726 sys_lseek(struct proc *p, void *v, register_t *retval)
1727 {
1728 	struct sys_lseek_args /* {
1729 		syscallarg(int) fd;
1730 		syscallarg(int) pad;
1731 		syscallarg(off_t) offset;
1732 		syscallarg(int) whence;
1733 	} */ *uap = v;
1734 	struct filedesc *fdp = p->p_fd;
1735 	struct file *fp;
1736 	off_t offset;
1737 	int error;
1738 
1739 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1740 		return (EBADF);
1741 	if (fp->f_ops->fo_seek == NULL) {
1742 		error = ESPIPE;
1743 		goto bad;
1744 	}
1745 	offset = SCARG(uap, offset);
1746 
1747 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1748 	if (error)
1749 		goto bad;
1750 
1751 	*(off_t *)retval = offset;
1752 	mtx_enter(&fp->f_mtx);
1753 	fp->f_seek++;
1754 	mtx_leave(&fp->f_mtx);
1755 	error = 0;
1756  bad:
1757 	FRELE(fp, p);
1758 	return (error);
1759 }
1760 
1761 /*
1762  * Check access permissions.
1763  */
1764 int
1765 sys_access(struct proc *p, void *v, register_t *retval)
1766 {
1767 	struct sys_access_args /* {
1768 		syscallarg(const char *) path;
1769 		syscallarg(int) amode;
1770 	} */ *uap = v;
1771 
1772 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1773 	    SCARG(uap, amode), 0));
1774 }
1775 
1776 int
1777 sys_faccessat(struct proc *p, void *v, register_t *retval)
1778 {
1779 	struct sys_faccessat_args /* {
1780 		syscallarg(int) fd;
1781 		syscallarg(const char *) path;
1782 		syscallarg(int) amode;
1783 		syscallarg(int) flag;
1784 	} */ *uap = v;
1785 
1786 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1787 	    SCARG(uap, amode), SCARG(uap, flag)));
1788 }
1789 
1790 int
1791 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1792 {
1793 	struct vnode *vp;
1794 	struct ucred *newcred, *oldcred;
1795 	struct nameidata nd;
1796 	int error;
1797 
1798 	if (amode & ~(R_OK | W_OK | X_OK))
1799 		return (EINVAL);
1800 	if (flag & ~AT_EACCESS)
1801 		return (EINVAL);
1802 
1803 	newcred = NULL;
1804 	oldcred = p->p_ucred;
1805 
1806 	/*
1807 	 * If access as real ids was requested and they really differ,
1808 	 * give the thread new creds with them reset
1809 	 */
1810 	if ((flag & AT_EACCESS) == 0 &&
1811 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1812 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1813 		p->p_ucred = newcred = crdup(oldcred);
1814 		newcred->cr_uid = newcred->cr_ruid;
1815 		newcred->cr_gid = newcred->cr_rgid;
1816 	}
1817 
1818 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1819 	nd.ni_pledge = PLEDGE_RPATH;
1820 	nd.ni_unveil = UNVEIL_READ;
1821 	if ((error = namei(&nd)) != 0)
1822 		goto out;
1823 	vp = nd.ni_vp;
1824 
1825 	/* Flags == 0 means only check for existence. */
1826 	if (amode) {
1827 		int vflags = 0;
1828 
1829 		if (amode & R_OK)
1830 			vflags |= VREAD;
1831 		if (amode & W_OK)
1832 			vflags |= VWRITE;
1833 		if (amode & X_OK)
1834 			vflags |= VEXEC;
1835 
1836 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
1837 		if (!error && (vflags & VWRITE))
1838 			error = vn_writechk(vp);
1839 	}
1840 	vput(vp);
1841 out:
1842 	if (newcred != NULL) {
1843 		p->p_ucred = oldcred;
1844 		crfree(newcred);
1845 	}
1846 	return (error);
1847 }
1848 
1849 /*
1850  * Get file status; this version follows links.
1851  */
1852 int
1853 sys_stat(struct proc *p, void *v, register_t *retval)
1854 {
1855 	struct sys_stat_args /* {
1856 		syscallarg(const char *) path;
1857 		syscallarg(struct stat *) ub;
1858 	} */ *uap = v;
1859 
1860 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
1861 }
1862 
1863 int
1864 sys_fstatat(struct proc *p, void *v, register_t *retval)
1865 {
1866 	struct sys_fstatat_args /* {
1867 		syscallarg(int) fd;
1868 		syscallarg(const char *) path;
1869 		syscallarg(struct stat *) buf;
1870 		syscallarg(int) flag;
1871 	} */ *uap = v;
1872 
1873 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
1874 	    SCARG(uap, buf), SCARG(uap, flag)));
1875 }
1876 
1877 int
1878 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
1879 {
1880 	struct stat sb;
1881 	int error, follow;
1882 	struct nameidata nd;
1883 
1884 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1885 		return (EINVAL);
1886 
1887 
1888 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1889 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1890 	nd.ni_pledge = PLEDGE_RPATH;
1891 	nd.ni_unveil = UNVEIL_READ;
1892 	if ((error = namei(&nd)) != 0)
1893 		return (error);
1894 	error = vn_stat(nd.ni_vp, &sb, p);
1895 	vput(nd.ni_vp);
1896 	if (error)
1897 		return (error);
1898 	if (nd.ni_pledge & PLEDGE_STATLIE) {
1899 		if (S_ISDIR(sb.st_mode) || S_ISLNK(sb.st_mode)) {
1900 			if (sb.st_uid >= 1000) {
1901 				sb.st_uid = p->p_ucred->cr_uid;
1902 				sb.st_gid = p->p_ucred->cr_gid;;
1903 			}
1904 			sb.st_gen = 0;
1905 		} else
1906 			return (ENOENT);
1907 	}
1908 	/* Don't let non-root see generation numbers (for NFS security) */
1909 	if (suser(p))
1910 		sb.st_gen = 0;
1911 	error = copyout(&sb, buf, sizeof(sb));
1912 #ifdef KTRACE
1913 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
1914 		ktrstat(p, &sb);
1915 #endif
1916 	return (error);
1917 }
1918 
1919 /*
1920  * Get file status; this version does not follow links.
1921  */
1922 int
1923 sys_lstat(struct proc *p, void *v, register_t *retval)
1924 {
1925 	struct sys_lstat_args /* {
1926 		syscallarg(const char *) path;
1927 		syscallarg(struct stat *) ub;
1928 	} */ *uap = v;
1929 
1930 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
1931 	    AT_SYMLINK_NOFOLLOW));
1932 }
1933 
1934 /*
1935  * Get configurable pathname variables.
1936  */
1937 int
1938 sys_pathconf(struct proc *p, void *v, register_t *retval)
1939 {
1940 	struct sys_pathconf_args /* {
1941 		syscallarg(const char *) path;
1942 		syscallarg(int) name;
1943 	} */ *uap = v;
1944 	int error;
1945 	struct nameidata nd;
1946 
1947 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1948 	    SCARG(uap, path), p);
1949 	nd.ni_pledge = PLEDGE_RPATH;
1950 	nd.ni_unveil = UNVEIL_READ;
1951 	if ((error = namei(&nd)) != 0)
1952 		return (error);
1953 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1954 	vput(nd.ni_vp);
1955 	return (error);
1956 }
1957 
1958 /*
1959  * Return target name of a symbolic link.
1960  */
1961 int
1962 sys_readlink(struct proc *p, void *v, register_t *retval)
1963 {
1964 	struct sys_readlink_args /* {
1965 		syscallarg(const char *) path;
1966 		syscallarg(char *) buf;
1967 		syscallarg(size_t) count;
1968 	} */ *uap = v;
1969 
1970 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
1971 	    SCARG(uap, count), retval));
1972 }
1973 
1974 int
1975 sys_readlinkat(struct proc *p, void *v, register_t *retval)
1976 {
1977 	struct sys_readlinkat_args /* {
1978 		syscallarg(int) fd;
1979 		syscallarg(const char *) path;
1980 		syscallarg(char *) buf;
1981 		syscallarg(size_t) count;
1982 	} */ *uap = v;
1983 
1984 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1985 	    SCARG(uap, buf), SCARG(uap, count), retval));
1986 }
1987 
1988 int
1989 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
1990     size_t count, register_t *retval)
1991 {
1992 	struct vnode *vp;
1993 	struct iovec aiov;
1994 	struct uio auio;
1995 	int error;
1996 	struct nameidata nd;
1997 
1998 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1999 	nd.ni_pledge = PLEDGE_RPATH;
2000 	nd.ni_unveil = UNVEIL_INSPECT;
2001 	if ((error = namei(&nd)) != 0)
2002 		return (error);
2003 	vp = nd.ni_vp;
2004 	if (vp->v_type != VLNK)
2005 		error = EINVAL;
2006 	else {
2007 		aiov.iov_base = buf;
2008 		aiov.iov_len = count;
2009 		auio.uio_iov = &aiov;
2010 		auio.uio_iovcnt = 1;
2011 		auio.uio_offset = 0;
2012 		auio.uio_rw = UIO_READ;
2013 		auio.uio_segflg = UIO_USERSPACE;
2014 		auio.uio_procp = p;
2015 		auio.uio_resid = count;
2016 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2017 		*retval = count - auio.uio_resid;
2018 	}
2019 	vput(vp);
2020 	return (error);
2021 }
2022 
2023 /*
2024  * Change flags of a file given a path name.
2025  */
2026 int
2027 sys_chflags(struct proc *p, void *v, register_t *retval)
2028 {
2029 	struct sys_chflags_args /* {
2030 		syscallarg(const char *) path;
2031 		syscallarg(u_int) flags;
2032 	} */ *uap = v;
2033 
2034 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2035 	    SCARG(uap, flags), 0));
2036 }
2037 
2038 int
2039 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2040 {
2041 	struct sys_chflagsat_args /* {
2042 		syscallarg(int) fd;
2043 		syscallarg(const char *) path;
2044 		syscallarg(u_int) flags;
2045 		syscallarg(int) atflags;
2046 	} */ *uap = v;
2047 
2048 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2049 	    SCARG(uap, flags), SCARG(uap, atflags)));
2050 }
2051 
2052 int
2053 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2054 {
2055 	struct nameidata nd;
2056 	int error, follow;
2057 
2058 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2059 		return (EINVAL);
2060 
2061 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2062 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2063 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2064 	nd.ni_unveil = UNVEIL_WRITE;
2065 	if ((error = namei(&nd)) != 0)
2066 		return (error);
2067 	return (dovchflags(p, nd.ni_vp, flags));
2068 }
2069 
2070 /*
2071  * Change flags of a file given a file descriptor.
2072  */
2073 int
2074 sys_fchflags(struct proc *p, void *v, register_t *retval)
2075 {
2076 	struct sys_fchflags_args /* {
2077 		syscallarg(int) fd;
2078 		syscallarg(u_int) flags;
2079 	} */ *uap = v;
2080 	struct file *fp;
2081 	struct vnode *vp;
2082 	int error;
2083 
2084 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2085 		return (error);
2086 	vp = fp->f_data;
2087 	vref(vp);
2088 	FRELE(fp, p);
2089 	return (dovchflags(p, vp, SCARG(uap, flags)));
2090 }
2091 
2092 int
2093 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2094 {
2095 	struct vattr vattr;
2096 	int error;
2097 
2098 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2099 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2100 		error = EROFS;
2101 	else if (flags == VNOVAL)
2102 		error = EINVAL;
2103 	else {
2104 		if (suser(p)) {
2105 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2106 			    != 0)
2107 				goto out;
2108 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2109 				error = EINVAL;
2110 				goto out;
2111 			}
2112 		}
2113 		VATTR_NULL(&vattr);
2114 		vattr.va_flags = flags;
2115 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2116 	}
2117 out:
2118 	vput(vp);
2119 	return (error);
2120 }
2121 
2122 /*
2123  * Change mode of a file given path name.
2124  */
2125 int
2126 sys_chmod(struct proc *p, void *v, register_t *retval)
2127 {
2128 	struct sys_chmod_args /* {
2129 		syscallarg(const char *) path;
2130 		syscallarg(mode_t) mode;
2131 	} */ *uap = v;
2132 
2133 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2134 }
2135 
2136 int
2137 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2138 {
2139 	struct sys_fchmodat_args /* {
2140 		syscallarg(int) fd;
2141 		syscallarg(const char *) path;
2142 		syscallarg(mode_t) mode;
2143 		syscallarg(int) flag;
2144 	} */ *uap = v;
2145 
2146 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2147 	    SCARG(uap, mode), SCARG(uap, flag)));
2148 }
2149 
2150 int
2151 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2152 {
2153 	struct vnode *vp;
2154 	struct vattr vattr;
2155 	int error, follow;
2156 	struct nameidata nd;
2157 
2158 	if (mode & ~(S_IFMT | ALLPERMS))
2159 		return (EINVAL);
2160 	if ((p->p_p->ps_flags & PS_PLEDGE))
2161 		mode &= ACCESSPERMS;
2162 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2163 		return (EINVAL);
2164 
2165 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2166 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2167 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2168 	nd.ni_unveil = UNVEIL_WRITE;
2169 	if ((error = namei(&nd)) != 0)
2170 		return (error);
2171 	vp = nd.ni_vp;
2172 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2173 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2174 		error = EROFS;
2175 	else {
2176 		VATTR_NULL(&vattr);
2177 		vattr.va_mode = mode & ALLPERMS;
2178 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2179 	}
2180 	vput(vp);
2181 	return (error);
2182 }
2183 
2184 /*
2185  * Change mode of a file given a file descriptor.
2186  */
2187 int
2188 sys_fchmod(struct proc *p, void *v, register_t *retval)
2189 {
2190 	struct sys_fchmod_args /* {
2191 		syscallarg(int) fd;
2192 		syscallarg(mode_t) mode;
2193 	} */ *uap = v;
2194 	struct vattr vattr;
2195 	struct vnode *vp;
2196 	struct file *fp;
2197 	mode_t mode = SCARG(uap, mode);
2198 	int error;
2199 
2200 	if (mode & ~(S_IFMT | ALLPERMS))
2201 		return (EINVAL);
2202 	if ((p->p_p->ps_flags & PS_PLEDGE))
2203 		mode &= ACCESSPERMS;
2204 
2205 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2206 		return (error);
2207 	vp = fp->f_data;
2208 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2209 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2210 		error = EROFS;
2211 	else {
2212 		VATTR_NULL(&vattr);
2213 		vattr.va_mode = mode & ALLPERMS;
2214 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2215 	}
2216 	VOP_UNLOCK(vp);
2217 	FRELE(fp, p);
2218 	return (error);
2219 }
2220 
2221 /*
2222  * Set ownership given a path name.
2223  */
2224 int
2225 sys_chown(struct proc *p, void *v, register_t *retval)
2226 {
2227 	struct sys_chown_args /* {
2228 		syscallarg(const char *) path;
2229 		syscallarg(uid_t) uid;
2230 		syscallarg(gid_t) gid;
2231 	} */ *uap = v;
2232 
2233 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2234 	    SCARG(uap, gid), 0));
2235 }
2236 
2237 int
2238 sys_fchownat(struct proc *p, void *v, register_t *retval)
2239 {
2240 	struct sys_fchownat_args /* {
2241 		syscallarg(int) fd;
2242 		syscallarg(const char *) path;
2243 		syscallarg(uid_t) uid;
2244 		syscallarg(gid_t) gid;
2245 		syscallarg(int) flag;
2246 	} */ *uap = v;
2247 
2248 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2249 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2250 }
2251 
2252 int
2253 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2254     int flag)
2255 {
2256 	struct vnode *vp;
2257 	struct vattr vattr;
2258 	int error, follow;
2259 	struct nameidata nd;
2260 	mode_t mode;
2261 
2262 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2263 		return (EINVAL);
2264 
2265 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2266 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2267 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2268 	nd.ni_unveil = UNVEIL_WRITE;
2269 	if ((error = namei(&nd)) != 0)
2270 		return (error);
2271 	vp = nd.ni_vp;
2272 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2273 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2274 		error = EROFS;
2275 	else {
2276 		if ((error = pledge_chown(p, uid, gid)))
2277 			goto out;
2278 		if ((uid != -1 || gid != -1) &&
2279 		    !vnoperm(vp) &&
2280 		    (suser(p) || suid_clear)) {
2281 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2282 			if (error)
2283 				goto out;
2284 			mode = vattr.va_mode & ~(VSUID | VSGID);
2285 			if (mode == vattr.va_mode)
2286 				mode = VNOVAL;
2287 		} else
2288 			mode = VNOVAL;
2289 		VATTR_NULL(&vattr);
2290 		vattr.va_uid = uid;
2291 		vattr.va_gid = gid;
2292 		vattr.va_mode = mode;
2293 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2294 	}
2295 out:
2296 	vput(vp);
2297 	return (error);
2298 }
2299 
2300 /*
2301  * Set ownership given a path name, without following links.
2302  */
2303 int
2304 sys_lchown(struct proc *p, void *v, register_t *retval)
2305 {
2306 	struct sys_lchown_args /* {
2307 		syscallarg(const char *) path;
2308 		syscallarg(uid_t) uid;
2309 		syscallarg(gid_t) gid;
2310 	} */ *uap = v;
2311 	struct vnode *vp;
2312 	struct vattr vattr;
2313 	int error;
2314 	struct nameidata nd;
2315 	mode_t mode;
2316 	uid_t uid = SCARG(uap, uid);
2317 	gid_t gid = SCARG(uap, gid);
2318 
2319 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2320 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2321 	nd.ni_unveil = UNVEIL_WRITE;
2322 	if ((error = namei(&nd)) != 0)
2323 		return (error);
2324 	vp = nd.ni_vp;
2325 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2326 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2327 		error = EROFS;
2328 	else {
2329 		if ((error = pledge_chown(p, uid, gid)))
2330 			goto out;
2331 		if ((uid != -1 || gid != -1) &&
2332 		    !vnoperm(vp) &&
2333 		    (suser(p) || suid_clear)) {
2334 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2335 			if (error)
2336 				goto out;
2337 			mode = vattr.va_mode & ~(VSUID | VSGID);
2338 			if (mode == vattr.va_mode)
2339 				mode = VNOVAL;
2340 		} else
2341 			mode = VNOVAL;
2342 		VATTR_NULL(&vattr);
2343 		vattr.va_uid = uid;
2344 		vattr.va_gid = gid;
2345 		vattr.va_mode = mode;
2346 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2347 	}
2348 out:
2349 	vput(vp);
2350 	return (error);
2351 }
2352 
2353 /*
2354  * Set ownership given a file descriptor.
2355  */
2356 int
2357 sys_fchown(struct proc *p, void *v, register_t *retval)
2358 {
2359 	struct sys_fchown_args /* {
2360 		syscallarg(int) fd;
2361 		syscallarg(uid_t) uid;
2362 		syscallarg(gid_t) gid;
2363 	} */ *uap = v;
2364 	struct vnode *vp;
2365 	struct vattr vattr;
2366 	int error;
2367 	struct file *fp;
2368 	mode_t mode;
2369 	uid_t uid = SCARG(uap, uid);
2370 	gid_t gid = SCARG(uap, gid);
2371 
2372 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2373 		return (error);
2374 	vp = fp->f_data;
2375 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2376 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2377 		error = EROFS;
2378 	else {
2379 		if ((error = pledge_chown(p, uid, gid)))
2380 			goto out;
2381 		if ((uid != -1 || gid != -1) &&
2382 		    !vnoperm(vp) &&
2383 		    (suser(p) || suid_clear)) {
2384 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2385 			if (error)
2386 				goto out;
2387 			mode = vattr.va_mode & ~(VSUID | VSGID);
2388 			if (mode == vattr.va_mode)
2389 				mode = VNOVAL;
2390 		} else
2391 			mode = VNOVAL;
2392 		VATTR_NULL(&vattr);
2393 		vattr.va_uid = uid;
2394 		vattr.va_gid = gid;
2395 		vattr.va_mode = mode;
2396 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2397 	}
2398 out:
2399 	VOP_UNLOCK(vp);
2400 	FRELE(fp, p);
2401 	return (error);
2402 }
2403 
2404 /*
2405  * Set the access and modification times given a path name.
2406  */
2407 int
2408 sys_utimes(struct proc *p, void *v, register_t *retval)
2409 {
2410 	struct sys_utimes_args /* {
2411 		syscallarg(const char *) path;
2412 		syscallarg(const struct timeval *) tptr;
2413 	} */ *uap = v;
2414 
2415 	struct timespec ts[2];
2416 	struct timeval tv[2];
2417 	const struct timeval *tvp;
2418 	int error;
2419 
2420 	tvp = SCARG(uap, tptr);
2421 	if (tvp != NULL) {
2422 		error = copyin(tvp, tv, sizeof(tv));
2423 		if (error)
2424 			return (error);
2425 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2426 			return (EINVAL);
2427 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2428 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2429 	} else
2430 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2431 
2432 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2433 }
2434 
2435 int
2436 sys_utimensat(struct proc *p, void *v, register_t *retval)
2437 {
2438 	struct sys_utimensat_args /* {
2439 		syscallarg(int) fd;
2440 		syscallarg(const char *) path;
2441 		syscallarg(const struct timespec *) times;
2442 		syscallarg(int) flag;
2443 	} */ *uap = v;
2444 
2445 	struct timespec ts[2];
2446 	const struct timespec *tsp;
2447 	int error, i;
2448 
2449 	tsp = SCARG(uap, times);
2450 	if (tsp != NULL) {
2451 		error = copyin(tsp, ts, sizeof(ts));
2452 		if (error)
2453 			return (error);
2454 		for (i = 0; i < nitems(ts); i++) {
2455 			if (ts[i].tv_nsec == UTIME_NOW)
2456 				continue;
2457 			if (ts[i].tv_nsec == UTIME_OMIT)
2458 				continue;
2459 			if (!timespecisvalid(&ts[i]))
2460 				return (EINVAL);
2461 		}
2462 	} else
2463 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2464 
2465 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2466 	    SCARG(uap, flag)));
2467 }
2468 
2469 int
2470 doutimensat(struct proc *p, int fd, const char *path,
2471     struct timespec ts[2], int flag)
2472 {
2473 	struct vnode *vp;
2474 	int error, follow;
2475 	struct nameidata nd;
2476 
2477 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2478 		return (EINVAL);
2479 
2480 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2481 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2482 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2483 	nd.ni_unveil = UNVEIL_WRITE;
2484 	if ((error = namei(&nd)) != 0)
2485 		return (error);
2486 	vp = nd.ni_vp;
2487 
2488 	return (dovutimens(p, vp, ts));
2489 }
2490 
2491 int
2492 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2493 {
2494 	struct vattr vattr;
2495 	struct timespec now;
2496 	int error;
2497 
2498 #ifdef KTRACE
2499 	/* if they're both UTIME_NOW, then don't report either */
2500 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2501 	    KTRPOINT(p, KTR_STRUCT)) {
2502 		ktrabstimespec(p, &ts[0]);
2503 		ktrabstimespec(p, &ts[1]);
2504 	}
2505 #endif
2506 
2507 	VATTR_NULL(&vattr);
2508 
2509 	/*  make sure ctime is updated even if neither mtime nor atime is */
2510 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2511 
2512 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2513 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2514 			vattr.va_vaflags |= VA_UTIMES_NULL;
2515 
2516 		getnanotime(&now);
2517 		if (ts[0].tv_nsec == UTIME_NOW)
2518 			ts[0] = now;
2519 		if (ts[1].tv_nsec == UTIME_NOW)
2520 			ts[1] = now;
2521 	}
2522 
2523 	if (ts[0].tv_nsec != UTIME_OMIT)
2524 		vattr.va_atime = ts[0];
2525 	if (ts[1].tv_nsec != UTIME_OMIT)
2526 		vattr.va_mtime = ts[1];
2527 
2528 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2529 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2530 		error = EROFS;
2531 	else
2532 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2533 	vput(vp);
2534 	return (error);
2535 }
2536 
2537 /*
2538  * Set the access and modification times given a file descriptor.
2539  */
2540 int
2541 sys_futimes(struct proc *p, void *v, register_t *retval)
2542 {
2543 	struct sys_futimes_args /* {
2544 		syscallarg(int) fd;
2545 		syscallarg(const struct timeval *) tptr;
2546 	} */ *uap = v;
2547 	struct timeval tv[2];
2548 	struct timespec ts[2];
2549 	const struct timeval *tvp;
2550 	int error;
2551 
2552 	tvp = SCARG(uap, tptr);
2553 	if (tvp != NULL) {
2554 		error = copyin(tvp, tv, sizeof(tv));
2555 		if (error)
2556 			return (error);
2557 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2558 			return (EINVAL);
2559 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2560 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2561 	} else
2562 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2563 
2564 	return (dofutimens(p, SCARG(uap, fd), ts));
2565 }
2566 
2567 int
2568 sys_futimens(struct proc *p, void *v, register_t *retval)
2569 {
2570 	struct sys_futimens_args /* {
2571 		syscallarg(int) fd;
2572 		syscallarg(const struct timespec *) times;
2573 	} */ *uap = v;
2574 	struct timespec ts[2];
2575 	const struct timespec *tsp;
2576 	int error, i;
2577 
2578 	tsp = SCARG(uap, times);
2579 	if (tsp != NULL) {
2580 		error = copyin(tsp, ts, sizeof(ts));
2581 		if (error)
2582 			return (error);
2583 		for (i = 0; i < nitems(ts); i++) {
2584 			if (ts[i].tv_nsec == UTIME_NOW)
2585 				continue;
2586 			if (ts[i].tv_nsec == UTIME_OMIT)
2587 				continue;
2588 			if (!timespecisvalid(&ts[i]))
2589 				return (EINVAL);
2590 		}
2591 	} else
2592 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2593 
2594 	return (dofutimens(p, SCARG(uap, fd), ts));
2595 }
2596 
2597 int
2598 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2599 {
2600 	struct file *fp;
2601 	struct vnode *vp;
2602 	int error;
2603 
2604 	if ((error = getvnode(p, fd, &fp)) != 0)
2605 		return (error);
2606 	vp = fp->f_data;
2607 	vref(vp);
2608 	FRELE(fp, p);
2609 
2610 	return (dovutimens(p, vp, ts));
2611 }
2612 
2613 /*
2614  * Truncate a file given its path name.
2615  */
2616 int
2617 sys_truncate(struct proc *p, void *v, register_t *retval)
2618 {
2619 	struct sys_truncate_args /* {
2620 		syscallarg(const char *) path;
2621 		syscallarg(int) pad;
2622 		syscallarg(off_t) length;
2623 	} */ *uap = v;
2624 	struct vnode *vp;
2625 	struct vattr vattr;
2626 	int error;
2627 	struct nameidata nd;
2628 
2629 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2630 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2631 	nd.ni_unveil = UNVEIL_WRITE;
2632 	if ((error = namei(&nd)) != 0)
2633 		return (error);
2634 	vp = nd.ni_vp;
2635 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2636 	if (vp->v_type == VDIR)
2637 		error = EISDIR;
2638 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2639 	    (error = vn_writechk(vp)) == 0) {
2640 		VATTR_NULL(&vattr);
2641 		vattr.va_size = SCARG(uap, length);
2642 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2643 	}
2644 	vput(vp);
2645 	return (error);
2646 }
2647 
2648 /*
2649  * Truncate a file given a file descriptor.
2650  */
2651 int
2652 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2653 {
2654 	struct sys_ftruncate_args /* {
2655 		syscallarg(int) fd;
2656 		syscallarg(int) pad;
2657 		syscallarg(off_t) length;
2658 	} */ *uap = v;
2659 	struct vattr vattr;
2660 	struct vnode *vp;
2661 	struct file *fp;
2662 	off_t len;
2663 	int error;
2664 
2665 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2666 		return (error);
2667 	len = SCARG(uap, length);
2668 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2669 		error = EINVAL;
2670 		goto bad;
2671 	}
2672 	vp = fp->f_data;
2673 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2674 	if (vp->v_type == VDIR)
2675 		error = EISDIR;
2676 	else if ((error = vn_writechk(vp)) == 0) {
2677 		VATTR_NULL(&vattr);
2678 		vattr.va_size = len;
2679 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2680 	}
2681 	VOP_UNLOCK(vp);
2682 bad:
2683 	FRELE(fp, p);
2684 	return (error);
2685 }
2686 
2687 /*
2688  * Sync an open file.
2689  */
2690 int
2691 sys_fsync(struct proc *p, void *v, register_t *retval)
2692 {
2693 	struct sys_fsync_args /* {
2694 		syscallarg(int) fd;
2695 	} */ *uap = v;
2696 	struct vnode *vp;
2697 	struct file *fp;
2698 	int error;
2699 
2700 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2701 		return (error);
2702 	vp = fp->f_data;
2703 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2704 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2705 #ifdef FFS_SOFTUPDATES
2706 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2707 		error = softdep_fsync(vp);
2708 #endif
2709 
2710 	VOP_UNLOCK(vp);
2711 	FRELE(fp, p);
2712 	return (error);
2713 }
2714 
2715 /*
2716  * Rename files.  Source and destination must either both be directories,
2717  * or both not be directories.  If target is a directory, it must be empty.
2718  */
2719 int
2720 sys_rename(struct proc *p, void *v, register_t *retval)
2721 {
2722 	struct sys_rename_args /* {
2723 		syscallarg(const char *) from;
2724 		syscallarg(const char *) to;
2725 	} */ *uap = v;
2726 
2727 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2728 	    SCARG(uap, to)));
2729 }
2730 
2731 int
2732 sys_renameat(struct proc *p, void *v, register_t *retval)
2733 {
2734 	struct sys_renameat_args /* {
2735 		syscallarg(int) fromfd;
2736 		syscallarg(const char *) from;
2737 		syscallarg(int) tofd;
2738 		syscallarg(const char *) to;
2739 	} */ *uap = v;
2740 
2741 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2742 	    SCARG(uap, tofd), SCARG(uap, to)));
2743 }
2744 
2745 int
2746 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2747     const char *to)
2748 {
2749 	struct vnode *tvp, *fvp, *tdvp;
2750 	struct nameidata fromnd, tond;
2751 	int error;
2752 	int flags;
2753 
2754 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2755 	    fromfd, from, p);
2756 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2757 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2758 	if ((error = namei(&fromnd)) != 0)
2759 		return (error);
2760 	fvp = fromnd.ni_vp;
2761 
2762 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2763 	/*
2764 	 * rename("foo/", "bar/");  is  OK
2765 	 */
2766 	if (fvp->v_type == VDIR)
2767 		flags |= STRIPSLASHES;
2768 
2769 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2770 	tond.ni_pledge = PLEDGE_CPATH;
2771 	tond.ni_unveil = UNVEIL_CREATE;
2772 	if ((error = namei(&tond)) != 0) {
2773 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2774 		vrele(fromnd.ni_dvp);
2775 		vrele(fvp);
2776 		goto out1;
2777 	}
2778 	tdvp = tond.ni_dvp;
2779 	tvp = tond.ni_vp;
2780 	if (tvp != NULL) {
2781 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2782 			error = ENOTDIR;
2783 			goto out;
2784 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2785 			error = EISDIR;
2786 			goto out;
2787 		}
2788 	}
2789 	if (fvp == tdvp)
2790 		error = EINVAL;
2791 	/*
2792 	 * If source is the same as the destination (that is the
2793 	 * same inode number)
2794 	 */
2795 	if (fvp == tvp)
2796 		error = -1;
2797 out:
2798 	if (!error) {
2799 		if (tvp) {
2800 			(void)uvm_vnp_uncache(tvp);
2801 		}
2802 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2803 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2804 	} else {
2805 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2806 		if (tdvp == tvp)
2807 			vrele(tdvp);
2808 		else
2809 			vput(tdvp);
2810 		if (tvp)
2811 			vput(tvp);
2812 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2813 		vrele(fromnd.ni_dvp);
2814 		vrele(fvp);
2815 	}
2816 	vrele(tond.ni_startdir);
2817 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2818 out1:
2819 	if (fromnd.ni_startdir)
2820 		vrele(fromnd.ni_startdir);
2821 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2822 	if (error == -1)
2823 		return (0);
2824 	return (error);
2825 }
2826 
2827 /*
2828  * Make a directory file.
2829  */
2830 int
2831 sys_mkdir(struct proc *p, void *v, register_t *retval)
2832 {
2833 	struct sys_mkdir_args /* {
2834 		syscallarg(const char *) path;
2835 		syscallarg(mode_t) mode;
2836 	} */ *uap = v;
2837 
2838 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
2839 }
2840 
2841 int
2842 sys_mkdirat(struct proc *p, void *v, register_t *retval)
2843 {
2844 	struct sys_mkdirat_args /* {
2845 		syscallarg(int) fd;
2846 		syscallarg(const char *) path;
2847 		syscallarg(mode_t) mode;
2848 	} */ *uap = v;
2849 
2850 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
2851 	    SCARG(uap, mode)));
2852 }
2853 
2854 int
2855 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
2856 {
2857 	struct vnode *vp;
2858 	struct vattr vattr;
2859 	int error;
2860 	struct nameidata nd;
2861 
2862 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
2863 	    fd, path, p);
2864 	nd.ni_pledge = PLEDGE_CPATH;
2865 	nd.ni_unveil = UNVEIL_CREATE;
2866 	if ((error = namei(&nd)) != 0)
2867 		return (error);
2868 	vp = nd.ni_vp;
2869 	if (vp != NULL) {
2870 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2871 		if (nd.ni_dvp == vp)
2872 			vrele(nd.ni_dvp);
2873 		else
2874 			vput(nd.ni_dvp);
2875 		vrele(vp);
2876 		return (EEXIST);
2877 	}
2878 	VATTR_NULL(&vattr);
2879 	vattr.va_type = VDIR;
2880 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2881 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2882 	if (!error)
2883 		vput(nd.ni_vp);
2884 	return (error);
2885 }
2886 
2887 /*
2888  * Remove a directory file.
2889  */
2890 int
2891 sys_rmdir(struct proc *p, void *v, register_t *retval)
2892 {
2893 	struct sys_rmdir_args /* {
2894 		syscallarg(const char *) path;
2895 	} */ *uap = v;
2896 
2897 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
2898 }
2899 
2900 /*
2901  * Read a block of directory entries in a file system independent format.
2902  */
2903 int
2904 sys_getdents(struct proc *p, void *v, register_t *retval)
2905 {
2906 	struct sys_getdents_args /* {
2907 		syscallarg(int) fd;
2908 		syscallarg(void *) buf;
2909 		syscallarg(size_t) buflen;
2910 	} */ *uap = v;
2911 	struct vnode *vp;
2912 	struct file *fp;
2913 	struct uio auio;
2914 	struct iovec aiov;
2915 	size_t buflen;
2916 	int error, eofflag;
2917 
2918 	buflen = SCARG(uap, buflen);
2919 
2920 	if (buflen > INT_MAX)
2921 		return EINVAL;
2922 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2923 		return (error);
2924 	if ((fp->f_flag & FREAD) == 0) {
2925 		error = EBADF;
2926 		goto bad;
2927 	}
2928 	if (fp->f_offset < 0) {
2929 		error = EINVAL;
2930 		goto bad;
2931 	}
2932 	vp = fp->f_data;
2933 	if (vp->v_type != VDIR) {
2934 		error = EINVAL;
2935 		goto bad;
2936 	}
2937 	aiov.iov_base = SCARG(uap, buf);
2938 	aiov.iov_len = buflen;
2939 	auio.uio_iov = &aiov;
2940 	auio.uio_iovcnt = 1;
2941 	auio.uio_rw = UIO_READ;
2942 	auio.uio_segflg = UIO_USERSPACE;
2943 	auio.uio_procp = p;
2944 	auio.uio_resid = buflen;
2945 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2946 	auio.uio_offset = fp->f_offset;
2947 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
2948 	fp->f_offset = auio.uio_offset;
2949 	VOP_UNLOCK(vp);
2950 	if (error)
2951 		goto bad;
2952 	*retval = buflen - auio.uio_resid;
2953 bad:
2954 	FRELE(fp, p);
2955 	return (error);
2956 }
2957 
2958 /*
2959  * Set the mode mask for creation of filesystem nodes.
2960  */
2961 int
2962 sys_umask(struct proc *p, void *v, register_t *retval)
2963 {
2964 	struct sys_umask_args /* {
2965 		syscallarg(mode_t) newmask;
2966 	} */ *uap = v;
2967 	struct filedesc *fdp;
2968 
2969 	fdp = p->p_fd;
2970 	*retval = fdp->fd_cmask;
2971 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2972 	return (0);
2973 }
2974 
2975 /*
2976  * Void all references to file by ripping underlying filesystem
2977  * away from vnode.
2978  */
2979 int
2980 sys_revoke(struct proc *p, void *v, register_t *retval)
2981 {
2982 	struct sys_revoke_args /* {
2983 		syscallarg(const char *) path;
2984 	} */ *uap = v;
2985 	struct vnode *vp;
2986 	struct vattr vattr;
2987 	int error;
2988 	struct nameidata nd;
2989 
2990 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2991 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
2992 	nd.ni_unveil = UNVEIL_READ;
2993 	if ((error = namei(&nd)) != 0)
2994 		return (error);
2995 	vp = nd.ni_vp;
2996 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
2997 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
2998 		error = ENOTTY;
2999 		goto out;
3000 	}
3001 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3002 		goto out;
3003 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3004 	    (error = suser(p)))
3005 		goto out;
3006 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3007 		VOP_REVOKE(vp, REVOKEALL);
3008 out:
3009 	vrele(vp);
3010 	return (error);
3011 }
3012 
3013 /*
3014  * Convert a user file descriptor to a kernel file entry.
3015  *
3016  * On return *fpp is FREF:ed.
3017  */
3018 int
3019 getvnode(struct proc *p, int fd, struct file **fpp)
3020 {
3021 	struct file *fp;
3022 	struct vnode *vp;
3023 
3024 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3025 		return (EBADF);
3026 
3027 	if (fp->f_type != DTYPE_VNODE) {
3028 		FRELE(fp, p);
3029 		return (EINVAL);
3030 	}
3031 
3032 	vp = fp->f_data;
3033 	if (vp->v_type == VBAD) {
3034 		FRELE(fp, p);
3035 		return (EBADF);
3036 	}
3037 
3038 	*fpp = fp;
3039 
3040 	return (0);
3041 }
3042 
3043 /*
3044  * Positional read system call.
3045  */
3046 int
3047 sys_pread(struct proc *p, void *v, register_t *retval)
3048 {
3049 	struct sys_pread_args /* {
3050 		syscallarg(int) fd;
3051 		syscallarg(void *) buf;
3052 		syscallarg(size_t) nbyte;
3053 		syscallarg(int) pad;
3054 		syscallarg(off_t) offset;
3055 	} */ *uap = v;
3056 	struct iovec iov;
3057 	struct uio auio;
3058 
3059 	iov.iov_base = SCARG(uap, buf);
3060 	iov.iov_len = SCARG(uap, nbyte);
3061 	if (iov.iov_len > SSIZE_MAX)
3062 		return (EINVAL);
3063 
3064 	auio.uio_iov = &iov;
3065 	auio.uio_iovcnt = 1;
3066 	auio.uio_resid = iov.iov_len;
3067 	auio.uio_offset = SCARG(uap, offset);
3068 
3069 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3070 }
3071 
3072 /*
3073  * Positional scatter read system call.
3074  */
3075 int
3076 sys_preadv(struct proc *p, void *v, register_t *retval)
3077 {
3078 	struct sys_preadv_args /* {
3079 		syscallarg(int) fd;
3080 		syscallarg(const struct iovec *) iovp;
3081 		syscallarg(int) iovcnt;
3082 		syscallarg(int) pad;
3083 		syscallarg(off_t) offset;
3084 	} */ *uap = v;
3085 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3086 	int error, iovcnt = SCARG(uap, iovcnt);
3087 	struct uio auio;
3088 	size_t resid;
3089 
3090 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3091 	if (error)
3092 		goto done;
3093 
3094 	auio.uio_iov = iov;
3095 	auio.uio_iovcnt = iovcnt;
3096 	auio.uio_resid = resid;
3097 	auio.uio_offset = SCARG(uap, offset);
3098 
3099 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3100  done:
3101 	iovec_free(iov, iovcnt);
3102  	return (error);
3103 }
3104 
3105 /*
3106  * Positional write system call.
3107  */
3108 int
3109 sys_pwrite(struct proc *p, void *v, register_t *retval)
3110 {
3111 	struct sys_pwrite_args /* {
3112 		syscallarg(int) fd;
3113 		syscallarg(const void *) buf;
3114 		syscallarg(size_t) nbyte;
3115 		syscallarg(int) pad;
3116 		syscallarg(off_t) offset;
3117 	} */ *uap = v;
3118 	struct iovec iov;
3119 	struct uio auio;
3120 
3121 	iov.iov_base = (void *)SCARG(uap, buf);
3122 	iov.iov_len = SCARG(uap, nbyte);
3123 	if (iov.iov_len > SSIZE_MAX)
3124 		return (EINVAL);
3125 
3126 	auio.uio_iov = &iov;
3127 	auio.uio_iovcnt = 1;
3128 	auio.uio_resid = iov.iov_len;
3129 	auio.uio_offset = SCARG(uap, offset);
3130 
3131 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3132 }
3133 
3134 /*
3135  * Positional gather write system call.
3136  */
3137 int
3138 sys_pwritev(struct proc *p, void *v, register_t *retval)
3139 {
3140 	struct sys_pwritev_args /* {
3141 		syscallarg(int) fd;
3142 		syscallarg(const struct iovec *) iovp;
3143 		syscallarg(int) iovcnt;
3144 		syscallarg(int) pad;
3145 		syscallarg(off_t) offset;
3146 	} */ *uap = v;
3147 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3148 	int error, iovcnt = SCARG(uap, iovcnt);
3149 	struct uio auio;
3150 	size_t resid;
3151 
3152 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3153 	if (error)
3154 		goto done;
3155 
3156 	auio.uio_iov = iov;
3157 	auio.uio_iovcnt = iovcnt;
3158 	auio.uio_resid = resid;
3159 	auio.uio_offset = SCARG(uap, offset);
3160 
3161 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3162  done:
3163 	iovec_free(iov, iovcnt);
3164  	return (error);
3165 }
3166