xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision f1dd7b858388b4a23f4f67a4957ec5ff656ebbe8)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.349 2021/02/11 12:08:21 claudio Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/fcntl.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/lock.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/pledge.h>
55 #include <sys/uio.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/dkio.h>
59 #include <sys/disklabel.h>
60 #include <sys/ktrace.h>
61 #include <sys/unistd.h>
62 #include <sys/specdev.h>
63 
64 #include <sys/syscallargs.h>
65 
66 extern int suid_clear;
67 
68 static int change_dir(struct nameidata *, struct proc *);
69 
70 void checkdirs(struct vnode *);
71 
72 int copyout_statfs(struct statfs *, void *, struct proc *);
73 
74 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
75 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
76 int dolinkat(struct proc *, int, const char *, int, const char *, int);
77 int dosymlinkat(struct proc *, const char *, int, const char *);
78 int dounlinkat(struct proc *, int, const char *, int);
79 int dofaccessat(struct proc *, int, const char *, int, int);
80 int dofstatat(struct proc *, int, const char *, struct stat *, int);
81 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
82     register_t *);
83 int dochflagsat(struct proc *, int, const char *, u_int, int);
84 int dovchflags(struct proc *, struct vnode *, u_int);
85 int dofchmodat(struct proc *, int, const char *, mode_t, int);
86 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
87 int dorenameat(struct proc *, int, const char *, int, const char *);
88 int domkdirat(struct proc *, int, const char *, mode_t);
89 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
90 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
91 int dofutimens(struct proc *, int, struct timespec [2]);
92 int dounmount_leaf(struct mount *, int, struct proc *);
93 
94 /*
95  * Virtual File System System Calls
96  */
97 
98 /*
99  * Mount a file system.
100  */
101 int
102 sys_mount(struct proc *p, void *v, register_t *retval)
103 {
104 	struct sys_mount_args /* {
105 		syscallarg(const char *) type;
106 		syscallarg(const char *) path;
107 		syscallarg(int) flags;
108 		syscallarg(void *) data;
109 	} */ *uap = v;
110 	struct vnode *vp;
111 	struct mount *mp;
112 	int error, mntflag = 0;
113 	char fstypename[MFSNAMELEN];
114 	char fspath[MNAMELEN];
115 	struct nameidata nd;
116 	struct vfsconf *vfsp;
117 	int flags = SCARG(uap, flags);
118 	void *args = NULL;
119 
120 	if ((error = suser(p)))
121 		return (error);
122 
123 	/*
124 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
125 	 */
126 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
127 	if (error)
128 		return(error);
129 
130 	/*
131 	 * Get vnode to be covered
132 	 */
133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
134 	if ((error = namei(&nd)) != 0)
135 		goto fail;
136 	vp = nd.ni_vp;
137 	if (flags & MNT_UPDATE) {
138 		if ((vp->v_flag & VROOT) == 0) {
139 			vput(vp);
140 			error = EINVAL;
141 			goto fail;
142 		}
143 		mp = vp->v_mount;
144 		vfsp = mp->mnt_vfc;
145 
146 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
147 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
148 		if (error) {
149 			vput(vp);
150 			goto fail;
151 		}
152 
153 		mntflag = mp->mnt_flag;
154 		/*
155 		 * We only allow the filesystem to be reloaded if it
156 		 * is currently mounted read-only.
157 		 */
158 		if ((flags & MNT_RELOAD) &&
159 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 			vput(vp);
161 			error = EOPNOTSUPP;	/* Needs translation */
162 			goto fail;
163 		}
164 
165 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
166 			vput(vp);
167 			goto fail;
168 		}
169 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
170 		goto update;
171 	}
172 	/*
173 	 * Do not allow disabling of permission checks unless exec and access to
174 	 * device files is disabled too.
175 	 */
176 	if ((flags & MNT_NOPERM) &&
177 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
178 		vput(vp);
179 		error = EPERM;
180 		goto fail;
181 	}
182 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
183 		vput(vp);
184 		goto fail;
185 	}
186 	if (vp->v_type != VDIR) {
187 		vput(vp);
188 		goto fail;
189 	}
190 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
191 	if (error) {
192 		vput(vp);
193 		goto fail;
194 	}
195 	vfsp = vfs_byname(fstypename);
196 	if (vfsp == NULL) {
197 		vput(vp);
198 		error = EOPNOTSUPP;
199 		goto fail;
200 	}
201 
202 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
203 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
204 	if (error) {
205 		vput(vp);
206 		goto fail;
207 	}
208 
209 	if (vp->v_mountedhere != NULL) {
210 		vput(vp);
211 		error = EBUSY;
212 		goto fail;
213 	}
214 
215 	/*
216 	 * Allocate and initialize the file system.
217 	 */
218 	mp = vfs_mount_alloc(vp, vfsp);
219 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
220 
221 update:
222 	/* Ensure that the parent mountpoint does not get unmounted. */
223 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
224 	if (error) {
225 		if (mp->mnt_flag & MNT_UPDATE) {
226 			mp->mnt_flag = mntflag;
227 			vfs_unbusy(mp);
228 		} else {
229 			vfs_unbusy(mp);
230 			vfs_mount_free(mp);
231 		}
232 		vput(vp);
233 		goto fail;
234 	}
235 
236 	/*
237 	 * Set the mount level flags.
238 	 */
239 	if (flags & MNT_RDONLY)
240 		mp->mnt_flag |= MNT_RDONLY;
241 	else if (mp->mnt_flag & MNT_RDONLY)
242 		mp->mnt_flag |= MNT_WANTRDWR;
243 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
244 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
245 	    MNT_NOPERM | MNT_FORCE);
246 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
247 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
248 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
249 	/*
250 	 * Mount the filesystem.
251 	 */
252 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
253 	if (!error) {
254 		mp->mnt_stat.f_ctime = gettime();
255 	}
256 	if (mp->mnt_flag & MNT_UPDATE) {
257 		vfs_unbusy(vp->v_mount);
258 		vput(vp);
259 		if (mp->mnt_flag & MNT_WANTRDWR)
260 			mp->mnt_flag &= ~MNT_RDONLY;
261 		mp->mnt_flag &= ~MNT_OP_FLAGS;
262 		if (error)
263 			mp->mnt_flag = mntflag;
264 
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
266 			if (mp->mnt_syncer == NULL)
267 				error = vfs_allocate_syncvnode(mp);
268 		} else {
269 			if (mp->mnt_syncer != NULL)
270 				vgone(mp->mnt_syncer);
271 			mp->mnt_syncer = NULL;
272 		}
273 
274 		vfs_unbusy(mp);
275 		goto fail;
276 	}
277 
278 	mp->mnt_flag &= ~MNT_OP_FLAGS;
279 	vp->v_mountedhere = mp;
280 
281 	/*
282 	 * Put the new filesystem on the mount list after root.
283 	 */
284 	cache_purge(vp);
285 	if (!error) {
286 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
287 		checkdirs(vp);
288 		vfs_unbusy(vp->v_mount);
289 		VOP_UNLOCK(vp);
290 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
291 			error = vfs_allocate_syncvnode(mp);
292 		vfs_unbusy(mp);
293 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
294 		if ((error = VFS_START(mp, 0, p)) != 0)
295 			vrele(vp);
296 	} else {
297 		mp->mnt_vnodecovered->v_mountedhere = NULL;
298 		vfs_unbusy(mp);
299 		vfs_mount_free(mp);
300 		vfs_unbusy(vp->v_mount);
301 		vput(vp);
302 	}
303 fail:
304 	if (args)
305 		free(args, M_TEMP, vfsp->vfc_datasize);
306 	return (error);
307 }
308 
309 /*
310  * Scan all active processes to see if any of them have a current
311  * or root directory onto which the new filesystem has just been
312  * mounted. If so, replace them with the new mount point, keeping
313  * track of how many were replaced.  That's the number of references
314  * the old vnode had that we've replaced, so finish by vrele()'ing
315  * it that many times.  This puts off any possible sleeping until
316  * we've finished walking the allprocess list.
317  */
318 void
319 checkdirs(struct vnode *olddp)
320 {
321 	struct filedesc *fdp;
322 	struct vnode *newdp;
323 	struct process *pr;
324 	u_int  free_count = 0;
325 
326 	if (olddp->v_usecount == 1)
327 		return;
328 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
329 		panic("mount: lost mount");
330 	LIST_FOREACH(pr, &allprocess, ps_list) {
331 		fdp = pr->ps_fd;
332 		if (fdp->fd_cdir == olddp) {
333 			free_count++;
334 			vref(newdp);
335 			fdp->fd_cdir = newdp;
336 		}
337 		if (fdp->fd_rdir == olddp) {
338 			free_count++;
339 			vref(newdp);
340 			fdp->fd_rdir = newdp;
341 		}
342 		pr->ps_uvpcwd = NULL;
343 	}
344 	if (rootvnode == olddp) {
345 		free_count++;
346 		vref(newdp);
347 		rootvnode = newdp;
348 	}
349 	while (free_count-- > 0)
350 		vrele(olddp);
351 	vput(newdp);
352 }
353 
354 /*
355  * Unmount a file system.
356  *
357  * Note: unmount takes a path to the vnode mounted on as argument,
358  * not special file (as before).
359  */
360 int
361 sys_unmount(struct proc *p, void *v, register_t *retval)
362 {
363 	struct sys_unmount_args /* {
364 		syscallarg(const char *) path;
365 		syscallarg(int) flags;
366 	} */ *uap = v;
367 	struct vnode *vp;
368 	struct mount *mp;
369 	int error;
370 	struct nameidata nd;
371 
372 	if ((error = suser(p)) != 0)
373 		return (error);
374 
375 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
376 	    SCARG(uap, path), p);
377 	if ((error = namei(&nd)) != 0)
378 		return (error);
379 	vp = nd.ni_vp;
380 	mp = vp->v_mount;
381 
382 	/*
383 	 * Don't allow unmounting the root file system.
384 	 */
385 	if (mp->mnt_flag & MNT_ROOTFS) {
386 		vput(vp);
387 		return (EINVAL);
388 	}
389 
390 	/*
391 	 * Must be the root of the filesystem
392 	 */
393 	if ((vp->v_flag & VROOT) == 0) {
394 		vput(vp);
395 		return (EINVAL);
396 	}
397 	vput(vp);
398 
399 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
400 		return (EBUSY);
401 
402 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
403 }
404 
405 /*
406  * Do the actual file system unmount.
407  */
408 int
409 dounmount(struct mount *mp, int flags, struct proc *p)
410 {
411 	SLIST_HEAD(, mount) mplist;
412 	struct mount *nmp;
413 	int error;
414 
415 	SLIST_INIT(&mplist);
416 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
417 
418 	/*
419 	 * Collect nested mount points. This takes advantage of the mount list
420 	 * being ordered - nested mount points come after their parent.
421 	 */
422 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
423 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
424 			if (mp->mnt_vnodecovered == NULLVP ||
425 			    mp->mnt_vnodecovered->v_mount != nmp)
426 				continue;
427 
428 			if ((flags & MNT_FORCE) == 0) {
429 				error = EBUSY;
430 				goto err;
431 			}
432 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
433 			if (error) {
434 				if ((flags & MNT_DOOMED)) {
435 					/*
436 					 * If the mount point was busy due to
437 					 * being unmounted, it has been removed
438 					 * from the mount list already.
439 					 * Restart the iteration from the last
440 					 * collected busy entry.
441 					 */
442 					mp = SLIST_FIRST(&mplist);
443 					break;
444 				}
445 				goto err;
446 			}
447 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
448 			break;
449 		}
450 	}
451 
452 	/*
453 	 * Nested mount points cannot appear during this loop as mounting
454 	 * requires a read lock for the parent mount point.
455 	 */
456 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
457 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
458 		error = dounmount_leaf(mp, flags, p);
459 		if (error)
460 			goto err;
461 	}
462 	return (0);
463 
464 err:
465 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
466 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
467 		vfs_unbusy(mp);
468 	}
469 	return (error);
470 }
471 
472 int
473 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
474 {
475 	struct vnode *coveredvp;
476 	struct vnode *vp, *nvp;
477 	int error;
478 	int hadsyncer = 0;
479 
480 	mp->mnt_flag &=~ MNT_ASYNC;
481 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
482 	if (mp->mnt_syncer != NULL) {
483 		hadsyncer = 1;
484 		vgone(mp->mnt_syncer);
485 		mp->mnt_syncer = NULL;
486 	}
487 
488 	/*
489 	 * Before calling file system unmount, make sure
490 	 * all unveils to vnodes in here are dropped.
491 	 */
492 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
493 		unveil_removevnode(vp);
494 	}
495 
496 	if (((mp->mnt_flag & MNT_RDONLY) ||
497 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
498 	    (flags & MNT_FORCE))
499 		error = VFS_UNMOUNT(mp, flags, p);
500 
501 	if (error && !(flags & MNT_DOOMED)) {
502 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
503 			(void) vfs_allocate_syncvnode(mp);
504 		vfs_unbusy(mp);
505 		return (error);
506 	}
507 
508 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
509 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
510 		coveredvp->v_mountedhere = NULL;
511 		vrele(coveredvp);
512 	}
513 
514 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
515 		panic("unmount: dangling vnode");
516 
517 	vfs_unbusy(mp);
518 	vfs_mount_free(mp);
519 
520 	return (0);
521 }
522 
523 /*
524  * Sync each mounted filesystem.
525  */
526 int
527 sys_sync(struct proc *p, void *v, register_t *retval)
528 {
529 	struct mount *mp;
530 	int asyncflag;
531 
532 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
533 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
534 			continue;
535 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
536 			asyncflag = mp->mnt_flag & MNT_ASYNC;
537 			mp->mnt_flag &= ~MNT_ASYNC;
538 			uvm_vnp_sync(mp);
539 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
540 			if (asyncflag)
541 				mp->mnt_flag |= MNT_ASYNC;
542 		}
543 		vfs_unbusy(mp);
544 	}
545 
546 	return (0);
547 }
548 
549 /*
550  * Change filesystem quotas.
551  */
552 int
553 sys_quotactl(struct proc *p, void *v, register_t *retval)
554 {
555 	struct sys_quotactl_args /* {
556 		syscallarg(const char *) path;
557 		syscallarg(int) cmd;
558 		syscallarg(int) uid;
559 		syscallarg(char *) arg;
560 	} */ *uap = v;
561 	struct mount *mp;
562 	int error;
563 	struct nameidata nd;
564 
565 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
566 	if ((error = namei(&nd)) != 0)
567 		return (error);
568 	mp = nd.ni_vp->v_mount;
569 	vrele(nd.ni_vp);
570 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
571 	    SCARG(uap, arg), p));
572 }
573 
574 int
575 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
576 {
577 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
578 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
579 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
580 	char *s, *d;
581 	int error;
582 
583 	/* Don't let non-root see filesystem id (for NFS security) */
584 	if (suser(p)) {
585 		fsid_t fsid;
586 
587 		s = (char *)sp;
588 		d = (char *)uaddr;
589 
590 		memset(&fsid, 0, sizeof(fsid));
591 
592 		if ((error = copyout(s, d, co_sz1)) != 0)
593 			return (error);
594 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
595 			return (error);
596 		return (copyout(s + co_off2, d + co_off2, co_sz2));
597 	}
598 
599 	return (copyout(sp, uaddr, sizeof(*sp)));
600 }
601 
602 /*
603  * Get filesystem statistics.
604  */
605 int
606 sys_statfs(struct proc *p, void *v, register_t *retval)
607 {
608 	struct sys_statfs_args /* {
609 		syscallarg(const char *) path;
610 		syscallarg(struct statfs *) buf;
611 	} */ *uap = v;
612 	struct mount *mp;
613 	struct statfs *sp;
614 	int error;
615 	struct nameidata nd;
616 
617 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
618 	    SCARG(uap, path), p);
619 	nd.ni_pledge = PLEDGE_RPATH;
620 	nd.ni_unveil = UNVEIL_READ;
621 	if ((error = namei(&nd)) != 0)
622 		return (error);
623 	mp = nd.ni_vp->v_mount;
624 	sp = &mp->mnt_stat;
625 	vrele(nd.ni_vp);
626 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
627 		return (error);
628 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
629 
630 	return (copyout_statfs(sp, SCARG(uap, buf), p));
631 }
632 
633 /*
634  * Get filesystem statistics.
635  */
636 int
637 sys_fstatfs(struct proc *p, void *v, register_t *retval)
638 {
639 	struct sys_fstatfs_args /* {
640 		syscallarg(int) fd;
641 		syscallarg(struct statfs *) buf;
642 	} */ *uap = v;
643 	struct file *fp;
644 	struct mount *mp;
645 	struct statfs *sp;
646 	int error;
647 
648 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
649 		return (error);
650 	mp = ((struct vnode *)fp->f_data)->v_mount;
651 	if (!mp) {
652 		FRELE(fp, p);
653 		return (ENOENT);
654 	}
655 	sp = &mp->mnt_stat;
656 	error = VFS_STATFS(mp, sp, p);
657 	FRELE(fp, p);
658 	if (error)
659 		return (error);
660 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
661 
662 	return (copyout_statfs(sp, SCARG(uap, buf), p));
663 }
664 
665 /*
666  * Get statistics on all filesystems.
667  */
668 int
669 sys_getfsstat(struct proc *p, void *v, register_t *retval)
670 {
671 	struct sys_getfsstat_args /* {
672 		syscallarg(struct statfs *) buf;
673 		syscallarg(size_t) bufsize;
674 		syscallarg(int) flags;
675 	} */ *uap = v;
676 	struct mount *mp;
677 	struct statfs *sp;
678 	struct statfs *sfsp;
679 	size_t count, maxcount;
680 	int error, flags = SCARG(uap, flags);
681 
682 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
683 	sfsp = SCARG(uap, buf);
684 	count = 0;
685 
686 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
687 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
688 			continue;
689 		if (sfsp && count < maxcount) {
690 			sp = &mp->mnt_stat;
691 
692 			/* Refresh stats unless MNT_NOWAIT is specified */
693 			if (flags != MNT_NOWAIT &&
694 			    flags != MNT_LAZY &&
695 			    (flags == MNT_WAIT ||
696 			    flags == 0) &&
697 			    (error = VFS_STATFS(mp, sp, p))) {
698 				vfs_unbusy(mp);
699 				continue;
700 			}
701 
702 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
703 #if notyet
704 			if (mp->mnt_flag & MNT_SOFTDEP)
705 				sp->f_eflags = STATFS_SOFTUPD;
706 #endif
707 			error = (copyout_statfs(sp, sfsp, p));
708 			if (error) {
709 				vfs_unbusy(mp);
710 				return (error);
711 			}
712 			sfsp++;
713 		}
714 		count++;
715 		vfs_unbusy(mp);
716 	}
717 
718 	if (sfsp && count > maxcount)
719 		*retval = maxcount;
720 	else
721 		*retval = count;
722 
723 	return (0);
724 }
725 
726 /*
727  * Change current working directory to a given file descriptor.
728  */
729 int
730 sys_fchdir(struct proc *p, void *v, register_t *retval)
731 {
732 	struct sys_fchdir_args /* {
733 		syscallarg(int) fd;
734 	} */ *uap = v;
735 	struct filedesc *fdp = p->p_fd;
736 	struct vnode *vp, *tdp, *old_cdir;
737 	struct mount *mp;
738 	struct file *fp;
739 	int error;
740 
741 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
742 		return (EBADF);
743 	vp = fp->f_data;
744 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
745 		FRELE(fp, p);
746 		return (ENOTDIR);
747 	}
748 	vref(vp);
749 	FRELE(fp, p);
750 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
751 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
752 
753 	while (!error && (mp = vp->v_mountedhere) != NULL) {
754 		if (vfs_busy(mp, VB_READ|VB_WAIT))
755 			continue;
756 		error = VFS_ROOT(mp, &tdp);
757 		vfs_unbusy(mp);
758 		if (error)
759 			break;
760 		vput(vp);
761 		vp = tdp;
762 	}
763 	if (error) {
764 		vput(vp);
765 		return (error);
766 	}
767 	VOP_UNLOCK(vp);
768 	old_cdir = fdp->fd_cdir;
769 	fdp->fd_cdir = vp;
770 	vrele(old_cdir);
771 	return (0);
772 }
773 
774 /*
775  * Change current working directory (``.'').
776  */
777 int
778 sys_chdir(struct proc *p, void *v, register_t *retval)
779 {
780 	struct sys_chdir_args /* {
781 		syscallarg(const char *) path;
782 	} */ *uap = v;
783 	struct filedesc *fdp = p->p_fd;
784 	struct vnode *old_cdir;
785 	int error;
786 	struct nameidata nd;
787 
788 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
789 	    SCARG(uap, path), p);
790 	nd.ni_pledge = PLEDGE_RPATH;
791 	nd.ni_unveil = UNVEIL_READ;
792 	if ((error = change_dir(&nd, p)) != 0)
793 		return (error);
794 	p->p_p->ps_uvpcwd = nd.ni_unveil_match;
795 	old_cdir = fdp->fd_cdir;
796 	fdp->fd_cdir = nd.ni_vp;
797 	vrele(old_cdir);
798 	return (0);
799 }
800 
801 /*
802  * Change notion of root (``/'') directory.
803  */
804 int
805 sys_chroot(struct proc *p, void *v, register_t *retval)
806 {
807 	struct sys_chroot_args /* {
808 		syscallarg(const char *) path;
809 	} */ *uap = v;
810 	struct filedesc *fdp = p->p_fd;
811 	struct vnode *old_cdir, *old_rdir;
812 	int error;
813 	struct nameidata nd;
814 
815 	if ((error = suser(p)) != 0)
816 		return (error);
817 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
818 	    SCARG(uap, path), p);
819 	if ((error = change_dir(&nd, p)) != 0)
820 		return (error);
821 	if (fdp->fd_rdir != NULL) {
822 		/*
823 		 * A chroot() done inside a changed root environment does
824 		 * an automatic chdir to avoid the out-of-tree experience.
825 		 */
826 		vref(nd.ni_vp);
827 		old_rdir = fdp->fd_rdir;
828 		old_cdir = fdp->fd_cdir;
829 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
830 		vrele(old_rdir);
831 		vrele(old_cdir);
832 	} else
833 		fdp->fd_rdir = nd.ni_vp;
834 	return (0);
835 }
836 
837 /*
838  * Common routine for chroot and chdir.
839  */
840 static int
841 change_dir(struct nameidata *ndp, struct proc *p)
842 {
843 	struct vnode *vp;
844 	int error;
845 
846 	if ((error = namei(ndp)) != 0)
847 		return (error);
848 	vp = ndp->ni_vp;
849 	if (vp->v_type != VDIR)
850 		error = ENOTDIR;
851 	else
852 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
853 	if (error)
854 		vput(vp);
855 	else
856 		VOP_UNLOCK(vp);
857 	return (error);
858 }
859 
860 int
861 sys___realpath(struct proc *p, void *v, register_t *retval)
862 {
863 	struct sys___realpath_args /* {
864 		syscallarg(const char *) pathname;
865 		syscallarg(char *) resolved;
866 	} */ *uap = v;
867 	char *pathname, *c;
868 	char *rpbuf;
869 	struct nameidata nd;
870 	size_t pathlen;
871 	int error = 0;
872 
873 	if (SCARG(uap, pathname) == NULL)
874 		return (EINVAL);
875 
876 	pathname = pool_get(&namei_pool, PR_WAITOK);
877 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
878 
879 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
880 	    &pathlen)))
881 		goto end;
882 
883 	if (pathlen == 1) { /* empty string "" */
884 		error = ENOENT;
885 		goto end;
886 	}
887 	if (pathlen < 2) {
888 		error = EINVAL;
889 		goto end;
890 	}
891 
892 	/* Get cwd for relative path if needed, prepend to rpbuf */
893 	rpbuf[0] = '\0';
894 	if (pathname[0] != '/') {
895 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
896 		char *cwdbuf, *bp;
897 
898 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
899 
900 		/* vfs_getcwd_common fills this in backwards */
901 		bp = &cwdbuf[cwdlen - 1];
902 		*bp = '\0';
903 
904 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
905 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
906 
907 		if (error) {
908 			free(cwdbuf, M_TEMP, cwdlen);
909 			goto end;
910 		}
911 
912 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
913 			free(cwdbuf, M_TEMP, cwdlen);
914 			error = ENAMETOOLONG;
915 			goto end;
916 		}
917 
918 		free(cwdbuf, M_TEMP, cwdlen);
919 	}
920 
921 	/* find root "/" or "//" */
922 	for (c = pathname; *c != '\0'; c++) {
923 		if (*c != '/')
924 			break;
925 	}
926 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME | REALPATH,
927 	    UIO_SYSSPACE, pathname, p);
928 
929 	nd.ni_cnd.cn_rpbuf = rpbuf;
930 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
931 
932 	nd.ni_pledge = PLEDGE_RPATH;
933 	nd.ni_unveil = UNVEIL_READ;
934 	if ((error = namei(&nd)) != 0)
935 		goto end;
936 
937 	/* release lock and reference from namei */
938 	if (nd.ni_vp) {
939 		VOP_UNLOCK(nd.ni_vp);
940 		vrele(nd.ni_vp);
941 	}
942 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
943 	    MAXPATHLEN, NULL);
944 
945 #ifdef KTRACE
946 	if (KTRPOINT(p, KTR_NAMEI))
947 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
948 #endif
949 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
950 end:
951 	pool_put(&namei_pool, rpbuf);
952 	pool_put(&namei_pool, pathname);
953 	return (error);
954 }
955 
956 int
957 sys_unveil(struct proc *p, void *v, register_t *retval)
958 {
959 	struct sys_unveil_args /* {
960 		syscallarg(const char *) path;
961 		syscallarg(const char *) permissions;
962 	} */ *uap = v;
963 	struct process *pr = p->p_p;
964 	char *pathname, *c;
965 	struct nameidata nd;
966 	size_t pathlen;
967 	char permissions[5];
968 	int error, allow;
969 
970 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
971 		pr->ps_uvdone = 1;
972 		return (0);
973 	}
974 
975 	if (pr->ps_uvdone != 0)
976 		return EPERM;
977 
978 	error = copyinstr(SCARG(uap, permissions), permissions,
979 	    sizeof(permissions), NULL);
980 	if (error)
981 		return (error);
982 	pathname = pool_get(&namei_pool, PR_WAITOK);
983 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
984 	if (error)
985 		goto end;
986 
987 #ifdef KTRACE
988 	if (KTRPOINT(p, KTR_STRUCT))
989 		ktrstruct(p, "unveil", permissions, strlen(permissions));
990 #endif
991 	if (pathlen < 2) {
992 		error = EINVAL;
993 		goto end;
994 	}
995 
996 	/* find root "/" or "//" */
997 	for (c = pathname; *c != '\0'; c++) {
998 		if (*c != '/')
999 			break;
1000 	}
1001 	if (*c == '\0')
1002 		/* root directory */
1003 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
1004 		    UIO_SYSSPACE, pathname, p);
1005 	else
1006 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
1007 		    UIO_SYSSPACE, pathname, p);
1008 
1009 	nd.ni_pledge = PLEDGE_UNVEIL;
1010 	if ((error = namei(&nd)) != 0)
1011 		goto ndfree;
1012 
1013 	/*
1014 	 * XXX Any access to the file or directory will allow us to
1015 	 * pledge path it
1016 	 */
1017 	allow = ((nd.ni_vp &&
1018 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1019 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1020 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1021 	    (nd.ni_dvp &&
1022 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1023 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1024 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1025 
1026 	/* release lock from namei, but keep ref */
1027 	if (nd.ni_vp)
1028 		VOP_UNLOCK(nd.ni_vp);
1029 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1030 		VOP_UNLOCK(nd.ni_dvp);
1031 
1032 	if (allow)
1033 		error = unveil_add(p, &nd, permissions);
1034 	else
1035 		error = EPERM;
1036 
1037 	/* release vref from namei, but not vref from unveil_add */
1038 	if (nd.ni_vp)
1039 		vrele(nd.ni_vp);
1040 	if (nd.ni_dvp)
1041 		vrele(nd.ni_dvp);
1042 
1043 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1044 ndfree:
1045 	unveil_free_traversed_vnodes(&nd);
1046 end:
1047 	pool_put(&namei_pool, pathname);
1048 
1049 	return (error);
1050 }
1051 
1052 /*
1053  * Check permissions, allocate an open file structure,
1054  * and call the device open routine if any.
1055  */
1056 int
1057 sys_open(struct proc *p, void *v, register_t *retval)
1058 {
1059 	struct sys_open_args /* {
1060 		syscallarg(const char *) path;
1061 		syscallarg(int) flags;
1062 		syscallarg(mode_t) mode;
1063 	} */ *uap = v;
1064 
1065 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1066 	    SCARG(uap, mode), retval));
1067 }
1068 
1069 int
1070 sys_openat(struct proc *p, void *v, register_t *retval)
1071 {
1072 	struct sys_openat_args /* {
1073 		syscallarg(int) fd;
1074 		syscallarg(const char *) path;
1075 		syscallarg(int) flags;
1076 		syscallarg(mode_t) mode;
1077 	} */ *uap = v;
1078 
1079 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1080 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1081 }
1082 
1083 int
1084 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1085     register_t *retval)
1086 {
1087 	struct filedesc *fdp = p->p_fd;
1088 	struct file *fp;
1089 	struct vnode *vp;
1090 	struct vattr vattr;
1091 	int flags, cloexec, cmode;
1092 	int type, indx, error, localtrunc = 0;
1093 	struct flock lf;
1094 	struct nameidata nd;
1095 	uint64_t ni_pledge = 0;
1096 	u_char ni_unveil = 0;
1097 
1098 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1099 		error = pledge_flock(p);
1100 		if (error != 0)
1101 			return (error);
1102 	}
1103 
1104 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1105 
1106 	fdplock(fdp);
1107 	if ((error = falloc(p, &fp, &indx)) != 0) {
1108 		fdpunlock(fdp);
1109 		return (error);
1110 	}
1111 	fdpunlock(fdp);
1112 
1113 	flags = FFLAGS(oflags);
1114 	if (flags & FREAD) {
1115 		ni_pledge |= PLEDGE_RPATH;
1116 		ni_unveil |= UNVEIL_READ;
1117 	}
1118 	if (flags & FWRITE) {
1119 		ni_pledge |= PLEDGE_WPATH;
1120 		ni_unveil |= UNVEIL_WRITE;
1121 	}
1122 	if (oflags & O_CREAT) {
1123 		ni_pledge |= PLEDGE_CPATH;
1124 		ni_unveil |= UNVEIL_CREATE;
1125 	}
1126 
1127 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1128 	if ((p->p_p->ps_flags & PS_PLEDGE))
1129 		cmode &= ACCESSPERMS;
1130 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1131 	nd.ni_pledge = ni_pledge;
1132 	nd.ni_unveil = ni_unveil;
1133 	p->p_dupfd = -1;			/* XXX check for fdopen */
1134 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1135 		localtrunc = 1;
1136 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1137 	}
1138 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1139 		fdplock(fdp);
1140 		if (error == ENODEV &&
1141 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1142 		    (error =
1143 			dupfdopen(p, indx, flags)) == 0) {
1144 			fdpunlock(fdp);
1145 			closef(fp, p);
1146 			*retval = indx;
1147 			return (error);
1148 		}
1149 		if (error == ERESTART)
1150 			error = EINTR;
1151 		fdremove(fdp, indx);
1152 		fdpunlock(fdp);
1153 		closef(fp, p);
1154 		return (error);
1155 	}
1156 	p->p_dupfd = 0;
1157 	vp = nd.ni_vp;
1158 	fp->f_flag = flags & FMASK;
1159 	fp->f_type = DTYPE_VNODE;
1160 	fp->f_ops = &vnops;
1161 	fp->f_data = vp;
1162 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1163 		lf.l_whence = SEEK_SET;
1164 		lf.l_start = 0;
1165 		lf.l_len = 0;
1166 		if (flags & O_EXLOCK)
1167 			lf.l_type = F_WRLCK;
1168 		else
1169 			lf.l_type = F_RDLCK;
1170 		type = F_FLOCK;
1171 		if ((flags & FNONBLOCK) == 0)
1172 			type |= F_WAIT;
1173 		VOP_UNLOCK(vp);
1174 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1175 		if (error) {
1176 			fdplock(fdp);
1177 			/* closef will vn_close the file for us. */
1178 			fdremove(fdp, indx);
1179 			fdpunlock(fdp);
1180 			closef(fp, p);
1181 			return (error);
1182 		}
1183 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1184 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1185 	}
1186 	if (localtrunc) {
1187 		if ((fp->f_flag & FWRITE) == 0)
1188 			error = EACCES;
1189 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1190 			error = EROFS;
1191 		else if (vp->v_type == VDIR)
1192 			error = EISDIR;
1193 		else if ((error = vn_writechk(vp)) == 0) {
1194 			VATTR_NULL(&vattr);
1195 			vattr.va_size = 0;
1196 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1197 		}
1198 		if (error) {
1199 			VOP_UNLOCK(vp);
1200 			fdplock(fdp);
1201 			/* closef will close the file for us. */
1202 			fdremove(fdp, indx);
1203 			fdpunlock(fdp);
1204 			closef(fp, p);
1205 			return (error);
1206 		}
1207 	}
1208 	VOP_UNLOCK(vp);
1209 	*retval = indx;
1210 	fdplock(fdp);
1211 	fdinsert(fdp, indx, cloexec, fp);
1212 	fdpunlock(fdp);
1213 	FRELE(fp, p);
1214 	return (error);
1215 }
1216 
1217 /*
1218  * Open a new created file (in /tmp) suitable for mmaping.
1219  */
1220 int
1221 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1222 {
1223 	struct sys___tmpfd_args /* {
1224 		syscallarg(int) flags;
1225 	} */ *uap = v;
1226 	struct filedesc *fdp = p->p_fd;
1227 	struct file *fp;
1228 	struct vnode *vp;
1229 	int oflags = SCARG(uap, flags);
1230 	int flags, cloexec, cmode;
1231 	int indx, error;
1232 	unsigned int i;
1233 	struct nameidata nd;
1234 	char path[64];
1235 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1236 
1237 	/* most flags are hardwired */
1238 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1239 
1240 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1241 
1242 	fdplock(fdp);
1243 	if ((error = falloc(p, &fp, &indx)) != 0) {
1244 		fdpunlock(fdp);
1245 		return (error);
1246 	}
1247 	fdpunlock(fdp);
1248 
1249 	flags = FFLAGS(oflags);
1250 
1251 	arc4random_buf(path, sizeof(path));
1252 	memcpy(path, "/tmp/", 5);
1253 	for (i = 5; i < sizeof(path) - 1; i++)
1254 		path[i] = letters[(unsigned char)path[i] & 63];
1255 	path[sizeof(path)-1] = 0;
1256 
1257 	cmode = 0600;
1258 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1259 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1260 		if (error == ERESTART)
1261 			error = EINTR;
1262 		fdplock(fdp);
1263 		fdremove(fdp, indx);
1264 		fdpunlock(fdp);
1265 		closef(fp, p);
1266 		return (error);
1267 	}
1268 	vp = nd.ni_vp;
1269 	fp->f_flag = flags & FMASK;
1270 	fp->f_type = DTYPE_VNODE;
1271 	fp->f_ops = &vnops;
1272 	fp->f_data = vp;
1273 	VOP_UNLOCK(vp);
1274 	*retval = indx;
1275 	fdplock(fdp);
1276 	fdinsert(fdp, indx, cloexec, fp);
1277 	fdpunlock(fdp);
1278 	FRELE(fp, p);
1279 
1280 	/* unlink it */
1281 	/* XXX
1282 	 * there is a wee race here, although it is mostly inconsequential.
1283 	 * perhaps someday we can create a file like object without a name...
1284 	 */
1285 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1286 	    AT_FDCWD, path, p);
1287 	if ((error = namei(&nd)) != 0) {
1288 		printf("can't unlink temp file! %d\n", error);
1289 		error = 0;
1290 	} else {
1291 		vp = nd.ni_vp;
1292 		uvm_vnp_uncache(vp);
1293 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1294 		if (error) {
1295 			printf("error removing vop: %d\n", error);
1296 			error = 0;
1297 		}
1298 	}
1299 
1300 	return (error);
1301 }
1302 
1303 /*
1304  * Get file handle system call
1305  */
1306 int
1307 sys_getfh(struct proc *p, void *v, register_t *retval)
1308 {
1309 	struct sys_getfh_args /* {
1310 		syscallarg(const char *) fname;
1311 		syscallarg(fhandle_t *) fhp;
1312 	} */ *uap = v;
1313 	struct vnode *vp;
1314 	fhandle_t fh;
1315 	int error;
1316 	struct nameidata nd;
1317 
1318 	/*
1319 	 * Must be super user
1320 	 */
1321 	error = suser(p);
1322 	if (error)
1323 		return (error);
1324 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1325 	    SCARG(uap, fname), p);
1326 	error = namei(&nd);
1327 	if (error)
1328 		return (error);
1329 	vp = nd.ni_vp;
1330 	memset(&fh, 0, sizeof(fh));
1331 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1332 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1333 	vput(vp);
1334 	if (error)
1335 		return (error);
1336 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1337 	return (error);
1338 }
1339 
1340 /*
1341  * Open a file given a file handle.
1342  *
1343  * Check permissions, allocate an open file structure,
1344  * and call the device open routine if any.
1345  */
1346 int
1347 sys_fhopen(struct proc *p, void *v, register_t *retval)
1348 {
1349 	struct sys_fhopen_args /* {
1350 		syscallarg(const fhandle_t *) fhp;
1351 		syscallarg(int) flags;
1352 	} */ *uap = v;
1353 	struct filedesc *fdp = p->p_fd;
1354 	struct file *fp;
1355 	struct vnode *vp = NULL;
1356 	struct mount *mp;
1357 	struct ucred *cred = p->p_ucred;
1358 	int flags, cloexec;
1359 	int type, indx, error=0;
1360 	struct flock lf;
1361 	struct vattr va;
1362 	fhandle_t fh;
1363 
1364 	/*
1365 	 * Must be super user
1366 	 */
1367 	if ((error = suser(p)))
1368 		return (error);
1369 
1370 	flags = FFLAGS(SCARG(uap, flags));
1371 	if ((flags & (FREAD | FWRITE)) == 0)
1372 		return (EINVAL);
1373 	if ((flags & O_CREAT))
1374 		return (EINVAL);
1375 
1376 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1377 
1378 	fdplock(fdp);
1379 	if ((error = falloc(p, &fp, &indx)) != 0) {
1380 		fdpunlock(fdp);
1381 		fp = NULL;
1382 		goto bad;
1383 	}
1384 	fdpunlock(fdp);
1385 
1386 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1387 		goto bad;
1388 
1389 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1390 		error = ESTALE;
1391 		goto bad;
1392 	}
1393 
1394 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1395 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1396 		goto bad;
1397 	}
1398 
1399 	/* Now do an effective vn_open */
1400 
1401 	if (vp->v_type == VSOCK) {
1402 		error = EOPNOTSUPP;
1403 		goto bad;
1404 	}
1405 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1406 		error = ENOTDIR;
1407 		goto bad;
1408 	}
1409 	if (flags & FREAD) {
1410 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1411 			goto bad;
1412 	}
1413 	if (flags & (FWRITE | O_TRUNC)) {
1414 		if (vp->v_type == VDIR) {
1415 			error = EISDIR;
1416 			goto bad;
1417 		}
1418 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1419 		    (error = vn_writechk(vp)) != 0)
1420 			goto bad;
1421 	}
1422 	if (flags & O_TRUNC) {
1423 		VATTR_NULL(&va);
1424 		va.va_size = 0;
1425 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1426 			goto bad;
1427 	}
1428 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1429 		goto bad;
1430 	if (flags & FWRITE)
1431 		vp->v_writecount++;
1432 
1433 	/* done with modified vn_open, now finish what sys_open does. */
1434 
1435 	fp->f_flag = flags & FMASK;
1436 	fp->f_type = DTYPE_VNODE;
1437 	fp->f_ops = &vnops;
1438 	fp->f_data = vp;
1439 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1440 		lf.l_whence = SEEK_SET;
1441 		lf.l_start = 0;
1442 		lf.l_len = 0;
1443 		if (flags & O_EXLOCK)
1444 			lf.l_type = F_WRLCK;
1445 		else
1446 			lf.l_type = F_RDLCK;
1447 		type = F_FLOCK;
1448 		if ((flags & FNONBLOCK) == 0)
1449 			type |= F_WAIT;
1450 		VOP_UNLOCK(vp);
1451 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1452 		if (error) {
1453 			vp = NULL;	/* closef will vn_close the file */
1454 			goto bad;
1455 		}
1456 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1457 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1458 	}
1459 	VOP_UNLOCK(vp);
1460 	*retval = indx;
1461 	fdplock(fdp);
1462 	fdinsert(fdp, indx, cloexec, fp);
1463 	fdpunlock(fdp);
1464 	FRELE(fp, p);
1465 	return (0);
1466 
1467 bad:
1468 	if (fp) {
1469 		fdplock(fdp);
1470 		fdremove(fdp, indx);
1471 		fdpunlock(fdp);
1472 		closef(fp, p);
1473 		if (vp != NULL)
1474 			vput(vp);
1475 	}
1476 	return (error);
1477 }
1478 
1479 int
1480 sys_fhstat(struct proc *p, void *v, register_t *retval)
1481 {
1482 	struct sys_fhstat_args /* {
1483 		syscallarg(const fhandle_t *) fhp;
1484 		syscallarg(struct stat *) sb;
1485 	} */ *uap = v;
1486 	struct stat sb;
1487 	int error;
1488 	fhandle_t fh;
1489 	struct mount *mp;
1490 	struct vnode *vp;
1491 
1492 	/*
1493 	 * Must be super user
1494 	 */
1495 	if ((error = suser(p)))
1496 		return (error);
1497 
1498 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1499 		return (error);
1500 
1501 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1502 		return (ESTALE);
1503 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1504 		return (error);
1505 	error = vn_stat(vp, &sb, p);
1506 	vput(vp);
1507 	if (error)
1508 		return (error);
1509 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1510 	return (error);
1511 }
1512 
1513 int
1514 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1515 {
1516 	struct sys_fhstatfs_args /* {
1517 		syscallarg(const fhandle_t *) fhp;
1518 		syscallarg(struct statfs *) buf;
1519 	} */ *uap = v;
1520 	struct statfs *sp;
1521 	fhandle_t fh;
1522 	struct mount *mp;
1523 	struct vnode *vp;
1524 	int error;
1525 
1526 	/*
1527 	 * Must be super user
1528 	 */
1529 	if ((error = suser(p)))
1530 		return (error);
1531 
1532 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1533 		return (error);
1534 
1535 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1536 		return (ESTALE);
1537 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1538 		return (error);
1539 	mp = vp->v_mount;
1540 	sp = &mp->mnt_stat;
1541 	vput(vp);
1542 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1543 		return (error);
1544 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1545 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1546 }
1547 
1548 /*
1549  * Create a special file or named pipe.
1550  */
1551 int
1552 sys_mknod(struct proc *p, void *v, register_t *retval)
1553 {
1554 	struct sys_mknod_args /* {
1555 		syscallarg(const char *) path;
1556 		syscallarg(mode_t) mode;
1557 		syscallarg(int) dev;
1558 	} */ *uap = v;
1559 
1560 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1561 	    SCARG(uap, dev)));
1562 }
1563 
1564 int
1565 sys_mknodat(struct proc *p, void *v, register_t *retval)
1566 {
1567 	struct sys_mknodat_args /* {
1568 		syscallarg(int) fd;
1569 		syscallarg(const char *) path;
1570 		syscallarg(mode_t) mode;
1571 		syscallarg(dev_t) dev;
1572 	} */ *uap = v;
1573 
1574 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1575 	    SCARG(uap, mode), SCARG(uap, dev)));
1576 }
1577 
1578 int
1579 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1580 {
1581 	struct vnode *vp;
1582 	struct vattr vattr;
1583 	int error;
1584 	struct nameidata nd;
1585 
1586 	if (dev == VNOVAL)
1587 		return (EINVAL);
1588 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1589 	nd.ni_pledge = PLEDGE_DPATH;
1590 	nd.ni_unveil = UNVEIL_CREATE;
1591 	if ((error = namei(&nd)) != 0)
1592 		return (error);
1593 	vp = nd.ni_vp;
1594 	if (!S_ISFIFO(mode) || dev != 0) {
1595 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1596 			goto out;
1597 		if (p->p_fd->fd_rdir) {
1598 			error = EINVAL;
1599 			goto out;
1600 		}
1601 	}
1602 	if (vp != NULL)
1603 		error = EEXIST;
1604 	else {
1605 		VATTR_NULL(&vattr);
1606 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1607 		if ((p->p_p->ps_flags & PS_PLEDGE))
1608 			vattr.va_mode &= ACCESSPERMS;
1609 		vattr.va_rdev = dev;
1610 
1611 		switch (mode & S_IFMT) {
1612 		case S_IFMT:	/* used by badsect to flag bad sectors */
1613 			vattr.va_type = VBAD;
1614 			break;
1615 		case S_IFCHR:
1616 			vattr.va_type = VCHR;
1617 			break;
1618 		case S_IFBLK:
1619 			vattr.va_type = VBLK;
1620 			break;
1621 		case S_IFIFO:
1622 #ifndef FIFO
1623 			error = EOPNOTSUPP;
1624 			break;
1625 #else
1626 			if (dev == 0) {
1627 				vattr.va_type = VFIFO;
1628 				break;
1629 			}
1630 			/* FALLTHROUGH */
1631 #endif /* FIFO */
1632 		default:
1633 			error = EINVAL;
1634 			break;
1635 		}
1636 	}
1637 out:
1638 	if (!error) {
1639 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1640 		vput(nd.ni_dvp);
1641 	} else {
1642 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1643 		if (nd.ni_dvp == vp)
1644 			vrele(nd.ni_dvp);
1645 		else
1646 			vput(nd.ni_dvp);
1647 		if (vp)
1648 			vrele(vp);
1649 	}
1650 	return (error);
1651 }
1652 
1653 /*
1654  * Create a named pipe.
1655  */
1656 int
1657 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1658 {
1659 	struct sys_mkfifo_args /* {
1660 		syscallarg(const char *) path;
1661 		syscallarg(mode_t) mode;
1662 	} */ *uap = v;
1663 
1664 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1665 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1666 }
1667 
1668 int
1669 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1670 {
1671 	struct sys_mkfifoat_args /* {
1672 		syscallarg(int) fd;
1673 		syscallarg(const char *) path;
1674 		syscallarg(mode_t) mode;
1675 	} */ *uap = v;
1676 
1677 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1678 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1679 }
1680 
1681 /*
1682  * Make a hard file link.
1683  */
1684 int
1685 sys_link(struct proc *p, void *v, register_t *retval)
1686 {
1687 	struct sys_link_args /* {
1688 		syscallarg(const char *) path;
1689 		syscallarg(const char *) link;
1690 	} */ *uap = v;
1691 
1692 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1693 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1694 }
1695 
1696 int
1697 sys_linkat(struct proc *p, void *v, register_t *retval)
1698 {
1699 	struct sys_linkat_args /* {
1700 		syscallarg(int) fd1;
1701 		syscallarg(const char *) path1;
1702 		syscallarg(int) fd2;
1703 		syscallarg(const char *) path2;
1704 		syscallarg(int) flag;
1705 	} */ *uap = v;
1706 
1707 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1708 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1709 }
1710 
1711 int
1712 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1713     const char *path2, int flag)
1714 {
1715 	struct vnode *vp;
1716 	struct nameidata nd;
1717 	int error, follow;
1718 	int flags;
1719 
1720 	if (flag & ~AT_SYMLINK_FOLLOW)
1721 		return (EINVAL);
1722 
1723 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1724 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1725 	nd.ni_pledge = PLEDGE_RPATH;
1726 	nd.ni_unveil = UNVEIL_READ;
1727 	if ((error = namei(&nd)) != 0)
1728 		return (error);
1729 	vp = nd.ni_vp;
1730 
1731 	flags = LOCKPARENT;
1732 	if (vp->v_type == VDIR) {
1733 		flags |= STRIPSLASHES;
1734 	}
1735 
1736 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1737 	nd.ni_pledge = PLEDGE_CPATH;
1738 	nd.ni_unveil = UNVEIL_CREATE;
1739 	if ((error = namei(&nd)) != 0)
1740 		goto out;
1741 	if (nd.ni_vp) {
1742 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1743 		if (nd.ni_dvp == nd.ni_vp)
1744 			vrele(nd.ni_dvp);
1745 		else
1746 			vput(nd.ni_dvp);
1747 		vrele(nd.ni_vp);
1748 		error = EEXIST;
1749 		goto out;
1750 	}
1751 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1752 out:
1753 	vrele(vp);
1754 	return (error);
1755 }
1756 
1757 /*
1758  * Make a symbolic link.
1759  */
1760 int
1761 sys_symlink(struct proc *p, void *v, register_t *retval)
1762 {
1763 	struct sys_symlink_args /* {
1764 		syscallarg(const char *) path;
1765 		syscallarg(const char *) link;
1766 	} */ *uap = v;
1767 
1768 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1769 }
1770 
1771 int
1772 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1773 {
1774 	struct sys_symlinkat_args /* {
1775 		syscallarg(const char *) path;
1776 		syscallarg(int) fd;
1777 		syscallarg(const char *) link;
1778 	} */ *uap = v;
1779 
1780 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1781 	    SCARG(uap, link)));
1782 }
1783 
1784 int
1785 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1786 {
1787 	struct vattr vattr;
1788 	char *path;
1789 	int error;
1790 	struct nameidata nd;
1791 
1792 	path = pool_get(&namei_pool, PR_WAITOK);
1793 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1794 	if (error)
1795 		goto out;
1796 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1797 	nd.ni_pledge = PLEDGE_CPATH;
1798 	nd.ni_unveil = UNVEIL_CREATE;
1799 	if ((error = namei(&nd)) != 0)
1800 		goto out;
1801 	if (nd.ni_vp) {
1802 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1803 		if (nd.ni_dvp == nd.ni_vp)
1804 			vrele(nd.ni_dvp);
1805 		else
1806 			vput(nd.ni_dvp);
1807 		vrele(nd.ni_vp);
1808 		error = EEXIST;
1809 		goto out;
1810 	}
1811 	VATTR_NULL(&vattr);
1812 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1813 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1814 out:
1815 	pool_put(&namei_pool, path);
1816 	return (error);
1817 }
1818 
1819 /*
1820  * Delete a name from the filesystem.
1821  */
1822 int
1823 sys_unlink(struct proc *p, void *v, register_t *retval)
1824 {
1825 	struct sys_unlink_args /* {
1826 		syscallarg(const char *) path;
1827 	} */ *uap = v;
1828 
1829 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1830 }
1831 
1832 int
1833 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1834 {
1835 	struct sys_unlinkat_args /* {
1836 		syscallarg(int) fd;
1837 		syscallarg(const char *) path;
1838 		syscallarg(int) flag;
1839 	} */ *uap = v;
1840 
1841 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1842 	    SCARG(uap, flag)));
1843 }
1844 
1845 int
1846 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1847 {
1848 	struct vnode *vp;
1849 	int error;
1850 	struct nameidata nd;
1851 
1852 	if (flag & ~AT_REMOVEDIR)
1853 		return (EINVAL);
1854 
1855 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1856 	    fd, path, p);
1857 	nd.ni_pledge = PLEDGE_CPATH;
1858 	nd.ni_unveil = UNVEIL_CREATE;
1859 	if ((error = namei(&nd)) != 0)
1860 		return (error);
1861 	vp = nd.ni_vp;
1862 
1863 	if (flag & AT_REMOVEDIR) {
1864 		if (vp->v_type != VDIR) {
1865 			error = ENOTDIR;
1866 			goto out;
1867 		}
1868 		/*
1869 		 * No rmdir "." please.
1870 		 */
1871 		if (nd.ni_dvp == vp) {
1872 			error = EINVAL;
1873 			goto out;
1874 		}
1875 		/*
1876 		 * A mounted on directory cannot be deleted.
1877 		 */
1878 		if (vp->v_mountedhere != NULL) {
1879 			error = EBUSY;
1880 			goto out;
1881 		}
1882 	}
1883 
1884 	/*
1885 	 * The root of a mounted filesystem cannot be deleted.
1886 	 */
1887 	if (vp->v_flag & VROOT)
1888 		error = EBUSY;
1889 out:
1890 	if (!error) {
1891 		if (flag & AT_REMOVEDIR) {
1892 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1893 		} else {
1894 			(void)uvm_vnp_uncache(vp);
1895 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1896 		}
1897 	} else {
1898 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1899 		if (nd.ni_dvp == vp)
1900 			vrele(nd.ni_dvp);
1901 		else
1902 			vput(nd.ni_dvp);
1903 		vput(vp);
1904 	}
1905 	return (error);
1906 }
1907 
1908 /*
1909  * Reposition read/write file offset.
1910  */
1911 int
1912 sys_lseek(struct proc *p, void *v, register_t *retval)
1913 {
1914 	struct sys_lseek_args /* {
1915 		syscallarg(int) fd;
1916 		syscallarg(int) pad;
1917 		syscallarg(off_t) offset;
1918 		syscallarg(int) whence;
1919 	} */ *uap = v;
1920 	struct filedesc *fdp = p->p_fd;
1921 	struct file *fp;
1922 	off_t offset;
1923 	int error;
1924 
1925 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1926 		return (EBADF);
1927 	if (fp->f_ops->fo_seek == NULL) {
1928 		error = ESPIPE;
1929 		goto bad;
1930 	}
1931 	offset = SCARG(uap, offset);
1932 
1933 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1934 	if (error)
1935 		goto bad;
1936 
1937 	*(off_t *)retval = offset;
1938 	mtx_enter(&fp->f_mtx);
1939 	fp->f_seek++;
1940 	mtx_leave(&fp->f_mtx);
1941 	error = 0;
1942  bad:
1943 	FRELE(fp, p);
1944 	return (error);
1945 }
1946 
1947 /*
1948  * Check access permissions.
1949  */
1950 int
1951 sys_access(struct proc *p, void *v, register_t *retval)
1952 {
1953 	struct sys_access_args /* {
1954 		syscallarg(const char *) path;
1955 		syscallarg(int) amode;
1956 	} */ *uap = v;
1957 
1958 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1959 	    SCARG(uap, amode), 0));
1960 }
1961 
1962 int
1963 sys_faccessat(struct proc *p, void *v, register_t *retval)
1964 {
1965 	struct sys_faccessat_args /* {
1966 		syscallarg(int) fd;
1967 		syscallarg(const char *) path;
1968 		syscallarg(int) amode;
1969 		syscallarg(int) flag;
1970 	} */ *uap = v;
1971 
1972 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1973 	    SCARG(uap, amode), SCARG(uap, flag)));
1974 }
1975 
1976 int
1977 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1978 {
1979 	struct vnode *vp;
1980 	struct ucred *newcred, *oldcred;
1981 	struct nameidata nd;
1982 	int error;
1983 
1984 	if (amode & ~(R_OK | W_OK | X_OK))
1985 		return (EINVAL);
1986 	if (flag & ~AT_EACCESS)
1987 		return (EINVAL);
1988 
1989 	newcred = NULL;
1990 	oldcred = p->p_ucred;
1991 
1992 	/*
1993 	 * If access as real ids was requested and they really differ,
1994 	 * give the thread new creds with them reset
1995 	 */
1996 	if ((flag & AT_EACCESS) == 0 &&
1997 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1998 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1999 		p->p_ucred = newcred = crdup(oldcred);
2000 		newcred->cr_uid = newcred->cr_ruid;
2001 		newcred->cr_gid = newcred->cr_rgid;
2002 	}
2003 
2004 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2005 	nd.ni_pledge = PLEDGE_RPATH;
2006 	nd.ni_unveil = UNVEIL_READ;
2007 	if ((error = namei(&nd)) != 0)
2008 		goto out;
2009 	vp = nd.ni_vp;
2010 
2011 	/* Flags == 0 means only check for existence. */
2012 	if (amode) {
2013 		int vflags = 0;
2014 
2015 		if (amode & R_OK)
2016 			vflags |= VREAD;
2017 		if (amode & W_OK)
2018 			vflags |= VWRITE;
2019 		if (amode & X_OK)
2020 			vflags |= VEXEC;
2021 
2022 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2023 		if (!error && (vflags & VWRITE))
2024 			error = vn_writechk(vp);
2025 	}
2026 	vput(vp);
2027 out:
2028 	if (newcred != NULL) {
2029 		p->p_ucred = oldcred;
2030 		crfree(newcred);
2031 	}
2032 	return (error);
2033 }
2034 
2035 /*
2036  * Get file status; this version follows links.
2037  */
2038 int
2039 sys_stat(struct proc *p, void *v, register_t *retval)
2040 {
2041 	struct sys_stat_args /* {
2042 		syscallarg(const char *) path;
2043 		syscallarg(struct stat *) ub;
2044 	} */ *uap = v;
2045 
2046 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2047 }
2048 
2049 int
2050 sys_fstatat(struct proc *p, void *v, register_t *retval)
2051 {
2052 	struct sys_fstatat_args /* {
2053 		syscallarg(int) fd;
2054 		syscallarg(const char *) path;
2055 		syscallarg(struct stat *) buf;
2056 		syscallarg(int) flag;
2057 	} */ *uap = v;
2058 
2059 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2060 	    SCARG(uap, buf), SCARG(uap, flag)));
2061 }
2062 
2063 int
2064 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2065 {
2066 	struct stat sb;
2067 	int error, follow;
2068 	struct nameidata nd;
2069 
2070 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2071 		return (EINVAL);
2072 
2073 
2074 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2075 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2076 	nd.ni_pledge = PLEDGE_RPATH;
2077 	nd.ni_unveil = UNVEIL_READ;
2078 	if ((error = namei(&nd)) != 0)
2079 		return (error);
2080 	error = vn_stat(nd.ni_vp, &sb, p);
2081 	vput(nd.ni_vp);
2082 	if (error)
2083 		return (error);
2084 	/* Don't let non-root see generation numbers (for NFS security) */
2085 	if (suser(p))
2086 		sb.st_gen = 0;
2087 	error = copyout(&sb, buf, sizeof(sb));
2088 #ifdef KTRACE
2089 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2090 		ktrstat(p, &sb);
2091 #endif
2092 	return (error);
2093 }
2094 
2095 /*
2096  * Get file status; this version does not follow links.
2097  */
2098 int
2099 sys_lstat(struct proc *p, void *v, register_t *retval)
2100 {
2101 	struct sys_lstat_args /* {
2102 		syscallarg(const char *) path;
2103 		syscallarg(struct stat *) ub;
2104 	} */ *uap = v;
2105 
2106 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2107 	    AT_SYMLINK_NOFOLLOW));
2108 }
2109 
2110 /*
2111  * Get configurable pathname variables.
2112  */
2113 int
2114 sys_pathconf(struct proc *p, void *v, register_t *retval)
2115 {
2116 	struct sys_pathconf_args /* {
2117 		syscallarg(const char *) path;
2118 		syscallarg(int) name;
2119 	} */ *uap = v;
2120 	int error;
2121 	struct nameidata nd;
2122 
2123 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2124 	    SCARG(uap, path), p);
2125 	nd.ni_pledge = PLEDGE_RPATH;
2126 	nd.ni_unveil = UNVEIL_READ;
2127 	if ((error = namei(&nd)) != 0)
2128 		return (error);
2129 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2130 	vput(nd.ni_vp);
2131 	return (error);
2132 }
2133 
2134 /*
2135  * Return target name of a symbolic link.
2136  */
2137 int
2138 sys_readlink(struct proc *p, void *v, register_t *retval)
2139 {
2140 	struct sys_readlink_args /* {
2141 		syscallarg(const char *) path;
2142 		syscallarg(char *) buf;
2143 		syscallarg(size_t) count;
2144 	} */ *uap = v;
2145 
2146 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2147 	    SCARG(uap, count), retval));
2148 }
2149 
2150 int
2151 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2152 {
2153 	struct sys_readlinkat_args /* {
2154 		syscallarg(int) fd;
2155 		syscallarg(const char *) path;
2156 		syscallarg(char *) buf;
2157 		syscallarg(size_t) count;
2158 	} */ *uap = v;
2159 
2160 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2161 	    SCARG(uap, buf), SCARG(uap, count), retval));
2162 }
2163 
2164 int
2165 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2166     size_t count, register_t *retval)
2167 {
2168 	struct vnode *vp;
2169 	struct iovec aiov;
2170 	struct uio auio;
2171 	int error;
2172 	struct nameidata nd;
2173 
2174 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2175 	nd.ni_pledge = PLEDGE_RPATH;
2176 	nd.ni_unveil = UNVEIL_READ;
2177 	if ((error = namei(&nd)) != 0)
2178 		return (error);
2179 	vp = nd.ni_vp;
2180 	if (vp->v_type != VLNK)
2181 		error = EINVAL;
2182 	else {
2183 		aiov.iov_base = buf;
2184 		aiov.iov_len = count;
2185 		auio.uio_iov = &aiov;
2186 		auio.uio_iovcnt = 1;
2187 		auio.uio_offset = 0;
2188 		auio.uio_rw = UIO_READ;
2189 		auio.uio_segflg = UIO_USERSPACE;
2190 		auio.uio_procp = p;
2191 		auio.uio_resid = count;
2192 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2193 		*retval = count - auio.uio_resid;
2194 	}
2195 	vput(vp);
2196 	return (error);
2197 }
2198 
2199 /*
2200  * Change flags of a file given a path name.
2201  */
2202 int
2203 sys_chflags(struct proc *p, void *v, register_t *retval)
2204 {
2205 	struct sys_chflags_args /* {
2206 		syscallarg(const char *) path;
2207 		syscallarg(u_int) flags;
2208 	} */ *uap = v;
2209 
2210 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2211 	    SCARG(uap, flags), 0));
2212 }
2213 
2214 int
2215 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2216 {
2217 	struct sys_chflagsat_args /* {
2218 		syscallarg(int) fd;
2219 		syscallarg(const char *) path;
2220 		syscallarg(u_int) flags;
2221 		syscallarg(int) atflags;
2222 	} */ *uap = v;
2223 
2224 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2225 	    SCARG(uap, flags), SCARG(uap, atflags)));
2226 }
2227 
2228 int
2229 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2230 {
2231 	struct nameidata nd;
2232 	int error, follow;
2233 
2234 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2235 		return (EINVAL);
2236 
2237 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2238 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2239 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2240 	nd.ni_unveil = UNVEIL_WRITE;
2241 	if ((error = namei(&nd)) != 0)
2242 		return (error);
2243 	return (dovchflags(p, nd.ni_vp, flags));
2244 }
2245 
2246 /*
2247  * Change flags of a file given a file descriptor.
2248  */
2249 int
2250 sys_fchflags(struct proc *p, void *v, register_t *retval)
2251 {
2252 	struct sys_fchflags_args /* {
2253 		syscallarg(int) fd;
2254 		syscallarg(u_int) flags;
2255 	} */ *uap = v;
2256 	struct file *fp;
2257 	struct vnode *vp;
2258 	int error;
2259 
2260 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2261 		return (error);
2262 	vp = fp->f_data;
2263 	vref(vp);
2264 	FRELE(fp, p);
2265 	return (dovchflags(p, vp, SCARG(uap, flags)));
2266 }
2267 
2268 int
2269 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2270 {
2271 	struct vattr vattr;
2272 	int error;
2273 
2274 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2275 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2276 		error = EROFS;
2277 	else if (flags == VNOVAL)
2278 		error = EINVAL;
2279 	else {
2280 		if (suser(p)) {
2281 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2282 			    != 0)
2283 				goto out;
2284 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2285 				error = EINVAL;
2286 				goto out;
2287 			}
2288 		}
2289 		VATTR_NULL(&vattr);
2290 		vattr.va_flags = flags;
2291 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2292 	}
2293 out:
2294 	vput(vp);
2295 	return (error);
2296 }
2297 
2298 /*
2299  * Change mode of a file given path name.
2300  */
2301 int
2302 sys_chmod(struct proc *p, void *v, register_t *retval)
2303 {
2304 	struct sys_chmod_args /* {
2305 		syscallarg(const char *) path;
2306 		syscallarg(mode_t) mode;
2307 	} */ *uap = v;
2308 
2309 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2310 }
2311 
2312 int
2313 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2314 {
2315 	struct sys_fchmodat_args /* {
2316 		syscallarg(int) fd;
2317 		syscallarg(const char *) path;
2318 		syscallarg(mode_t) mode;
2319 		syscallarg(int) flag;
2320 	} */ *uap = v;
2321 
2322 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2323 	    SCARG(uap, mode), SCARG(uap, flag)));
2324 }
2325 
2326 int
2327 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2328 {
2329 	struct vnode *vp;
2330 	struct vattr vattr;
2331 	int error, follow;
2332 	struct nameidata nd;
2333 
2334 	if (mode & ~(S_IFMT | ALLPERMS))
2335 		return (EINVAL);
2336 	if ((p->p_p->ps_flags & PS_PLEDGE))
2337 		mode &= ACCESSPERMS;
2338 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2339 		return (EINVAL);
2340 
2341 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2342 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2343 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2344 	nd.ni_unveil = UNVEIL_WRITE;
2345 	if ((error = namei(&nd)) != 0)
2346 		return (error);
2347 	vp = nd.ni_vp;
2348 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2349 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2350 		error = EROFS;
2351 	else {
2352 		VATTR_NULL(&vattr);
2353 		vattr.va_mode = mode & ALLPERMS;
2354 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2355 	}
2356 	vput(vp);
2357 	return (error);
2358 }
2359 
2360 /*
2361  * Change mode of a file given a file descriptor.
2362  */
2363 int
2364 sys_fchmod(struct proc *p, void *v, register_t *retval)
2365 {
2366 	struct sys_fchmod_args /* {
2367 		syscallarg(int) fd;
2368 		syscallarg(mode_t) mode;
2369 	} */ *uap = v;
2370 	struct vattr vattr;
2371 	struct vnode *vp;
2372 	struct file *fp;
2373 	mode_t mode = SCARG(uap, mode);
2374 	int error;
2375 
2376 	if (mode & ~(S_IFMT | ALLPERMS))
2377 		return (EINVAL);
2378 	if ((p->p_p->ps_flags & PS_PLEDGE))
2379 		mode &= ACCESSPERMS;
2380 
2381 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2382 		return (error);
2383 	vp = fp->f_data;
2384 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2385 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2386 		error = EROFS;
2387 	else {
2388 		VATTR_NULL(&vattr);
2389 		vattr.va_mode = mode & ALLPERMS;
2390 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2391 	}
2392 	VOP_UNLOCK(vp);
2393 	FRELE(fp, p);
2394 	return (error);
2395 }
2396 
2397 /*
2398  * Set ownership given a path name.
2399  */
2400 int
2401 sys_chown(struct proc *p, void *v, register_t *retval)
2402 {
2403 	struct sys_chown_args /* {
2404 		syscallarg(const char *) path;
2405 		syscallarg(uid_t) uid;
2406 		syscallarg(gid_t) gid;
2407 	} */ *uap = v;
2408 
2409 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2410 	    SCARG(uap, gid), 0));
2411 }
2412 
2413 int
2414 sys_fchownat(struct proc *p, void *v, register_t *retval)
2415 {
2416 	struct sys_fchownat_args /* {
2417 		syscallarg(int) fd;
2418 		syscallarg(const char *) path;
2419 		syscallarg(uid_t) uid;
2420 		syscallarg(gid_t) gid;
2421 		syscallarg(int) flag;
2422 	} */ *uap = v;
2423 
2424 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2425 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2426 }
2427 
2428 int
2429 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2430     int flag)
2431 {
2432 	struct vnode *vp;
2433 	struct vattr vattr;
2434 	int error, follow;
2435 	struct nameidata nd;
2436 	mode_t mode;
2437 
2438 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2439 		return (EINVAL);
2440 
2441 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2442 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2443 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2444 	nd.ni_unveil = UNVEIL_WRITE;
2445 	if ((error = namei(&nd)) != 0)
2446 		return (error);
2447 	vp = nd.ni_vp;
2448 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2449 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2450 		error = EROFS;
2451 	else {
2452 		if ((error = pledge_chown(p, uid, gid)))
2453 			goto out;
2454 		if ((uid != -1 || gid != -1) &&
2455 		    !vnoperm(vp) &&
2456 		    (suser(p) || suid_clear)) {
2457 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2458 			if (error)
2459 				goto out;
2460 			mode = vattr.va_mode & ~(VSUID | VSGID);
2461 			if (mode == vattr.va_mode)
2462 				mode = VNOVAL;
2463 		} else
2464 			mode = VNOVAL;
2465 		VATTR_NULL(&vattr);
2466 		vattr.va_uid = uid;
2467 		vattr.va_gid = gid;
2468 		vattr.va_mode = mode;
2469 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2470 	}
2471 out:
2472 	vput(vp);
2473 	return (error);
2474 }
2475 
2476 /*
2477  * Set ownership given a path name, without following links.
2478  */
2479 int
2480 sys_lchown(struct proc *p, void *v, register_t *retval)
2481 {
2482 	struct sys_lchown_args /* {
2483 		syscallarg(const char *) path;
2484 		syscallarg(uid_t) uid;
2485 		syscallarg(gid_t) gid;
2486 	} */ *uap = v;
2487 	struct vnode *vp;
2488 	struct vattr vattr;
2489 	int error;
2490 	struct nameidata nd;
2491 	mode_t mode;
2492 	uid_t uid = SCARG(uap, uid);
2493 	gid_t gid = SCARG(uap, gid);
2494 
2495 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2496 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2497 	nd.ni_unveil = UNVEIL_WRITE;
2498 	if ((error = namei(&nd)) != 0)
2499 		return (error);
2500 	vp = nd.ni_vp;
2501 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2502 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2503 		error = EROFS;
2504 	else {
2505 		if ((error = pledge_chown(p, uid, gid)))
2506 			goto out;
2507 		if ((uid != -1 || gid != -1) &&
2508 		    !vnoperm(vp) &&
2509 		    (suser(p) || suid_clear)) {
2510 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2511 			if (error)
2512 				goto out;
2513 			mode = vattr.va_mode & ~(VSUID | VSGID);
2514 			if (mode == vattr.va_mode)
2515 				mode = VNOVAL;
2516 		} else
2517 			mode = VNOVAL;
2518 		VATTR_NULL(&vattr);
2519 		vattr.va_uid = uid;
2520 		vattr.va_gid = gid;
2521 		vattr.va_mode = mode;
2522 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2523 	}
2524 out:
2525 	vput(vp);
2526 	return (error);
2527 }
2528 
2529 /*
2530  * Set ownership given a file descriptor.
2531  */
2532 int
2533 sys_fchown(struct proc *p, void *v, register_t *retval)
2534 {
2535 	struct sys_fchown_args /* {
2536 		syscallarg(int) fd;
2537 		syscallarg(uid_t) uid;
2538 		syscallarg(gid_t) gid;
2539 	} */ *uap = v;
2540 	struct vnode *vp;
2541 	struct vattr vattr;
2542 	int error;
2543 	struct file *fp;
2544 	mode_t mode;
2545 	uid_t uid = SCARG(uap, uid);
2546 	gid_t gid = SCARG(uap, gid);
2547 
2548 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2549 		return (error);
2550 	vp = fp->f_data;
2551 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2552 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2553 		error = EROFS;
2554 	else {
2555 		if ((error = pledge_chown(p, uid, gid)))
2556 			goto out;
2557 		if ((uid != -1 || gid != -1) &&
2558 		    !vnoperm(vp) &&
2559 		    (suser(p) || suid_clear)) {
2560 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2561 			if (error)
2562 				goto out;
2563 			mode = vattr.va_mode & ~(VSUID | VSGID);
2564 			if (mode == vattr.va_mode)
2565 				mode = VNOVAL;
2566 		} else
2567 			mode = VNOVAL;
2568 		VATTR_NULL(&vattr);
2569 		vattr.va_uid = uid;
2570 		vattr.va_gid = gid;
2571 		vattr.va_mode = mode;
2572 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2573 	}
2574 out:
2575 	VOP_UNLOCK(vp);
2576 	FRELE(fp, p);
2577 	return (error);
2578 }
2579 
2580 /*
2581  * Set the access and modification times given a path name.
2582  */
2583 int
2584 sys_utimes(struct proc *p, void *v, register_t *retval)
2585 {
2586 	struct sys_utimes_args /* {
2587 		syscallarg(const char *) path;
2588 		syscallarg(const struct timeval *) tptr;
2589 	} */ *uap = v;
2590 
2591 	struct timespec ts[2];
2592 	struct timeval tv[2];
2593 	const struct timeval *tvp;
2594 	int error;
2595 
2596 	tvp = SCARG(uap, tptr);
2597 	if (tvp != NULL) {
2598 		error = copyin(tvp, tv, sizeof(tv));
2599 		if (error)
2600 			return (error);
2601 #ifdef KTRACE
2602 		if (KTRPOINT(p, KTR_STRUCT))
2603 			ktrabstimeval(p, &tv);
2604 #endif
2605 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2606 			return (EINVAL);
2607 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2608 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2609 	} else
2610 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2611 
2612 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2613 }
2614 
2615 int
2616 sys_utimensat(struct proc *p, void *v, register_t *retval)
2617 {
2618 	struct sys_utimensat_args /* {
2619 		syscallarg(int) fd;
2620 		syscallarg(const char *) path;
2621 		syscallarg(const struct timespec *) times;
2622 		syscallarg(int) flag;
2623 	} */ *uap = v;
2624 
2625 	struct timespec ts[2];
2626 	const struct timespec *tsp;
2627 	int error, i;
2628 
2629 	tsp = SCARG(uap, times);
2630 	if (tsp != NULL) {
2631 		error = copyin(tsp, ts, sizeof(ts));
2632 		if (error)
2633 			return (error);
2634 		for (i = 0; i < nitems(ts); i++) {
2635 			if (ts[i].tv_nsec == UTIME_NOW)
2636 				continue;
2637 			if (ts[i].tv_nsec == UTIME_OMIT)
2638 				continue;
2639 #ifdef KTRACE
2640 			if (KTRPOINT(p, KTR_STRUCT))
2641 				ktrabstimespec(p, &ts[i]);
2642 #endif
2643 			if (!timespecisvalid(&ts[i]))
2644 				return (EINVAL);
2645 		}
2646 	} else
2647 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2648 
2649 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2650 	    SCARG(uap, flag)));
2651 }
2652 
2653 int
2654 doutimensat(struct proc *p, int fd, const char *path,
2655     struct timespec ts[2], int flag)
2656 {
2657 	struct vnode *vp;
2658 	int error, follow;
2659 	struct nameidata nd;
2660 
2661 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2662 		return (EINVAL);
2663 
2664 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2665 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2666 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2667 	nd.ni_unveil = UNVEIL_WRITE;
2668 	if ((error = namei(&nd)) != 0)
2669 		return (error);
2670 	vp = nd.ni_vp;
2671 
2672 	return (dovutimens(p, vp, ts));
2673 }
2674 
2675 int
2676 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2677 {
2678 	struct vattr vattr;
2679 	struct timespec now;
2680 	int error;
2681 
2682 #ifdef KTRACE
2683 	/* if they're both UTIME_NOW, then don't report either */
2684 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2685 	    KTRPOINT(p, KTR_STRUCT)) {
2686 		ktrabstimespec(p, &ts[0]);
2687 		ktrabstimespec(p, &ts[1]);
2688 	}
2689 #endif
2690 
2691 	VATTR_NULL(&vattr);
2692 
2693 	/*  make sure ctime is updated even if neither mtime nor atime is */
2694 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2695 
2696 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2697 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2698 			vattr.va_vaflags |= VA_UTIMES_NULL;
2699 
2700 		getnanotime(&now);
2701 		if (ts[0].tv_nsec == UTIME_NOW)
2702 			ts[0] = now;
2703 		if (ts[1].tv_nsec == UTIME_NOW)
2704 			ts[1] = now;
2705 	}
2706 
2707 	if (ts[0].tv_nsec != UTIME_OMIT)
2708 		vattr.va_atime = ts[0];
2709 	if (ts[1].tv_nsec != UTIME_OMIT)
2710 		vattr.va_mtime = ts[1];
2711 
2712 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2713 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2714 		error = EROFS;
2715 	else
2716 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2717 	vput(vp);
2718 	return (error);
2719 }
2720 
2721 /*
2722  * Set the access and modification times given a file descriptor.
2723  */
2724 int
2725 sys_futimes(struct proc *p, void *v, register_t *retval)
2726 {
2727 	struct sys_futimes_args /* {
2728 		syscallarg(int) fd;
2729 		syscallarg(const struct timeval *) tptr;
2730 	} */ *uap = v;
2731 	struct timeval tv[2];
2732 	struct timespec ts[2];
2733 	const struct timeval *tvp;
2734 	int error;
2735 
2736 	tvp = SCARG(uap, tptr);
2737 	if (tvp != NULL) {
2738 		error = copyin(tvp, tv, sizeof(tv));
2739 		if (error)
2740 			return (error);
2741 #ifdef KTRACE
2742 		if (KTRPOINT(p, KTR_STRUCT)) {
2743 			ktrabstimeval(p, &tv[0]);
2744 			ktrabstimeval(p, &tv[1]);
2745 		}
2746 #endif
2747 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2748 			return (EINVAL);
2749 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2750 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2751 	} else
2752 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2753 
2754 	return (dofutimens(p, SCARG(uap, fd), ts));
2755 }
2756 
2757 int
2758 sys_futimens(struct proc *p, void *v, register_t *retval)
2759 {
2760 	struct sys_futimens_args /* {
2761 		syscallarg(int) fd;
2762 		syscallarg(const struct timespec *) times;
2763 	} */ *uap = v;
2764 	struct timespec ts[2];
2765 	const struct timespec *tsp;
2766 	int error, i;
2767 
2768 	tsp = SCARG(uap, times);
2769 	if (tsp != NULL) {
2770 		error = copyin(tsp, ts, sizeof(ts));
2771 		if (error)
2772 			return (error);
2773 		for (i = 0; i < nitems(ts); i++) {
2774 			if (ts[i].tv_nsec == UTIME_NOW)
2775 				continue;
2776 			if (ts[i].tv_nsec == UTIME_OMIT)
2777 				continue;
2778 #ifdef KTRACE
2779 			if (KTRPOINT(p, KTR_STRUCT))
2780 				ktrabstimespec(p, &ts[i]);
2781 #endif
2782 			if (!timespecisvalid(&ts[i]))
2783 				return (EINVAL);
2784 		}
2785 	} else
2786 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2787 
2788 	return (dofutimens(p, SCARG(uap, fd), ts));
2789 }
2790 
2791 int
2792 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2793 {
2794 	struct file *fp;
2795 	struct vnode *vp;
2796 	int error;
2797 
2798 	if ((error = getvnode(p, fd, &fp)) != 0)
2799 		return (error);
2800 	vp = fp->f_data;
2801 	vref(vp);
2802 	FRELE(fp, p);
2803 
2804 	return (dovutimens(p, vp, ts));
2805 }
2806 
2807 /*
2808  * Truncate a file given its path name.
2809  */
2810 int
2811 sys_truncate(struct proc *p, void *v, register_t *retval)
2812 {
2813 	struct sys_truncate_args /* {
2814 		syscallarg(const char *) path;
2815 		syscallarg(int) pad;
2816 		syscallarg(off_t) length;
2817 	} */ *uap = v;
2818 	struct vnode *vp;
2819 	struct vattr vattr;
2820 	int error;
2821 	struct nameidata nd;
2822 
2823 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2824 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2825 	nd.ni_unveil = UNVEIL_WRITE;
2826 	if ((error = namei(&nd)) != 0)
2827 		return (error);
2828 	vp = nd.ni_vp;
2829 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2830 	if (vp->v_type == VDIR)
2831 		error = EISDIR;
2832 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2833 	    (error = vn_writechk(vp)) == 0) {
2834 		VATTR_NULL(&vattr);
2835 		vattr.va_size = SCARG(uap, length);
2836 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2837 	}
2838 	vput(vp);
2839 	return (error);
2840 }
2841 
2842 /*
2843  * Truncate a file given a file descriptor.
2844  */
2845 int
2846 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2847 {
2848 	struct sys_ftruncate_args /* {
2849 		syscallarg(int) fd;
2850 		syscallarg(int) pad;
2851 		syscallarg(off_t) length;
2852 	} */ *uap = v;
2853 	struct vattr vattr;
2854 	struct vnode *vp;
2855 	struct file *fp;
2856 	off_t len;
2857 	int error;
2858 
2859 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2860 		return (error);
2861 	len = SCARG(uap, length);
2862 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2863 		error = EINVAL;
2864 		goto bad;
2865 	}
2866 	vp = fp->f_data;
2867 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2868 	if (vp->v_type == VDIR)
2869 		error = EISDIR;
2870 	else if ((error = vn_writechk(vp)) == 0) {
2871 		VATTR_NULL(&vattr);
2872 		vattr.va_size = len;
2873 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2874 	}
2875 	VOP_UNLOCK(vp);
2876 bad:
2877 	FRELE(fp, p);
2878 	return (error);
2879 }
2880 
2881 /*
2882  * Sync an open file.
2883  */
2884 int
2885 sys_fsync(struct proc *p, void *v, register_t *retval)
2886 {
2887 	struct sys_fsync_args /* {
2888 		syscallarg(int) fd;
2889 	} */ *uap = v;
2890 	struct vnode *vp;
2891 	struct file *fp;
2892 	int error;
2893 
2894 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2895 		return (error);
2896 	vp = fp->f_data;
2897 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2898 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2899 #ifdef FFS_SOFTUPDATES
2900 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2901 		error = softdep_fsync(vp);
2902 #endif
2903 
2904 	VOP_UNLOCK(vp);
2905 	FRELE(fp, p);
2906 	return (error);
2907 }
2908 
2909 /*
2910  * Rename files.  Source and destination must either both be directories,
2911  * or both not be directories.  If target is a directory, it must be empty.
2912  */
2913 int
2914 sys_rename(struct proc *p, void *v, register_t *retval)
2915 {
2916 	struct sys_rename_args /* {
2917 		syscallarg(const char *) from;
2918 		syscallarg(const char *) to;
2919 	} */ *uap = v;
2920 
2921 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2922 	    SCARG(uap, to)));
2923 }
2924 
2925 int
2926 sys_renameat(struct proc *p, void *v, register_t *retval)
2927 {
2928 	struct sys_renameat_args /* {
2929 		syscallarg(int) fromfd;
2930 		syscallarg(const char *) from;
2931 		syscallarg(int) tofd;
2932 		syscallarg(const char *) to;
2933 	} */ *uap = v;
2934 
2935 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2936 	    SCARG(uap, tofd), SCARG(uap, to)));
2937 }
2938 
2939 int
2940 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2941     const char *to)
2942 {
2943 	struct vnode *tvp, *fvp, *tdvp;
2944 	struct nameidata fromnd, tond;
2945 	int error;
2946 	int flags;
2947 
2948 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2949 	    fromfd, from, p);
2950 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2951 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2952 	if ((error = namei(&fromnd)) != 0)
2953 		return (error);
2954 	fvp = fromnd.ni_vp;
2955 
2956 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2957 	/*
2958 	 * rename("foo/", "bar/");  is  OK
2959 	 */
2960 	if (fvp->v_type == VDIR)
2961 		flags |= STRIPSLASHES;
2962 
2963 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2964 	tond.ni_pledge = PLEDGE_CPATH;
2965 	tond.ni_unveil = UNVEIL_CREATE;
2966 	if ((error = namei(&tond)) != 0) {
2967 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2968 		vrele(fromnd.ni_dvp);
2969 		vrele(fvp);
2970 		goto out1;
2971 	}
2972 	tdvp = tond.ni_dvp;
2973 	tvp = tond.ni_vp;
2974 	if (tvp != NULL) {
2975 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2976 			error = ENOTDIR;
2977 			goto out;
2978 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2979 			error = EISDIR;
2980 			goto out;
2981 		}
2982 	}
2983 	if (fvp == tdvp)
2984 		error = EINVAL;
2985 	/*
2986 	 * If source is the same as the destination (that is the
2987 	 * same inode number)
2988 	 */
2989 	if (fvp == tvp)
2990 		error = -1;
2991 out:
2992 	if (!error) {
2993 		if (tvp) {
2994 			(void)uvm_vnp_uncache(tvp);
2995 		}
2996 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2997 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2998 	} else {
2999 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3000 		if (tdvp == tvp)
3001 			vrele(tdvp);
3002 		else
3003 			vput(tdvp);
3004 		if (tvp)
3005 			vput(tvp);
3006 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3007 		vrele(fromnd.ni_dvp);
3008 		vrele(fvp);
3009 	}
3010 	vrele(tond.ni_startdir);
3011 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
3012 out1:
3013 	if (fromnd.ni_startdir)
3014 		vrele(fromnd.ni_startdir);
3015 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
3016 	if (error == -1)
3017 		return (0);
3018 	return (error);
3019 }
3020 
3021 /*
3022  * Make a directory file.
3023  */
3024 int
3025 sys_mkdir(struct proc *p, void *v, register_t *retval)
3026 {
3027 	struct sys_mkdir_args /* {
3028 		syscallarg(const char *) path;
3029 		syscallarg(mode_t) mode;
3030 	} */ *uap = v;
3031 
3032 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3033 }
3034 
3035 int
3036 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3037 {
3038 	struct sys_mkdirat_args /* {
3039 		syscallarg(int) fd;
3040 		syscallarg(const char *) path;
3041 		syscallarg(mode_t) mode;
3042 	} */ *uap = v;
3043 
3044 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3045 	    SCARG(uap, mode)));
3046 }
3047 
3048 int
3049 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3050 {
3051 	struct vnode *vp;
3052 	struct vattr vattr;
3053 	int error;
3054 	struct nameidata nd;
3055 
3056 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3057 	    fd, path, p);
3058 	nd.ni_pledge = PLEDGE_CPATH;
3059 	nd.ni_unveil = UNVEIL_CREATE;
3060 	if ((error = namei(&nd)) != 0)
3061 		return (error);
3062 	vp = nd.ni_vp;
3063 	if (vp != NULL) {
3064 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3065 		if (nd.ni_dvp == vp)
3066 			vrele(nd.ni_dvp);
3067 		else
3068 			vput(nd.ni_dvp);
3069 		vrele(vp);
3070 		return (EEXIST);
3071 	}
3072 	VATTR_NULL(&vattr);
3073 	vattr.va_type = VDIR;
3074 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3075 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3076 	if (!error)
3077 		vput(nd.ni_vp);
3078 	return (error);
3079 }
3080 
3081 /*
3082  * Remove a directory file.
3083  */
3084 int
3085 sys_rmdir(struct proc *p, void *v, register_t *retval)
3086 {
3087 	struct sys_rmdir_args /* {
3088 		syscallarg(const char *) path;
3089 	} */ *uap = v;
3090 
3091 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3092 }
3093 
3094 /*
3095  * Read a block of directory entries in a file system independent format.
3096  */
3097 int
3098 sys_getdents(struct proc *p, void *v, register_t *retval)
3099 {
3100 	struct sys_getdents_args /* {
3101 		syscallarg(int) fd;
3102 		syscallarg(void *) buf;
3103 		syscallarg(size_t) buflen;
3104 	} */ *uap = v;
3105 	struct vnode *vp;
3106 	struct file *fp;
3107 	struct uio auio;
3108 	struct iovec aiov;
3109 	size_t buflen;
3110 	int error, eofflag;
3111 
3112 	buflen = SCARG(uap, buflen);
3113 
3114 	if (buflen > INT_MAX)
3115 		return (EINVAL);
3116 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3117 		return (error);
3118 	if ((fp->f_flag & FREAD) == 0) {
3119 		error = EBADF;
3120 		goto bad;
3121 	}
3122 	vp = fp->f_data;
3123 	if (vp->v_type != VDIR) {
3124 		error = EINVAL;
3125 		goto bad;
3126 	}
3127 
3128 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3129 
3130 	if (fp->f_offset < 0) {
3131 		VOP_UNLOCK(vp);
3132 		error = EINVAL;
3133 		goto bad;
3134 	}
3135 
3136 	aiov.iov_base = SCARG(uap, buf);
3137 	aiov.iov_len = buflen;
3138 	auio.uio_iov = &aiov;
3139 	auio.uio_iovcnt = 1;
3140 	auio.uio_rw = UIO_READ;
3141 	auio.uio_segflg = UIO_USERSPACE;
3142 	auio.uio_procp = p;
3143 	auio.uio_resid = buflen;
3144 	auio.uio_offset = fp->f_offset;
3145 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3146 	mtx_enter(&fp->f_mtx);
3147 	fp->f_offset = auio.uio_offset;
3148 	mtx_leave(&fp->f_mtx);
3149 	VOP_UNLOCK(vp);
3150 	if (error)
3151 		goto bad;
3152 	*retval = buflen - auio.uio_resid;
3153 bad:
3154 	FRELE(fp, p);
3155 	return (error);
3156 }
3157 
3158 /*
3159  * Set the mode mask for creation of filesystem nodes.
3160  */
3161 int
3162 sys_umask(struct proc *p, void *v, register_t *retval)
3163 {
3164 	struct sys_umask_args /* {
3165 		syscallarg(mode_t) newmask;
3166 	} */ *uap = v;
3167 	struct filedesc *fdp = p->p_fd;
3168 
3169 	fdplock(fdp);
3170 	*retval = fdp->fd_cmask;
3171 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3172 	fdpunlock(fdp);
3173 	return (0);
3174 }
3175 
3176 /*
3177  * Void all references to file by ripping underlying filesystem
3178  * away from vnode.
3179  */
3180 int
3181 sys_revoke(struct proc *p, void *v, register_t *retval)
3182 {
3183 	struct sys_revoke_args /* {
3184 		syscallarg(const char *) path;
3185 	} */ *uap = v;
3186 	struct vnode *vp;
3187 	struct vattr vattr;
3188 	int error;
3189 	struct nameidata nd;
3190 
3191 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3192 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3193 	nd.ni_unveil = UNVEIL_READ;
3194 	if ((error = namei(&nd)) != 0)
3195 		return (error);
3196 	vp = nd.ni_vp;
3197 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3198 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3199 		error = ENOTTY;
3200 		goto out;
3201 	}
3202 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3203 		goto out;
3204 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3205 	    (error = suser(p)))
3206 		goto out;
3207 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3208 		VOP_REVOKE(vp, REVOKEALL);
3209 out:
3210 	vrele(vp);
3211 	return (error);
3212 }
3213 
3214 /*
3215  * Convert a user file descriptor to a kernel file entry.
3216  *
3217  * On return *fpp is FREF:ed.
3218  */
3219 int
3220 getvnode(struct proc *p, int fd, struct file **fpp)
3221 {
3222 	struct file *fp;
3223 	struct vnode *vp;
3224 
3225 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3226 		return (EBADF);
3227 
3228 	if (fp->f_type != DTYPE_VNODE) {
3229 		FRELE(fp, p);
3230 		return (EINVAL);
3231 	}
3232 
3233 	vp = fp->f_data;
3234 	if (vp->v_type == VBAD) {
3235 		FRELE(fp, p);
3236 		return (EBADF);
3237 	}
3238 
3239 	*fpp = fp;
3240 
3241 	return (0);
3242 }
3243 
3244 /*
3245  * Positional read system call.
3246  */
3247 int
3248 sys_pread(struct proc *p, void *v, register_t *retval)
3249 {
3250 	struct sys_pread_args /* {
3251 		syscallarg(int) fd;
3252 		syscallarg(void *) buf;
3253 		syscallarg(size_t) nbyte;
3254 		syscallarg(int) pad;
3255 		syscallarg(off_t) offset;
3256 	} */ *uap = v;
3257 	struct iovec iov;
3258 	struct uio auio;
3259 
3260 	iov.iov_base = SCARG(uap, buf);
3261 	iov.iov_len = SCARG(uap, nbyte);
3262 	if (iov.iov_len > SSIZE_MAX)
3263 		return (EINVAL);
3264 
3265 	auio.uio_iov = &iov;
3266 	auio.uio_iovcnt = 1;
3267 	auio.uio_resid = iov.iov_len;
3268 	auio.uio_offset = SCARG(uap, offset);
3269 
3270 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3271 }
3272 
3273 /*
3274  * Positional scatter read system call.
3275  */
3276 int
3277 sys_preadv(struct proc *p, void *v, register_t *retval)
3278 {
3279 	struct sys_preadv_args /* {
3280 		syscallarg(int) fd;
3281 		syscallarg(const struct iovec *) iovp;
3282 		syscallarg(int) iovcnt;
3283 		syscallarg(int) pad;
3284 		syscallarg(off_t) offset;
3285 	} */ *uap = v;
3286 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3287 	int error, iovcnt = SCARG(uap, iovcnt);
3288 	struct uio auio;
3289 	size_t resid;
3290 
3291 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3292 	if (error)
3293 		goto done;
3294 
3295 	auio.uio_iov = iov;
3296 	auio.uio_iovcnt = iovcnt;
3297 	auio.uio_resid = resid;
3298 	auio.uio_offset = SCARG(uap, offset);
3299 
3300 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3301  done:
3302 	iovec_free(iov, iovcnt);
3303 	return (error);
3304 }
3305 
3306 /*
3307  * Positional write system call.
3308  */
3309 int
3310 sys_pwrite(struct proc *p, void *v, register_t *retval)
3311 {
3312 	struct sys_pwrite_args /* {
3313 		syscallarg(int) fd;
3314 		syscallarg(const void *) buf;
3315 		syscallarg(size_t) nbyte;
3316 		syscallarg(int) pad;
3317 		syscallarg(off_t) offset;
3318 	} */ *uap = v;
3319 	struct iovec iov;
3320 	struct uio auio;
3321 
3322 	iov.iov_base = (void *)SCARG(uap, buf);
3323 	iov.iov_len = SCARG(uap, nbyte);
3324 	if (iov.iov_len > SSIZE_MAX)
3325 		return (EINVAL);
3326 
3327 	auio.uio_iov = &iov;
3328 	auio.uio_iovcnt = 1;
3329 	auio.uio_resid = iov.iov_len;
3330 	auio.uio_offset = SCARG(uap, offset);
3331 
3332 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3333 }
3334 
3335 /*
3336  * Positional gather write system call.
3337  */
3338 int
3339 sys_pwritev(struct proc *p, void *v, register_t *retval)
3340 {
3341 	struct sys_pwritev_args /* {
3342 		syscallarg(int) fd;
3343 		syscallarg(const struct iovec *) iovp;
3344 		syscallarg(int) iovcnt;
3345 		syscallarg(int) pad;
3346 		syscallarg(off_t) offset;
3347 	} */ *uap = v;
3348 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3349 	int error, iovcnt = SCARG(uap, iovcnt);
3350 	struct uio auio;
3351 	size_t resid;
3352 
3353 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3354 	if (error)
3355 		goto done;
3356 
3357 	auio.uio_iov = iov;
3358 	auio.uio_iovcnt = iovcnt;
3359 	auio.uio_resid = resid;
3360 	auio.uio_offset = SCARG(uap, offset);
3361 
3362 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3363  done:
3364 	iovec_free(iov, iovcnt);
3365 	return (error);
3366 }
3367