xref: /openbsd-src/sys/kern/vfs_syscalls.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.265 2016/09/10 16:53:30 natano Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/conf.h>
46 #include <sys/sysctl.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/lock.h>
50 #include <sys/vnode.h>
51 #include <sys/mount.h>
52 #include <sys/proc.h>
53 #include <sys/pledge.h>
54 #include <sys/uio.h>
55 #include <sys/malloc.h>
56 #include <sys/pool.h>
57 #include <sys/dkio.h>
58 #include <sys/disklabel.h>
59 #include <sys/ktrace.h>
60 #include <sys/unistd.h>
61 #include <sys/specdev.h>
62 
63 #include <sys/syscallargs.h>
64 
/* Declared here, defined outside this file. */
65 extern int suid_clear;
66 
/* Common lookup/permission routine used by sys_chdir() and sys_chroot(). */
67 static int change_dir(struct nameidata *, struct proc *);
68 
/* After a mount, repoint cwd/root references from the covered vnode. */
69 void checkdirs(struct vnode *);
70 
/* Copy a struct statfs to userland, zeroing f_fsid for non-root callers. */
71 int copyout_statfs(struct statfs *, void *, struct proc *);
72 
/*
 * Internal back ends that take an explicit directory descriptor.
 * doopenat() serves sys_open()/sys_openat() and domknodat() serves
 * sys_mknod()/sys_mknodat(); the remaining helpers are defined further
 * down the file and presumably follow the same pattern (TODO confirm).
 */
73 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
74 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
75 int dolinkat(struct proc *, int, const char *, int, const char *, int);
76 int dosymlinkat(struct proc *, const char *, int, const char *);
77 int dounlinkat(struct proc *, int, const char *, int);
78 int dofaccessat(struct proc *, int, const char *, int, int);
79 int dofstatat(struct proc *, int, const char *, struct stat *, int);
80 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
81     register_t *);
82 int dochflagsat(struct proc *, int, const char *, u_int, int);
83 int dovchflags(struct proc *, struct vnode *, u_int);
84 int dofchmodat(struct proc *, int, const char *, mode_t, int);
85 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
86 int dorenameat(struct proc *, int, const char *, int, const char *);
87 int domkdirat(struct proc *, int, const char *, mode_t);
88 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
89 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
90 int dofutimens(struct proc *, int, struct timespec [2]);
91 
92 /*
93  * Virtual File System System Calls
94  */
95 
96 /*
 * Mount a file system.
 *
 * Superuser only.  The mount point path is copied in (bounded by
 * MNAMELEN) and looked up with the leaf locked.  Two flows follow:
 *
 *  - MNT_UPDATE: the path must name the root vnode of an existing
 *    mount (else EINVAL); that mount is marked busy and its flags are
 *    changed in place, with the previous flags restored if the
 *    filesystem's mount routine fails.
 *  - new mount: the covered vnode must be a directory with nothing
 *    mounted on it, the filesystem type must be found in the vfsconf
 *    list, and a zeroed struct mount is allocated and linked onto
 *    mountlist on success.
 *
 * Returns 0 or an errno value.
 */
99 int
100 sys_mount(struct proc *p, void *v, register_t *retval)
101 {
102 	struct sys_mount_args /* {
103 		syscallarg(const char *) type;
104 		syscallarg(const char *) path;
105 		syscallarg(int) flags;
106 		syscallarg(void *) data;
107 	} */ *uap = v;
108 	struct vnode *vp;
109 	struct mount *mp;
	/* mntflag: snapshot of mnt_flag taken on the update path for rollback */
110 	int error, mntflag = 0;
111 	char fstypename[MFSNAMELEN];
112 	char fspath[MNAMELEN];
113 	struct nameidata nd;
114 	struct vfsconf *vfsp;
115 	int flags = SCARG(uap, flags);
116 
	/* Any suser() failure is returned to the caller unchanged. */
117 	if ((error = suser(p, 0)))
118 		return (error);
119 
120 	/*
121 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
122 	 */
123 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
124 	if (error)
125 		return(error);
126 
127 	/*
128 	 * Get vnode to be covered
129 	 */
	/* fspath is already a kernel copy, hence UIO_SYSSPACE. */
130 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
131 	if ((error = namei(&nd)) != 0)
132 		return (error);
133 	vp = nd.ni_vp;
134 	if (flags & MNT_UPDATE) {
		/* An update must name the root vnode of the mounted filesystem. */
135 		if ((vp->v_flag & VROOT) == 0) {
136 			vput(vp);
137 			return (EINVAL);
138 		}
139 		mp = vp->v_mount;
140 		vfsp = mp->mnt_vfc;
141 		mntflag = mp->mnt_flag;
142 		/*
143 		 * We only allow the filesystem to be reloaded if it
144 		 * is currently mounted read-only.
145 		 */
146 		if ((flags & MNT_RELOAD) &&
147 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
148 			vput(vp);
149 			return (EOPNOTSUPP);	/* Needs translation */
150 		}
151 
152 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
153 			vput(vp);
154 			return (error);
155 		}
		/* Record the request in mnt_flag; cleared again after VFS_MOUNT(). */
156 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
157 		goto update;
158 	}
159 	/*
160 	 * Do not allow disabling of permission checks unless exec and access to
161 	 * device files is disabled too.
162 	 */
163 	if ((flags & MNT_NOPERM) &&
164 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
165 		vput(vp);
166 		return (EPERM);
167 	}
168 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
169 		vput(vp);
170 		return (error);
171 	}
172 	if (vp->v_type != VDIR) {
173 		vput(vp);
174 		return (ENOTDIR);
175 	}
176 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
177 	if (error) {
178 		vput(vp);
179 		return (error);
180 	}
	/* Find the configured filesystem type with a matching name. */
181 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
182 		if (!strcmp(vfsp->vfc_name, fstypename))
183 			break;
184 	}
185 
186 	if (vfsp == NULL) {
187 		vput(vp);
188 		return (EOPNOTSUPP);
189 	}
190 
	/* Something is already mounted on this directory. */
191 	if (vp->v_mountedhere != NULL) {
192 		vput(vp);
193 		return (EBUSY);
194 	}
195 
196 	/*
197 	 * Allocate and initialize the file system.
198 	 */
199 	mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
	/* Return value deliberately discarded (explicit void cast). */
200 	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
201 	mp->mnt_op = vfsp->vfc_vfsops;
202 	mp->mnt_vfc = vfsp;
203 	mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
204 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
205 	mp->mnt_vnodecovered = vp;
206 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
207 update:
208 	/*
209 	 * Set the mount level flags.
210 	 */
211 	if (flags & MNT_RDONLY)
212 		mp->mnt_flag |= MNT_RDONLY;
213 	else if (mp->mnt_flag & MNT_RDONLY)
214 		mp->mnt_flag |= MNT_WANTRDWR;
	/* Clear the caller-settable flags, then apply the ones requested. */
215 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
216 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
217 	    MNT_NOPERM | MNT_FORCE);
218 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
219 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
220 	    MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
221 	/*
222 	 * Mount the filesystem.
223 	 */
224 	error = VFS_MOUNT(mp, fspath, SCARG(uap, data), &nd, p);
225 	if (!error) {
226 		mp->mnt_stat.f_ctime = time_second;
227 	}
	/*
	 * Update path.  In this function MNT_UPDATE is only ORed into
	 * mnt_flag by the update branch above.
	 */
228 	if (mp->mnt_flag & MNT_UPDATE) {
229 		vput(vp);
230 		if (mp->mnt_flag & MNT_WANTRDWR)
231 			mp->mnt_flag &= ~MNT_RDONLY;
232 		mp->mnt_flag &=~
233 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
		/* VFS_MOUNT() failed: roll the flags back to the snapshot. */
234 		if (error)
235 			mp->mnt_flag = mntflag;
236 
		/*
		 * Keep the syncer vnode consistent with the (possibly
		 * restored) read-only state.
		 * NOTE(review): the assignment below can replace a failing
		 * VFS_MOUNT() error with the syncer allocation result --
		 * confirm this is intended.
		 */
237 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
238 			if (mp->mnt_syncer == NULL)
239 				error = vfs_allocate_syncvnode(mp);
240 		} else {
241 			if (mp->mnt_syncer != NULL)
242 				vgone(mp->mnt_syncer);
243 			mp->mnt_syncer = NULL;
244 		}
245 
246 		vfs_unbusy(mp);
247 		return (error);
248 	}
249 
	/* New mount: hook the mount onto the covered vnode. */
250 	vp->v_mountedhere = mp;
251 
252 	/*
253 	 * Put the new filesystem on the mount list after root.
254 	 */
255 	cache_purge(vp);
256 	if (!error) {
257 		vfsp->vfc_refcount++;
258 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
259 		checkdirs(vp);
260 		VOP_UNLOCK(vp, p);
		/*
		 * NOTE(review): this result is overwritten by the
		 * VFS_START() assignment a few lines below, so a syncer
		 * allocation failure is never reported.
		 */
261  		if ((mp->mnt_flag & MNT_RDONLY) == 0)
262  			error = vfs_allocate_syncvnode(mp);
263 		vfs_unbusy(mp);
264 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
265 		if ((error = VFS_START(mp, 0, p)) != 0)
266 			vrele(vp);
267 	} else {
		/* VFS_MOUNT() failed: undo the hook-up and free the mount. */
268 		mp->mnt_vnodecovered->v_mountedhere = NULL;
269 		vfs_unbusy(mp);
270 		free(mp, M_MOUNT, sizeof(*mp));
271 		vput(vp);
272 	}
273 	return (error);
274 }
275 
276 /*
277  * Scan all active processes to see if any of them have a current
278  * or root directory onto which the new filesystem has just been
279  * mounted. If so, replace them with the new mount point, keeping
280  * track of how many were replaced.  That's the number of references
281  * the old vnode had that we've replaced, so finish by vrele()'ing
282  * it that many times.  This puts off any possible sleeping until
283  * we've finished walking the allproc list.
284  */
285 void
286 checkdirs(struct vnode *olddp)
287 {
288 	struct filedesc *fdp;
289 	struct vnode *newdp;
290 	struct proc *p;
291 	u_int  free_count = 0;
292 
293 	if (olddp->v_usecount == 1)
294 		return;
295 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
296 		panic("mount: lost mount");
297 	LIST_FOREACH(p, &allproc, p_list) {
298 		fdp = p->p_fd;
299 		if (fdp->fd_cdir == olddp) {
300 			free_count++;
301 			vref(newdp);
302 			fdp->fd_cdir = newdp;
303 		}
304 		if (fdp->fd_rdir == olddp) {
305 			free_count++;
306 			vref(newdp);
307 			fdp->fd_rdir = newdp;
308 		}
309 	}
310 	if (rootvnode == olddp) {
311 		free_count++;
312 		vref(newdp);
313 		rootvnode = newdp;
314 	}
315 	while (free_count-- > 0)
316 		vrele(olddp);
317 	vput(newdp);
318 }
319 
320 /*
321  * Unmount a file system.
322  *
323  * Note: unmount takes a path to the vnode mounted on as argument,
324  * not special file (as before).
325  */
326 int
327 sys_unmount(struct proc *p, void *v, register_t *retval)
328 {
329 	struct sys_unmount_args /* {
330 		syscallarg(const char *) path;
331 		syscallarg(int) flags;
332 	} */ *uap = v;
333 	struct vnode *vp;
334 	struct mount *mp;
335 	int error;
336 	struct nameidata nd;
337 
338 	if ((error = suser(p, 0)) != 0)
339 		return (error);
340 
341 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
342 	    SCARG(uap, path), p);
343 	if ((error = namei(&nd)) != 0)
344 		return (error);
345 	vp = nd.ni_vp;
346 	mp = vp->v_mount;
347 
348 	/*
349 	 * Don't allow unmounting the root file system.
350 	 */
351 	if (mp->mnt_flag & MNT_ROOTFS) {
352 		vput(vp);
353 		return (EINVAL);
354 	}
355 
356 	/*
357 	 * Must be the root of the filesystem
358 	 */
359 	if ((vp->v_flag & VROOT) == 0) {
360 		vput(vp);
361 		return (EINVAL);
362 	}
363 	vput(vp);
364 
365 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
366 		return (EBUSY);
367 
368 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p, vp));
369 }
370 
371 /*
372  * Do the actual file system unmount.
373  */
374 int
375 dounmount(struct mount *mp, int flags, struct proc *p, struct vnode *olddp)
376 {
377 	struct vnode *coveredvp;
378 	int error;
379 	int hadsyncer = 0;
380 
381  	mp->mnt_flag &=~ MNT_ASYNC;
382  	cache_purgevfs(mp);	/* remove cache entries for this file sys */
383  	if (mp->mnt_syncer != NULL) {
384 		hadsyncer = 1;
385  		vgone(mp->mnt_syncer);
386 		mp->mnt_syncer = NULL;
387 	}
388 	if (((mp->mnt_flag & MNT_RDONLY) ||
389 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
390  	    (flags & MNT_FORCE))
391  		error = VFS_UNMOUNT(mp, flags, p);
392 
393  	if (error && !(flags & MNT_DOOMED)) {
394  		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
395  			(void) vfs_allocate_syncvnode(mp);
396 		vfs_unbusy(mp);
397 		return (error);
398 	}
399 
400 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
401 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
402 		coveredvp->v_mountedhere = NULL;
403  		vrele(coveredvp);
404  	}
405 
406 	mp->mnt_vfc->vfc_refcount--;
407 
408 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
409 		panic("unmount: dangling vnode");
410 
411 	vfs_unbusy(mp);
412 	free(mp, M_MOUNT, sizeof(*mp));
413 
414 	return (0);
415 }
416 
417 /*
418  * Sync each mounted filesystem.
419  */
420 #ifdef DEBUG
421 int syncprt = 0;
422 struct ctldebug debug0 = { "syncprt", &syncprt };
423 #endif
424 
425 int
426 sys_sync(struct proc *p, void *v, register_t *retval)
427 {
428 	struct mount *mp, *nmp;
429 	int asyncflag;
430 
431 	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
432 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
433 			continue;
434 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
435 			asyncflag = mp->mnt_flag & MNT_ASYNC;
436 			mp->mnt_flag &= ~MNT_ASYNC;
437 			uvm_vnp_sync(mp);
438 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
439 			if (asyncflag)
440 				mp->mnt_flag |= MNT_ASYNC;
441 		}
442 		vfs_unbusy(mp);
443 	}
444 
445 	return (0);
446 }
447 
448 /*
449  * Change filesystem quotas.
450  */
451 int
452 sys_quotactl(struct proc *p, void *v, register_t *retval)
453 {
454 	struct sys_quotactl_args /* {
455 		syscallarg(const char *) path;
456 		syscallarg(int) cmd;
457 		syscallarg(int) uid;
458 		syscallarg(char *) arg;
459 	} */ *uap = v;
460 	struct mount *mp;
461 	int error;
462 	struct nameidata nd;
463 
464 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
465 	if ((error = namei(&nd)) != 0)
466 		return (error);
467 	mp = nd.ni_vp->v_mount;
468 	vrele(nd.ni_vp);
469 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
470 	    SCARG(uap, arg), p));
471 }
472 
473 int
474 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
475 {
476 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
477 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
478 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
479 	char *s, *d;
480 	int error;
481 
482 	/* Don't let non-root see filesystem id (for NFS security) */
483 	if (suser(p, 0)) {
484 		fsid_t fsid;
485 
486 		s = (char *)sp;
487 		d = (char *)uaddr;
488 
489 		memset(&fsid, 0, sizeof(fsid));
490 
491 		if ((error = copyout(s, d, co_sz1)) != 0)
492 			return (error);
493 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
494 			return (error);
495 		return (copyout(s + co_off2, d + co_off2, co_sz2));
496 	}
497 
498 	return (copyout(sp, uaddr, sizeof(*sp)));
499 }
500 
501 /*
502  * Get filesystem statistics.
503  */
504 int
505 sys_statfs(struct proc *p, void *v, register_t *retval)
506 {
507 	struct sys_statfs_args /* {
508 		syscallarg(const char *) path;
509 		syscallarg(struct statfs *) buf;
510 	} */ *uap = v;
511 	struct mount *mp;
512 	struct statfs *sp;
513 	int error;
514 	struct nameidata nd;
515 
516 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
517 	nd.ni_pledge = PLEDGE_RPATH;
518 	if ((error = namei(&nd)) != 0)
519 		return (error);
520 	mp = nd.ni_vp->v_mount;
521 	sp = &mp->mnt_stat;
522 	vrele(nd.ni_vp);
523 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
524 		return (error);
525 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
526 
527 	return (copyout_statfs(sp, SCARG(uap, buf), p));
528 }
529 
530 /*
531  * Get filesystem statistics.
532  */
533 int
534 sys_fstatfs(struct proc *p, void *v, register_t *retval)
535 {
536 	struct sys_fstatfs_args /* {
537 		syscallarg(int) fd;
538 		syscallarg(struct statfs *) buf;
539 	} */ *uap = v;
540 	struct file *fp;
541 	struct mount *mp;
542 	struct statfs *sp;
543 	int error;
544 
545 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
546 		return (error);
547 	mp = ((struct vnode *)fp->f_data)->v_mount;
548 	if (!mp) {
549 		FRELE(fp, p);
550 		return (ENOENT);
551 	}
552 	sp = &mp->mnt_stat;
553 	error = VFS_STATFS(mp, sp, p);
554 	FRELE(fp, p);
555 	if (error)
556 		return (error);
557 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
558 
559 	return (copyout_statfs(sp, SCARG(uap, buf), p));
560 }
561 
562 /*
563  * Get statistics on all filesystems.
564  */
565 int
566 sys_getfsstat(struct proc *p, void *v, register_t *retval)
567 {
568 	struct sys_getfsstat_args /* {
569 		syscallarg(struct statfs *) buf;
570 		syscallarg(size_t) bufsize;
571 		syscallarg(int) flags;
572 	} */ *uap = v;
573 	struct mount *mp, *nmp;
574 	struct statfs *sp;
575 	struct statfs *sfsp;
576 	size_t count, maxcount;
577 	int error, flags = SCARG(uap, flags);
578 
579 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
580 	sfsp = SCARG(uap, buf);
581 	count = 0;
582 
583 	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, nmp) {
584 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
585 			continue;
586 		if (sfsp && count < maxcount) {
587 			sp = &mp->mnt_stat;
588 
589 			/* Refresh stats unless MNT_NOWAIT is specified */
590 			if (flags != MNT_NOWAIT &&
591 			    flags != MNT_LAZY &&
592 			    (flags == MNT_WAIT ||
593 			    flags == 0) &&
594 			    (error = VFS_STATFS(mp, sp, p))) {
595 				vfs_unbusy(mp);
596  				continue;
597 			}
598 
599 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
600 #if notyet
601 			if (mp->mnt_flag & MNT_SOFTDEP)
602 				sp->f_eflags = STATFS_SOFTUPD;
603 #endif
604 			error = (copyout_statfs(sp, sfsp, p));
605 			if (error) {
606 				vfs_unbusy(mp);
607 				return (error);
608 			}
609 			sfsp++;
610 		}
611 		count++;
612 		vfs_unbusy(mp);
613 	}
614 
615 	if (sfsp && count > maxcount)
616 		*retval = maxcount;
617 	else
618 		*retval = count;
619 
620 	return (0);
621 }
622 
623 /*
624  * Change current working directory to a given file descriptor.
625  */
626 int
627 sys_fchdir(struct proc *p, void *v, register_t *retval)
628 {
629 	struct sys_fchdir_args /* {
630 		syscallarg(int) fd;
631 	} */ *uap = v;
632 	struct filedesc *fdp = p->p_fd;
633 	struct vnode *vp, *tdp, *old_cdir;
634 	struct mount *mp;
635 	struct file *fp;
636 	int error;
637 
638 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
639 		return (EBADF);
640 	vp = fp->f_data;
641 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR)
642 		return (ENOTDIR);
643 	vref(vp);
644 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
645 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
646 
647 	while (!error && (mp = vp->v_mountedhere) != NULL) {
648 		if (vfs_busy(mp, VB_READ|VB_WAIT))
649 			continue;
650 		error = VFS_ROOT(mp, &tdp);
651 		vfs_unbusy(mp);
652 		if (error)
653 			break;
654 		vput(vp);
655 		vp = tdp;
656 	}
657 	if (error) {
658 		vput(vp);
659 		return (error);
660 	}
661 	VOP_UNLOCK(vp, p);
662 	old_cdir = fdp->fd_cdir;
663 	fdp->fd_cdir = vp;
664 	vrele(old_cdir);
665 	return (0);
666 }
667 
668 /*
669  * Change current working directory (``.'').
670  */
671 int
672 sys_chdir(struct proc *p, void *v, register_t *retval)
673 {
674 	struct sys_chdir_args /* {
675 		syscallarg(const char *) path;
676 	} */ *uap = v;
677 	struct filedesc *fdp = p->p_fd;
678 	struct vnode *old_cdir;
679 	int error;
680 	struct nameidata nd;
681 
682 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
683 	    SCARG(uap, path), p);
684 	nd.ni_pledge = PLEDGE_RPATH;
685 	if ((error = change_dir(&nd, p)) != 0)
686 		return (error);
687 	old_cdir = fdp->fd_cdir;
688 	fdp->fd_cdir = nd.ni_vp;
689 	vrele(old_cdir);
690 	return (0);
691 }
692 
693 /*
694  * Change notion of root (``/'') directory.
695  */
696 int
697 sys_chroot(struct proc *p, void *v, register_t *retval)
698 {
699 	struct sys_chroot_args /* {
700 		syscallarg(const char *) path;
701 	} */ *uap = v;
702 	struct filedesc *fdp = p->p_fd;
703 	struct vnode *old_cdir, *old_rdir;
704 	int error;
705 	struct nameidata nd;
706 
707 	if ((error = suser(p, 0)) != 0)
708 		return (error);
709 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
710 	    SCARG(uap, path), p);
711 	if ((error = change_dir(&nd, p)) != 0)
712 		return (error);
713 	if (fdp->fd_rdir != NULL) {
714 		/*
715 		 * A chroot() done inside a changed root environment does
716 		 * an automatic chdir to avoid the out-of-tree experience.
717 		 */
718 		vref(nd.ni_vp);
719 		old_rdir = fdp->fd_rdir;
720 		old_cdir = fdp->fd_cdir;
721 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
722 		vrele(old_rdir);
723 		vrele(old_cdir);
724 	} else
725 		fdp->fd_rdir = nd.ni_vp;
726 	return (0);
727 }
728 
729 /*
730  * Common routine for chroot and chdir.
731  */
732 static int
733 change_dir(struct nameidata *ndp, struct proc *p)
734 {
735 	struct vnode *vp;
736 	int error;
737 
738 	if ((error = namei(ndp)) != 0)
739 		return (error);
740 	vp = ndp->ni_vp;
741 	if (vp->v_type != VDIR)
742 		error = ENOTDIR;
743 	else
744 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
745 	if (error)
746 		vput(vp);
747 	else
748 		VOP_UNLOCK(vp, p);
749 	return (error);
750 }
751 
752 /*
753  * Check permissions, allocate an open file structure,
754  * and call the device open routine if any.
755  */
756 int
757 sys_open(struct proc *p, void *v, register_t *retval)
758 {
759 	struct sys_open_args /* {
760 		syscallarg(const char *) path;
761 		syscallarg(int) flags;
762 		syscallarg(mode_t) mode;
763 	} */ *uap = v;
764 
765 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
766 	    SCARG(uap, mode), retval));
767 }
768 
769 int
770 sys_openat(struct proc *p, void *v, register_t *retval)
771 {
772 	struct sys_openat_args /* {
773 		syscallarg(int) fd;
774 		syscallarg(const char *) path;
775 		syscallarg(int) flags;
776 		syscallarg(mode_t) mode;
777 	} */ *uap = v;
778 
779 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
780 	    SCARG(uap, flags), SCARG(uap, mode), retval));
781 }
782 
/*
 * Common implementation behind sys_open() and sys_openat().
 *
 *   fd      directory descriptor handed to NDINITAT() (AT_FDCWD from
 *           sys_open())
 *   path    user-space pathname
 *   oflags  open(2) flags as passed by the caller
 *   mode    creation mode; filtered through the process umask below
 *   retval  receives the index of the new descriptor on success
 *
 * Runs with the descriptor table locked from falloc() until return.
 * Every failure after falloc() removes the descriptor slot and closes
 * the file again.  Returns 0 or an errno value (ERESTART from
 * vn_open() is reported as EINTR).
 */
783 int
784 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
785     register_t *retval)
786 {
787 	struct filedesc *fdp = p->p_fd;
788 	struct file *fp;
789 	struct vnode *vp;
790 	struct vattr vattr;
791 	int flags, cmode;
	/* localtrunc: set when truncation is done here rather than by vn_open() */
792 	int type, indx, error, localtrunc = 0;
793 	struct flock lf;
794 	struct nameidata nd;
795 	int ni_pledge = 0;
796 
	/* Lock-on-open must first pass the pledge flock check. */
797 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
798 		error = pledge_flock(p);
799 		if (error != 0)
800 			return (error);
801 	}
802 
803 	fdplock(fdp);
804 
805 	if ((error = falloc(p, &fp, &indx)) != 0)
806 		goto out;
807 	flags = FFLAGS(oflags);
	/* Derive the pledge path bits for the lookup from the access mode. */
808 	if (flags & FREAD)
809 		ni_pledge |= PLEDGE_RPATH;
810 	if (flags & FWRITE)
811 		ni_pledge |= PLEDGE_WPATH;
812 	if (oflags & O_CREAT)
813 		ni_pledge |= PLEDGE_CPATH;
814 
815 	if (flags & O_CLOEXEC)
816 		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
817 
	/* Apply the umask, keep only permission bits, and drop S_ISTXT. */
818 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	/* Pledged processes are further limited to ACCESSPERMS. */
819 	if ((p->p_p->ps_flags & PS_PLEDGE))
820 		cmode &= ACCESSPERMS;
821 	NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
822 	nd.ni_pledge = ni_pledge;
823 	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * With lock-on-open the truncation is postponed until after the
	 * lock has been taken further down, so O_TRUNC is hidden from
	 * vn_open().
	 */
824 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
825 		localtrunc = 1;
826 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
827 	}
828 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/*
		 * ENODEV with p_dupfd set to a non-negative value is the
		 * fdopen case: duplicate that descriptor into the new slot
		 * instead of failing.
		 */
829 		if (error == ENODEV &&
830 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
831 		    (error =
832 			dupfdopen(fdp, indx, p->p_dupfd, flags)) == 0) {
833 			closef(fp, p);
834 			*retval = indx;
835 			goto out;
836 		}
837 		if (error == ERESTART)
838 			error = EINTR;
839 		fdremove(fdp, indx);
840 		closef(fp, p);
841 		goto out;
842 	}
843 	p->p_dupfd = 0;
844 	vp = nd.ni_vp;
	/* Bind the opened vnode to the new file structure. */
845 	fp->f_flag = flags & FMASK;
846 	fp->f_type = DTYPE_VNODE;
847 	fp->f_ops = &vnops;
848 	fp->f_data = vp;
849 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0. */
850 		lf.l_whence = SEEK_SET;
851 		lf.l_start = 0;
852 		lf.l_len = 0;
853 		if (flags & O_EXLOCK)
854 			lf.l_type = F_WRLCK;
855 		else
856 			lf.l_type = F_RDLCK;
857 		type = F_FLOCK;
858 		if ((flags & FNONBLOCK) == 0)
859 			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
860 		VOP_UNLOCK(vp, p);
861 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
862 		if (error) {
863 			/* closef will vn_close the file for us. */
864 			fdremove(fdp, indx);
865 			closef(fp, p);
866 			goto out;
867 		}
868 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
869 		fp->f_iflags |= FIF_HASLOCK;
870 	}
871 	if (localtrunc) {
		/* Deferred O_TRUNC: validate, then set the size to zero. */
872 		if ((fp->f_flag & FWRITE) == 0)
873 			error = EACCES;
874 		else if (vp->v_mount->mnt_flag & MNT_RDONLY)
875 			error = EROFS;
876 		else if (vp->v_type == VDIR)
877 			error = EISDIR;
878 		else if ((error = vn_writechk(vp)) == 0) {
879 			VATTR_NULL(&vattr);
880 			vattr.va_size = 0;
881 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
882 		}
883 		if (error) {
884 			VOP_UNLOCK(vp, p);
885 			/* closef will close the file for us. */
886 			fdremove(fdp, indx);
887 			closef(fp, p);
888 			goto out;
889 		}
890 	}
891 	VOP_UNLOCK(vp, p);
892 	*retval = indx;
893 	FILE_SET_MATURE(fp, p);
894 out:
895 	fdpunlock(fdp);
896 	return (error);
897 }
898 
899 /*
900  * Get file handle system call
901  */
902 int
903 sys_getfh(struct proc *p, void *v, register_t *retval)
904 {
905 	struct sys_getfh_args /* {
906 		syscallarg(const char *) fname;
907 		syscallarg(fhandle_t *) fhp;
908 	} */ *uap = v;
909 	struct vnode *vp;
910 	fhandle_t fh;
911 	int error;
912 	struct nameidata nd;
913 
914 	/*
915 	 * Must be super user
916 	 */
917 	error = suser(p, 0);
918 	if (error)
919 		return (error);
920 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
921 	    SCARG(uap, fname), p);
922 	error = namei(&nd);
923 	if (error)
924 		return (error);
925 	vp = nd.ni_vp;
926 	memset(&fh, 0, sizeof(fh));
927 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
928 	error = VFS_VPTOFH(vp, &fh.fh_fid);
929 	vput(vp);
930 	if (error)
931 		return (error);
932 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
933 	return (error);
934 }
935 
936 /*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * Superuser only.  The flags must request reading and/or writing and
 * must not contain O_CREAT (EINVAL otherwise).  The handle is copied
 * in, mapped to a mount (ESTALE if none is found) and then to a vnode,
 * after which the function performs the checks listed below by hand
 * (the code's own comment calls this "an effective vn_open") and then
 * finishes the descriptor.
 *
 * On success *retval is the new descriptor index and 0 is returned.
 * All failures funnel through the bad: label.
 */
942 int
943 sys_fhopen(struct proc *p, void *v, register_t *retval)
944 {
945 	struct sys_fhopen_args /* {
946 		syscallarg(const fhandle_t *) fhp;
947 		syscallarg(int) flags;
948 	} */ *uap = v;
949 	struct filedesc *fdp = p->p_fd;
950 	struct file *fp;
	/* vp stays NULL until a vnode is held that bad: must vput() */
951 	struct vnode *vp = NULL;
952 	struct mount *mp;
953 	struct ucred *cred = p->p_ucred;
954 	int flags;
955 	int type, indx, error=0;
956 	struct flock lf;
957 	struct vattr va;
958 	fhandle_t fh;
959 
960 	/*
961 	 * Must be super user
962 	 */
963 	if ((error = suser(p, 0)))
964 		return (error);
965 
966 	flags = FFLAGS(SCARG(uap, flags));
967 	if ((flags & (FREAD | FWRITE)) == 0)
968 		return (EINVAL);
969 	if ((flags & O_CREAT))
970 		return (EINVAL);
971 
972 	fdplock(fdp);
973 	if ((error = falloc(p, &fp, &indx)) != 0) {
		/* Tell bad: that there is no file/slot to tear down. */
974 		fp = NULL;
975 		goto bad;
976 	}
977 	if (flags & O_CLOEXEC)
978 		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
979 
980 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
981 		goto bad;
982 
	/* Map the handle's filesystem id to a mount. */
983 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
984 		error = ESTALE;
985 		goto bad;
986 	}
987 
988 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
989 		vp = NULL;	/* most likely unnecessary sanity for bad: */
990 		goto bad;
991 	}
992 
993 	/* Now do an effective vn_open */
994 
995 	if (vp->v_type == VSOCK) {
996 		error = EOPNOTSUPP;
997 		goto bad;
998 	}
999 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1000 		error = ENOTDIR;
1001 		goto bad;
1002 	}
1003 	if (flags & FREAD) {
1004 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1005 			goto bad;
1006 	}
	/* Writing or truncating: not on a directory, and needs VWRITE. */
1007 	if (flags & (FWRITE | O_TRUNC)) {
1008 		if (vp->v_type == VDIR) {
1009 			error = EISDIR;
1010 			goto bad;
1011 		}
1012 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1013 		    (error = vn_writechk(vp)) != 0)
1014 			goto bad;
1015 	}
1016 	if (flags & O_TRUNC) {
		/* Truncate by setting the size attribute to zero. */
1017 		VATTR_NULL(&va);
1018 		va.va_size = 0;
1019 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1020 			goto bad;
1021 	}
1022 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1023 		goto bad;
1024 	if (flags & FWRITE)
1025 		vp->v_writecount++;
1026 
1027 	/* done with modified vn_open, now finish what sys_open does. */
1028 
1029 	fp->f_flag = flags & FMASK;
1030 	fp->f_type = DTYPE_VNODE;
1031 	fp->f_ops = &vnops;
1032 	fp->f_data = vp;
1033 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0. */
1034 		lf.l_whence = SEEK_SET;
1035 		lf.l_start = 0;
1036 		lf.l_len = 0;
1037 		if (flags & O_EXLOCK)
1038 			lf.l_type = F_WRLCK;
1039 		else
1040 			lf.l_type = F_RDLCK;
1041 		type = F_FLOCK;
1042 		if ((flags & FNONBLOCK) == 0)
1043 			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
1044 		VOP_UNLOCK(vp, p);
1045 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1046 		if (error) {
1047 			vp = NULL;	/* closef will vn_close the file */
1048 			goto bad;
1049 		}
1050 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1051 		fp->f_iflags |= FIF_HASLOCK;
1052 	}
1053 	VOP_UNLOCK(vp, p);
1054 	*retval = indx;
1055 	FILE_SET_MATURE(fp, p);
1056 
1057 	fdpunlock(fdp);
1058 	return (0);
1059 
	/*
	 * Common failure exit.  With a file allocated, its descriptor
	 * slot is removed and the file closed; a vnode still recorded in
	 * vp is released with vput().  The descriptor table is always
	 * unlocked.
	 */
1060 bad:
1061 	if (fp) {
1062 		fdremove(fdp, indx);
1063 		closef(fp, p);
1064 		if (vp != NULL)
1065 			vput(vp);
1066 	}
1067 	fdpunlock(fdp);
1068 	return (error);
1069 }
1070 
1071 int
1072 sys_fhstat(struct proc *p, void *v, register_t *retval)
1073 {
1074 	struct sys_fhstat_args /* {
1075 		syscallarg(const fhandle_t *) fhp;
1076 		syscallarg(struct stat *) sb;
1077 	} */ *uap = v;
1078 	struct stat sb;
1079 	int error;
1080 	fhandle_t fh;
1081 	struct mount *mp;
1082 	struct vnode *vp;
1083 
1084 	/*
1085 	 * Must be super user
1086 	 */
1087 	if ((error = suser(p, 0)))
1088 		return (error);
1089 
1090 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1091 		return (error);
1092 
1093 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1094 		return (ESTALE);
1095 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1096 		return (error);
1097 	error = vn_stat(vp, &sb, p);
1098 	vput(vp);
1099 	if (error)
1100 		return (error);
1101 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1102 	return (error);
1103 }
1104 
1105 int
1106 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1107 {
1108 	struct sys_fhstatfs_args /* {
1109 		syscallarg(const fhandle_t *) fhp;
1110 		syscallarg(struct statfs *) buf;
1111 	} */ *uap = v;
1112 	struct statfs *sp;
1113 	fhandle_t fh;
1114 	struct mount *mp;
1115 	struct vnode *vp;
1116 	int error;
1117 
1118 	/*
1119 	 * Must be super user
1120 	 */
1121 	if ((error = suser(p, 0)))
1122 		return (error);
1123 
1124 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1125 		return (error);
1126 
1127 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1128 		return (ESTALE);
1129 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1130 		return (error);
1131 	mp = vp->v_mount;
1132 	sp = &mp->mnt_stat;
1133 	vput(vp);
1134 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1135 		return (error);
1136 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1137 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1138 }
1139 
1140 /*
1141  * Create a special file or named pipe.
1142  */
1143 int
1144 sys_mknod(struct proc *p, void *v, register_t *retval)
1145 {
1146 	struct sys_mknod_args /* {
1147 		syscallarg(const char *) path;
1148 		syscallarg(mode_t) mode;
1149 		syscallarg(int) dev;
1150 	} */ *uap = v;
1151 
1152 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1153 	    SCARG(uap, dev)));
1154 }
1155 
1156 int
1157 sys_mknodat(struct proc *p, void *v, register_t *retval)
1158 {
1159 	struct sys_mknodat_args /* {
1160 		syscallarg(int) fd;
1161 		syscallarg(const char *) path;
1162 		syscallarg(mode_t) mode;
1163 		syscallarg(dev_t) dev;
1164 	} */ *uap = v;
1165 
1166 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1167 	    SCARG(uap, mode), SCARG(uap, dev)));
1168 }
1169 
1170 int
1171 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1172 {
1173 	struct vnode *vp;
1174 	struct vattr vattr;
1175 	int error;
1176 	struct nameidata nd;
1177 
1178 	if (dev == VNOVAL)
1179 		return (EINVAL);
1180 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1181 	nd.ni_pledge = PLEDGE_DPATH;
1182 	if ((error = namei(&nd)) != 0)
1183 		return (error);
1184 	vp = nd.ni_vp;
1185 	if (!S_ISFIFO(mode) || dev != 0) {
1186 		if ((nd.ni_dvp->v_mount->mnt_flag & MNT_NOPERM) == 0 &&
1187 		    (error = suser(p, 0)) != 0)
1188 			goto out;
1189 		if (p->p_fd->fd_rdir) {
1190 			error = EINVAL;
1191 			goto out;
1192 		}
1193 	}
1194 	if (vp != NULL)
1195 		error = EEXIST;
1196 	else {
1197 		VATTR_NULL(&vattr);
1198 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1199 		if ((p->p_p->ps_flags & PS_PLEDGE))
1200 			vattr.va_mode &= ACCESSPERMS;
1201 		vattr.va_rdev = dev;
1202 
1203 		switch (mode & S_IFMT) {
1204 		case S_IFMT:	/* used by badsect to flag bad sectors */
1205 			vattr.va_type = VBAD;
1206 			break;
1207 		case S_IFCHR:
1208 			vattr.va_type = VCHR;
1209 			break;
1210 		case S_IFBLK:
1211 			vattr.va_type = VBLK;
1212 			break;
1213 		case S_IFIFO:
1214 #ifndef FIFO
1215 			error = EOPNOTSUPP;
1216 			break;
1217 #else
1218 			if (dev == 0) {
1219 				vattr.va_type = VFIFO;
1220 				break;
1221 			}
1222 			/* FALLTHROUGH */
1223 #endif /* FIFO */
1224 		default:
1225 			error = EINVAL;
1226 			break;
1227 		}
1228 	}
1229 out:
1230 	if (!error) {
1231 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1232 	} else {
1233 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1234 		if (nd.ni_dvp == vp)
1235 			vrele(nd.ni_dvp);
1236 		else
1237 			vput(nd.ni_dvp);
1238 		if (vp)
1239 			vrele(vp);
1240 	}
1241 	return (error);
1242 }
1243 
1244 /*
1245  * Create a named pipe.
1246  */
1247 int
1248 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1249 {
1250 	struct sys_mkfifo_args /* {
1251 		syscallarg(const char *) path;
1252 		syscallarg(mode_t) mode;
1253 	} */ *uap = v;
1254 
1255 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1256 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1257 }
1258 
1259 int
1260 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1261 {
1262 	struct sys_mkfifoat_args /* {
1263 		syscallarg(int) fd;
1264 		syscallarg(const char *) path;
1265 		syscallarg(mode_t) mode;
1266 	} */ *uap = v;
1267 
1268 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1269 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1270 }
1271 
1272 /*
1273  * Make a hard file link.
1274  */
1275 int
1276 sys_link(struct proc *p, void *v, register_t *retval)
1277 {
1278 	struct sys_link_args /* {
1279 		syscallarg(const char *) path;
1280 		syscallarg(const char *) link;
1281 	} */ *uap = v;
1282 
1283 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1284 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1285 }
1286 
1287 int
1288 sys_linkat(struct proc *p, void *v, register_t *retval)
1289 {
1290 	struct sys_linkat_args /* {
1291 		syscallarg(int) fd1;
1292 		syscallarg(const char *) path1;
1293 		syscallarg(int) fd2;
1294 		syscallarg(const char *) path2;
1295 		syscallarg(int) flag;
1296 	} */ *uap = v;
1297 
1298 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1299 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1300 }
1301 
1302 int
1303 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1304     const char *path2, int flag)
1305 {
1306 	struct vnode *vp;
1307 	struct nameidata nd;
1308 	int error, follow;
1309 	int flags;
1310 
1311 	if (flag & ~AT_SYMLINK_FOLLOW)
1312 		return (EINVAL);
1313 
1314 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1315 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1316 	nd.ni_pledge = PLEDGE_RPATH;
1317 	if ((error = namei(&nd)) != 0)
1318 		return (error);
1319 	vp = nd.ni_vp;
1320 
1321 	flags = LOCKPARENT;
1322 	if (vp->v_type == VDIR) {
1323 		flags |= STRIPSLASHES;
1324 	}
1325 
1326 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1327 	nd.ni_pledge = PLEDGE_CPATH;
1328 	if ((error = namei(&nd)) != 0)
1329 		goto out;
1330 	if (nd.ni_vp) {
1331 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1332 		if (nd.ni_dvp == nd.ni_vp)
1333 			vrele(nd.ni_dvp);
1334 		else
1335 			vput(nd.ni_dvp);
1336 		vrele(nd.ni_vp);
1337 		error = EEXIST;
1338 		goto out;
1339 	}
1340 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1341 out:
1342 	vrele(vp);
1343 	return (error);
1344 }
1345 
1346 /*
1347  * Make a symbolic link.
1348  */
1349 int
1350 sys_symlink(struct proc *p, void *v, register_t *retval)
1351 {
1352 	struct sys_symlink_args /* {
1353 		syscallarg(const char *) path;
1354 		syscallarg(const char *) link;
1355 	} */ *uap = v;
1356 
1357 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1358 }
1359 
1360 int
1361 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1362 {
1363 	struct sys_symlinkat_args /* {
1364 		syscallarg(const char *) path;
1365 		syscallarg(int) fd;
1366 		syscallarg(const char *) link;
1367 	} */ *uap = v;
1368 
1369 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1370 	    SCARG(uap, link)));
1371 }
1372 
1373 int
1374 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1375 {
1376 	struct vattr vattr;
1377 	char *path;
1378 	int error;
1379 	struct nameidata nd;
1380 
1381 	path = pool_get(&namei_pool, PR_WAITOK);
1382 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1383 	if (error)
1384 		goto out;
1385 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1386 	nd.ni_pledge = PLEDGE_CPATH;
1387 	if ((error = namei(&nd)) != 0)
1388 		goto out;
1389 	if (nd.ni_vp) {
1390 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1391 		if (nd.ni_dvp == nd.ni_vp)
1392 			vrele(nd.ni_dvp);
1393 		else
1394 			vput(nd.ni_dvp);
1395 		vrele(nd.ni_vp);
1396 		error = EEXIST;
1397 		goto out;
1398 	}
1399 	VATTR_NULL(&vattr);
1400 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1401 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1402 out:
1403 	pool_put(&namei_pool, path);
1404 	return (error);
1405 }
1406 
1407 /*
1408  * Delete a name from the filesystem.
1409  */
1410 int
1411 sys_unlink(struct proc *p, void *v, register_t *retval)
1412 {
1413 	struct sys_unlink_args /* {
1414 		syscallarg(const char *) path;
1415 	} */ *uap = v;
1416 
1417 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1418 }
1419 
1420 int
1421 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1422 {
1423 	struct sys_unlinkat_args /* {
1424 		syscallarg(int) fd;
1425 		syscallarg(const char *) path;
1426 		syscallarg(int) flag;
1427 	} */ *uap = v;
1428 
1429 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1430 	    SCARG(uap, flag)));
1431 }
1432 
1433 int
1434 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1435 {
1436 	struct vnode *vp;
1437 	int error;
1438 	struct nameidata nd;
1439 
1440 	if (flag & ~AT_REMOVEDIR)
1441 		return (EINVAL);
1442 
1443 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1444 	    fd, path, p);
1445 	nd.ni_pledge = PLEDGE_CPATH;
1446 	if ((error = namei(&nd)) != 0)
1447 		return (error);
1448 	vp = nd.ni_vp;
1449 
1450 	if (flag & AT_REMOVEDIR) {
1451 		if (vp->v_type != VDIR) {
1452 			error = ENOTDIR;
1453 			goto out;
1454 		}
1455 		/*
1456 		 * No rmdir "." please.
1457 		 */
1458 		if (nd.ni_dvp == vp) {
1459 			error = EINVAL;
1460 			goto out;
1461 		}
1462 	}
1463 
1464 	/*
1465 	 * The root of a mounted filesystem cannot be deleted.
1466 	 */
1467 	if (vp->v_flag & VROOT)
1468 		error = EBUSY;
1469 out:
1470 	if (!error) {
1471 		if (flag & AT_REMOVEDIR) {
1472 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1473 		} else {
1474 			(void)uvm_vnp_uncache(vp);
1475 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1476 		}
1477 	} else {
1478 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1479 		if (nd.ni_dvp == vp)
1480 			vrele(nd.ni_dvp);
1481 		else
1482 			vput(nd.ni_dvp);
1483 		vput(vp);
1484 	}
1485 	return (error);
1486 }
1487 
1488 /*
1489  * Reposition read/write file offset.
1490  */
1491 int
1492 sys_lseek(struct proc *p, void *v, register_t *retval)
1493 {
1494 	struct sys_lseek_args /* {
1495 		syscallarg(int) fd;
1496 		syscallarg(int) pad;
1497 		syscallarg(off_t) offset;
1498 		syscallarg(int) whence;
1499 	} */ *uap = v;
1500 	struct ucred *cred = p->p_ucred;
1501 	struct filedesc *fdp = p->p_fd;
1502 	struct file *fp;
1503 	struct vattr vattr;
1504 	struct vnode *vp;
1505 	off_t offarg, newoff;
1506 	int error, special;
1507 
1508 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1509 		return (EBADF);
1510 	if (fp->f_type != DTYPE_VNODE)
1511 		return (ESPIPE);
1512 	vp = fp->f_data;
1513 	if (vp->v_type == VFIFO)
1514 		return (ESPIPE);
1515 	FREF(fp);
1516 	if (vp->v_type == VCHR)
1517 		special = 1;
1518 	else
1519 		special = 0;
1520 	offarg = SCARG(uap, offset);
1521 
1522 	switch (SCARG(uap, whence)) {
1523 	case SEEK_CUR:
1524 		newoff = fp->f_offset + offarg;
1525 		break;
1526 	case SEEK_END:
1527 		error = VOP_GETATTR(vp, &vattr, cred, p);
1528 		if (error)
1529 			goto bad;
1530 		newoff = offarg + (off_t)vattr.va_size;
1531 		break;
1532 	case SEEK_SET:
1533 		newoff = offarg;
1534 		break;
1535 	default:
1536 		error = EINVAL;
1537 		goto bad;
1538 	}
1539 	if (!special) {
1540 		if (newoff < 0) {
1541 			error = EINVAL;
1542 			goto bad;
1543 		}
1544 	}
1545 	*(off_t *)retval = fp->f_offset = newoff;
1546 	fp->f_seek++;
1547 	error = 0;
1548  bad:
1549 	FRELE(fp, p);
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Check access permissions.
1555  */
1556 int
1557 sys_access(struct proc *p, void *v, register_t *retval)
1558 {
1559 	struct sys_access_args /* {
1560 		syscallarg(const char *) path;
1561 		syscallarg(int) amode;
1562 	} */ *uap = v;
1563 
1564 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1565 	    SCARG(uap, amode), 0));
1566 }
1567 
1568 int
1569 sys_faccessat(struct proc *p, void *v, register_t *retval)
1570 {
1571 	struct sys_faccessat_args /* {
1572 		syscallarg(int) fd;
1573 		syscallarg(const char *) path;
1574 		syscallarg(int) amode;
1575 		syscallarg(int) flag;
1576 	} */ *uap = v;
1577 
1578 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1579 	    SCARG(uap, amode), SCARG(uap, flag)));
1580 }
1581 
1582 int
1583 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1584 {
1585 	struct vnode *vp;
1586 	struct ucred *newcred, *oldcred;
1587 	struct nameidata nd;
1588 	int error;
1589 
1590 	if (amode & ~(R_OK | W_OK | X_OK))
1591 		return (EINVAL);
1592 	if (flag & ~AT_EACCESS)
1593 		return (EINVAL);
1594 
1595 	newcred = NULL;
1596 	oldcred = p->p_ucred;
1597 
1598 	/*
1599 	 * If access as real ids was requested and they really differ,
1600 	 * give the thread new creds with them reset
1601 	 */
1602 	if ((flag & AT_EACCESS) == 0 &&
1603 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1604 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1605 		p->p_ucred = newcred = crdup(oldcred);
1606 		newcred->cr_uid = newcred->cr_ruid;
1607 		newcred->cr_gid = newcred->cr_rgid;
1608 	}
1609 
1610 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1611 	nd.ni_pledge = PLEDGE_RPATH;
1612 	if ((error = namei(&nd)) != 0)
1613 		goto out;
1614 	vp = nd.ni_vp;
1615 
1616 	/* Flags == 0 means only check for existence. */
1617 	if (amode) {
1618 		int vflags = 0;
1619 
1620 		if (amode & R_OK)
1621 			vflags |= VREAD;
1622 		if (amode & W_OK)
1623 			vflags |= VWRITE;
1624 		if (amode & X_OK)
1625 			vflags |= VEXEC;
1626 
1627 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
1628 		if (!error && (vflags & VWRITE))
1629 			error = vn_writechk(vp);
1630 	}
1631 	vput(vp);
1632 out:
1633 	if (newcred != NULL) {
1634 		p->p_ucred = oldcred;
1635 		crfree(newcred);
1636 	}
1637 	return (error);
1638 }
1639 
1640 /*
1641  * Get file status; this version follows links.
1642  */
1643 int
1644 sys_stat(struct proc *p, void *v, register_t *retval)
1645 {
1646 	struct sys_stat_args /* {
1647 		syscallarg(const char *) path;
1648 		syscallarg(struct stat *) ub;
1649 	} */ *uap = v;
1650 
1651 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
1652 }
1653 
1654 int
1655 sys_fstatat(struct proc *p, void *v, register_t *retval)
1656 {
1657 	struct sys_fstatat_args /* {
1658 		syscallarg(int) fd;
1659 		syscallarg(const char *) path;
1660 		syscallarg(struct stat *) buf;
1661 		syscallarg(int) flag;
1662 	} */ *uap = v;
1663 
1664 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
1665 	    SCARG(uap, buf), SCARG(uap, flag)));
1666 }
1667 
1668 int
1669 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
1670 {
1671 	struct stat sb;
1672 	int error, follow;
1673 	struct nameidata nd;
1674 
1675 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1676 		return (EINVAL);
1677 
1678 
1679 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1680 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1681 	nd.ni_pledge = PLEDGE_RPATH;
1682 	if ((error = namei(&nd)) != 0)
1683 		return (error);
1684 	error = vn_stat(nd.ni_vp, &sb, p);
1685 	vput(nd.ni_vp);
1686 	if (error)
1687 		return (error);
1688 	if (nd.ni_pledge & PLEDGE_STATLIE) {
1689 		if (S_ISDIR(sb.st_mode)) {
1690 			sb.st_mode &= ~ALLPERMS;
1691 			sb.st_mode |= S_IXUSR | S_IXGRP | S_IXOTH;
1692 			sb.st_uid = 0;
1693 			sb.st_gid = 0;
1694 			sb.st_gen = 0;
1695 		} else
1696 			return (ENOENT);
1697 	}
1698 	/* Don't let non-root see generation numbers (for NFS security) */
1699 	if (suser(p, 0))
1700 		sb.st_gen = 0;
1701 	error = copyout(&sb, buf, sizeof(sb));
1702 #ifdef KTRACE
1703 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
1704 		ktrstat(p, &sb);
1705 #endif
1706 	return (error);
1707 }
1708 
1709 /*
1710  * Get file status; this version does not follow links.
1711  */
1712 int
1713 sys_lstat(struct proc *p, void *v, register_t *retval)
1714 {
1715 	struct sys_lstat_args /* {
1716 		syscallarg(const char *) path;
1717 		syscallarg(struct stat *) ub;
1718 	} */ *uap = v;
1719 
1720 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
1721 	    AT_SYMLINK_NOFOLLOW));
1722 }
1723 
1724 /*
1725  * Get configurable pathname variables.
1726  */
1727 int
1728 sys_pathconf(struct proc *p, void *v, register_t *retval)
1729 {
1730 	struct sys_pathconf_args /* {
1731 		syscallarg(const char *) path;
1732 		syscallarg(int) name;
1733 	} */ *uap = v;
1734 	int error;
1735 	struct nameidata nd;
1736 
1737 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1738 	    SCARG(uap, path), p);
1739 	nd.ni_pledge = PLEDGE_RPATH;
1740 	if ((error = namei(&nd)) != 0)
1741 		return (error);
1742 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1743 	vput(nd.ni_vp);
1744 	return (error);
1745 }
1746 
1747 /*
1748  * Return target name of a symbolic link.
1749  */
1750 int
1751 sys_readlink(struct proc *p, void *v, register_t *retval)
1752 {
1753 	struct sys_readlink_args /* {
1754 		syscallarg(const char *) path;
1755 		syscallarg(char *) buf;
1756 		syscallarg(size_t) count;
1757 	} */ *uap = v;
1758 
1759 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
1760 	    SCARG(uap, count), retval));
1761 }
1762 
1763 int
1764 sys_readlinkat(struct proc *p, void *v, register_t *retval)
1765 {
1766 	struct sys_readlinkat_args /* {
1767 		syscallarg(int) fd;
1768 		syscallarg(const char *) path;
1769 		syscallarg(char *) buf;
1770 		syscallarg(size_t) count;
1771 	} */ *uap = v;
1772 
1773 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1774 	    SCARG(uap, buf), SCARG(uap, count), retval));
1775 }
1776 
1777 int
1778 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
1779     size_t count, register_t *retval)
1780 {
1781 	struct vnode *vp;
1782 	struct iovec aiov;
1783 	struct uio auio;
1784 	int error;
1785 	struct nameidata nd;
1786 
1787 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1788 	nd.ni_pledge = PLEDGE_RPATH;
1789 	if ((error = namei(&nd)) != 0)
1790 		return (error);
1791 	vp = nd.ni_vp;
1792 	if (vp->v_type != VLNK)
1793 		error = EINVAL;
1794 	else {
1795 		aiov.iov_base = buf;
1796 		aiov.iov_len = count;
1797 		auio.uio_iov = &aiov;
1798 		auio.uio_iovcnt = 1;
1799 		auio.uio_offset = 0;
1800 		auio.uio_rw = UIO_READ;
1801 		auio.uio_segflg = UIO_USERSPACE;
1802 		auio.uio_procp = p;
1803 		auio.uio_resid = count;
1804 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1805 		*retval = count - auio.uio_resid;
1806 	}
1807 	vput(vp);
1808 	return (error);
1809 }
1810 
1811 /*
1812  * Change flags of a file given a path name.
1813  */
1814 int
1815 sys_chflags(struct proc *p, void *v, register_t *retval)
1816 {
1817 	struct sys_chflags_args /* {
1818 		syscallarg(const char *) path;
1819 		syscallarg(u_int) flags;
1820 	} */ *uap = v;
1821 
1822 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
1823 	    SCARG(uap, flags), 0));
1824 }
1825 
1826 int
1827 sys_chflagsat(struct proc *p, void *v, register_t *retval)
1828 {
1829 	struct sys_chflagsat_args /* {
1830 		syscallarg(int) fd;
1831 		syscallarg(const char *) path;
1832 		syscallarg(u_int) flags;
1833 		syscallarg(int) atflags;
1834 	} */ *uap = v;
1835 
1836 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
1837 	    SCARG(uap, flags), SCARG(uap, atflags)));
1838 }
1839 
1840 int
1841 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
1842 {
1843 	struct nameidata nd;
1844 	int error, follow;
1845 
1846 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
1847 		return (EINVAL);
1848 
1849 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1850 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
1851 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
1852 	if ((error = namei(&nd)) != 0)
1853 		return (error);
1854 	return (dovchflags(p, nd.ni_vp, flags));
1855 }
1856 
1857 /*
1858  * Change flags of a file given a file descriptor.
1859  */
1860 int
1861 sys_fchflags(struct proc *p, void *v, register_t *retval)
1862 {
1863 	struct sys_fchflags_args /* {
1864 		syscallarg(int) fd;
1865 		syscallarg(u_int) flags;
1866 	} */ *uap = v;
1867 	struct file *fp;
1868 	struct vnode *vp;
1869 	int error;
1870 
1871 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
1872 		return (error);
1873 	vp = fp->f_data;
1874 	vref(vp);
1875 	FRELE(fp, p);
1876 	return (dovchflags(p, vp, SCARG(uap, flags)));
1877 }
1878 
1879 int
1880 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
1881 {
1882 	struct vattr vattr;
1883 	int error;
1884 
1885 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1886 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1887 		error = EROFS;
1888 	else if (flags == VNOVAL)
1889 		error = EINVAL;
1890 	else {
1891 		if (suser(p, 0)) {
1892 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
1893 			    != 0)
1894 				goto out;
1895 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1896 				error = EINVAL;
1897 				goto out;
1898 			}
1899 		}
1900 		VATTR_NULL(&vattr);
1901 		vattr.va_flags = flags;
1902 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1903 	}
1904 out:
1905 	vput(vp);
1906 	return (error);
1907 }
1908 
1909 /*
1910  * Change mode of a file given path name.
1911  */
1912 int
1913 sys_chmod(struct proc *p, void *v, register_t *retval)
1914 {
1915 	struct sys_chmod_args /* {
1916 		syscallarg(const char *) path;
1917 		syscallarg(mode_t) mode;
1918 	} */ *uap = v;
1919 
1920 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
1921 }
1922 
1923 int
1924 sys_fchmodat(struct proc *p, void *v, register_t *retval)
1925 {
1926 	struct sys_fchmodat_args /* {
1927 		syscallarg(int) fd;
1928 		syscallarg(const char *) path;
1929 		syscallarg(mode_t) mode;
1930 		syscallarg(int) flag;
1931 	} */ *uap = v;
1932 
1933 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
1934 	    SCARG(uap, mode), SCARG(uap, flag)));
1935 }
1936 
1937 int
1938 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
1939 {
1940 	struct vnode *vp;
1941 	struct vattr vattr;
1942 	int error, follow;
1943 	struct nameidata nd;
1944 
1945 	if (mode & ~(S_IFMT | ALLPERMS))
1946 		return (EINVAL);
1947 	if ((p->p_p->ps_flags & PS_PLEDGE))
1948 		mode &= ACCESSPERMS;
1949 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1950 		return (EINVAL);
1951 
1952 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1953 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
1954 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
1955 	if ((error = namei(&nd)) != 0)
1956 		return (error);
1957 	vp = nd.ni_vp;
1958 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1959 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1960 		error = EROFS;
1961 	else {
1962 		VATTR_NULL(&vattr);
1963 		vattr.va_mode = mode & ALLPERMS;
1964 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1965 	}
1966 	vput(vp);
1967 	return (error);
1968 }
1969 
1970 /*
1971  * Change mode of a file given a file descriptor.
1972  */
1973 int
1974 sys_fchmod(struct proc *p, void *v, register_t *retval)
1975 {
1976 	struct sys_fchmod_args /* {
1977 		syscallarg(int) fd;
1978 		syscallarg(mode_t) mode;
1979 	} */ *uap = v;
1980 	struct vattr vattr;
1981 	struct vnode *vp;
1982 	struct file *fp;
1983 	mode_t mode = SCARG(uap, mode);
1984 	int error;
1985 
1986 	if (mode & ~(S_IFMT | ALLPERMS))
1987 		return (EINVAL);
1988 	if ((p->p_p->ps_flags & PS_PLEDGE))
1989 		mode &= ACCESSPERMS;
1990 
1991 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
1992 		return (error);
1993 	vp = fp->f_data;
1994 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1995 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1996 		error = EROFS;
1997 	else {
1998 		VATTR_NULL(&vattr);
1999 		vattr.va_mode = mode & ALLPERMS;
2000 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2001 	}
2002 	VOP_UNLOCK(vp, p);
2003 	FRELE(fp, p);
2004 	return (error);
2005 }
2006 
2007 /*
2008  * Set ownership given a path name.
2009  */
2010 int
2011 sys_chown(struct proc *p, void *v, register_t *retval)
2012 {
2013 	struct sys_chown_args /* {
2014 		syscallarg(const char *) path;
2015 		syscallarg(uid_t) uid;
2016 		syscallarg(gid_t) gid;
2017 	} */ *uap = v;
2018 
2019 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2020 	    SCARG(uap, gid), 0));
2021 }
2022 
2023 int
2024 sys_fchownat(struct proc *p, void *v, register_t *retval)
2025 {
2026 	struct sys_fchownat_args /* {
2027 		syscallarg(int) fd;
2028 		syscallarg(const char *) path;
2029 		syscallarg(uid_t) uid;
2030 		syscallarg(gid_t) gid;
2031 		syscallarg(int) flag;
2032 	} */ *uap = v;
2033 
2034 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2035 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2036 }
2037 
2038 int
2039 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2040     int flag)
2041 {
2042 	struct vnode *vp;
2043 	struct vattr vattr;
2044 	int error, follow;
2045 	struct nameidata nd;
2046 	mode_t mode;
2047 
2048 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2049 		return (EINVAL);
2050 
2051 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2052 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2053 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2054 	if ((error = namei(&nd)) != 0)
2055 		return (error);
2056 	vp = nd.ni_vp;
2057 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2058 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2059 		error = EROFS;
2060 	else {
2061 		if ((error = pledge_chown(p, uid, gid)))
2062 			goto out;
2063 		if ((uid != -1 || gid != -1) &&
2064 		    (vp->v_mount->mnt_flag & MNT_NOPERM) == 0 &&
2065 		    (suser(p, 0) || suid_clear)) {
2066 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2067 			if (error)
2068 				goto out;
2069 			mode = vattr.va_mode & ~(VSUID | VSGID);
2070 			if (mode == vattr.va_mode)
2071 				mode = VNOVAL;
2072 		} else
2073 			mode = VNOVAL;
2074 		VATTR_NULL(&vattr);
2075 		vattr.va_uid = uid;
2076 		vattr.va_gid = gid;
2077 		vattr.va_mode = mode;
2078 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2079 	}
2080 out:
2081 	vput(vp);
2082 	return (error);
2083 }
2084 
2085 /*
2086  * Set ownership given a path name, without following links.
2087  */
2088 int
2089 sys_lchown(struct proc *p, void *v, register_t *retval)
2090 {
2091 	struct sys_lchown_args /* {
2092 		syscallarg(const char *) path;
2093 		syscallarg(uid_t) uid;
2094 		syscallarg(gid_t) gid;
2095 	} */ *uap = v;
2096 	struct vnode *vp;
2097 	struct vattr vattr;
2098 	int error;
2099 	struct nameidata nd;
2100 	mode_t mode;
2101 	uid_t uid = SCARG(uap, uid);
2102 	gid_t gid = SCARG(uap, gid);
2103 
2104 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2105 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2106 	if ((error = namei(&nd)) != 0)
2107 		return (error);
2108 	vp = nd.ni_vp;
2109 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2110 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2111 		error = EROFS;
2112 	else {
2113 		if ((error = pledge_chown(p, uid, gid)))
2114 			goto out;
2115 		if ((uid != -1 || gid != -1) &&
2116 		    (vp->v_mount->mnt_flag & MNT_NOPERM) == 0 &&
2117 		    (suser(p, 0) || suid_clear)) {
2118 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2119 			if (error)
2120 				goto out;
2121 			mode = vattr.va_mode & ~(VSUID | VSGID);
2122 			if (mode == vattr.va_mode)
2123 				mode = VNOVAL;
2124 		} else
2125 			mode = VNOVAL;
2126 		VATTR_NULL(&vattr);
2127 		vattr.va_uid = uid;
2128 		vattr.va_gid = gid;
2129 		vattr.va_mode = mode;
2130 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2131 	}
2132 out:
2133 	vput(vp);
2134 	return (error);
2135 }
2136 
2137 /*
2138  * Set ownership given a file descriptor.
2139  */
2140 int
2141 sys_fchown(struct proc *p, void *v, register_t *retval)
2142 {
2143 	struct sys_fchown_args /* {
2144 		syscallarg(int) fd;
2145 		syscallarg(uid_t) uid;
2146 		syscallarg(gid_t) gid;
2147 	} */ *uap = v;
2148 	struct vnode *vp;
2149 	struct vattr vattr;
2150 	int error;
2151 	struct file *fp;
2152 	mode_t mode;
2153 	uid_t uid = SCARG(uap, uid);
2154 	gid_t gid = SCARG(uap, gid);
2155 
2156 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2157 		return (error);
2158 	vp = fp->f_data;
2159 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2160 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2161 		error = EROFS;
2162 	else {
2163 		if ((error = pledge_chown(p, uid, gid)))
2164 			goto out;
2165 		if ((uid != -1 || gid != -1) &&
2166 		    (vp->v_mount->mnt_flag & MNT_NOPERM) == 0 &&
2167 		    (suser(p, 0) || suid_clear)) {
2168 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2169 			if (error)
2170 				goto out;
2171 			mode = vattr.va_mode & ~(VSUID | VSGID);
2172 			if (mode == vattr.va_mode)
2173 				mode = VNOVAL;
2174 		} else
2175 			mode = VNOVAL;
2176 		VATTR_NULL(&vattr);
2177 		vattr.va_uid = uid;
2178 		vattr.va_gid = gid;
2179 		vattr.va_mode = mode;
2180 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2181 	}
2182 out:
2183 	VOP_UNLOCK(vp, p);
2184 	FRELE(fp, p);
2185 	return (error);
2186 }
2187 
2188 /*
2189  * Set the access and modification times given a path name.
2190  */
2191 int
2192 sys_utimes(struct proc *p, void *v, register_t *retval)
2193 {
2194 	struct sys_utimes_args /* {
2195 		syscallarg(const char *) path;
2196 		syscallarg(const struct timeval *) tptr;
2197 	} */ *uap = v;
2198 
2199 	struct timespec ts[2];
2200 	struct timeval tv[2];
2201 	const struct timeval *tvp;
2202 	int error;
2203 
2204 	tvp = SCARG(uap, tptr);
2205 	if (tvp != NULL) {
2206 		error = copyin(tvp, tv, sizeof(tv));
2207 		if (error)
2208 			return (error);
2209 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2210 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2211 	} else
2212 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2213 
2214 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2215 }
2216 
2217 int
2218 sys_utimensat(struct proc *p, void *v, register_t *retval)
2219 {
2220 	struct sys_utimensat_args /* {
2221 		syscallarg(int) fd;
2222 		syscallarg(const char *) path;
2223 		syscallarg(const struct timespec *) times;
2224 		syscallarg(int) flag;
2225 	} */ *uap = v;
2226 
2227 	struct timespec ts[2];
2228 	const struct timespec *tsp;
2229 	int error;
2230 
2231 	tsp = SCARG(uap, times);
2232 	if (tsp != NULL) {
2233 		error = copyin(tsp, ts, sizeof(ts));
2234 		if (error)
2235 			return (error);
2236 	} else
2237 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2238 
2239 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2240 	    SCARG(uap, flag)));
2241 }
2242 
2243 int
2244 doutimensat(struct proc *p, int fd, const char *path,
2245     struct timespec ts[2], int flag)
2246 {
2247 	struct vnode *vp;
2248 	int error, follow;
2249 	struct nameidata nd;
2250 
2251 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2252 		return (EINVAL);
2253 
2254 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2255 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2256 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2257 	if ((error = namei(&nd)) != 0)
2258 		return (error);
2259 	vp = nd.ni_vp;
2260 
2261 	return (dovutimens(p, vp, ts));
2262 }
2263 
2264 int
2265 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2266 {
2267 	struct vattr vattr;
2268 	struct timespec now;
2269 	int error;
2270 
2271 #ifdef KTRACE
2272 	/* if they're both UTIME_NOW, then don't report either */
2273 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2274 	    KTRPOINT(p, KTR_STRUCT)) {
2275 		ktrabstimespec(p, &ts[0]);
2276 		ktrabstimespec(p, &ts[1]);
2277 	}
2278 #endif
2279 
2280 	VATTR_NULL(&vattr);
2281 
2282 	/*  make sure ctime is updated even if neither mtime nor atime is */
2283 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2284 
2285 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2286 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2287 			vattr.va_vaflags |= VA_UTIMES_NULL;
2288 
2289 		getnanotime(&now);
2290 		if (ts[0].tv_nsec == UTIME_NOW)
2291 			ts[0] = now;
2292 		if (ts[1].tv_nsec == UTIME_NOW)
2293 			ts[1] = now;
2294 	}
2295 
2296 	if (ts[0].tv_nsec != UTIME_OMIT) {
2297 		if (ts[0].tv_nsec < 0 || ts[0].tv_nsec >= 1000000000) {
2298 			vrele(vp);
2299 			return (EINVAL);
2300 		}
2301 		vattr.va_atime = ts[0];
2302 	}
2303 	if (ts[1].tv_nsec != UTIME_OMIT) {
2304 		if (ts[1].tv_nsec < 0 || ts[1].tv_nsec >= 1000000000) {
2305 			vrele(vp);
2306 			return (EINVAL);
2307 		}
2308 		vattr.va_mtime = ts[1];
2309 	}
2310 
2311 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2312 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2313 		error = EROFS;
2314 	else
2315 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2316 	vput(vp);
2317 	return (error);
2318 }
2319 
2320 /*
2321  * Set the access and modification times given a file descriptor.
2322  */
2323 int
2324 sys_futimes(struct proc *p, void *v, register_t *retval)
2325 {
2326 	struct sys_futimes_args /* {
2327 		syscallarg(int) fd;
2328 		syscallarg(const struct timeval *) tptr;
2329 	} */ *uap = v;
2330 	struct timeval tv[2];
2331 	struct timespec ts[2];
2332 	const struct timeval *tvp;
2333 	int error;
2334 
2335 	tvp = SCARG(uap, tptr);
2336 	if (tvp != NULL) {
2337 		error = copyin(tvp, tv, sizeof(tv));
2338 		if (error)
2339 			return (error);
2340 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2341 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2342 	} else
2343 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2344 
2345 	return (dofutimens(p, SCARG(uap, fd), ts));
2346 }
2347 
2348 int
2349 sys_futimens(struct proc *p, void *v, register_t *retval)
2350 {
2351 	struct sys_futimens_args /* {
2352 		syscallarg(int) fd;
2353 		syscallarg(const struct timespec *) times;
2354 	} */ *uap = v;
2355 	struct timespec ts[2];
2356 	const struct timespec *tsp;
2357 	int error;
2358 
2359 	tsp = SCARG(uap, times);
2360 	if (tsp != NULL) {
2361 		error = copyin(tsp, ts, sizeof(ts));
2362 		if (error)
2363 			return (error);
2364 	} else
2365 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2366 
2367 	return (dofutimens(p, SCARG(uap, fd), ts));
2368 }
2369 
2370 int
2371 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2372 {
2373 	struct file *fp;
2374 	struct vnode *vp;
2375 	int error;
2376 
2377 	if ((error = getvnode(p, fd, &fp)) != 0)
2378 		return (error);
2379 	vp = fp->f_data;
2380 	vref(vp);
2381 	FRELE(fp, p);
2382 
2383 	return (dovutimens(p, vp, ts));
2384 }
2385 
2386 /*
2387  * Truncate a file given its path name.
2388  */
2389 int
2390 sys_truncate(struct proc *p, void *v, register_t *retval)
2391 {
2392 	struct sys_truncate_args /* {
2393 		syscallarg(const char *) path;
2394 		syscallarg(int) pad;
2395 		syscallarg(off_t) length;
2396 	} */ *uap = v;
2397 	struct vnode *vp;
2398 	struct vattr vattr;
2399 	int error;
2400 	struct nameidata nd;
2401 
2402 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2403 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2404 	if ((error = namei(&nd)) != 0)
2405 		return (error);
2406 	vp = nd.ni_vp;
2407 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2408 	if (vp->v_type == VDIR)
2409 		error = EISDIR;
2410 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2411 	    (error = vn_writechk(vp)) == 0) {
2412 		VATTR_NULL(&vattr);
2413 		vattr.va_size = SCARG(uap, length);
2414 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2415 	}
2416 	vput(vp);
2417 	return (error);
2418 }
2419 
2420 /*
2421  * Truncate a file given a file descriptor.
2422  */
2423 int
2424 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2425 {
2426 	struct sys_ftruncate_args /* {
2427 		syscallarg(int) fd;
2428 		syscallarg(int) pad;
2429 		syscallarg(off_t) length;
2430 	} */ *uap = v;
2431 	struct vattr vattr;
2432 	struct vnode *vp;
2433 	struct file *fp;
2434 	off_t len;
2435 	int error;
2436 
2437 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2438 		return (error);
2439 	len = SCARG(uap, length);
2440 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2441 		error = EINVAL;
2442 		goto bad;
2443 	}
2444 	vp = fp->f_data;
2445 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2446 	if (vp->v_type == VDIR)
2447 		error = EISDIR;
2448 	else if ((error = vn_writechk(vp)) == 0) {
2449 		VATTR_NULL(&vattr);
2450 		vattr.va_size = len;
2451 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2452 	}
2453 	VOP_UNLOCK(vp, p);
2454 bad:
2455 	FRELE(fp, p);
2456 	return (error);
2457 }
2458 
2459 /*
2460  * Sync an open file.
2461  */
2462 int
2463 sys_fsync(struct proc *p, void *v, register_t *retval)
2464 {
2465 	struct sys_fsync_args /* {
2466 		syscallarg(int) fd;
2467 	} */ *uap = v;
2468 	struct vnode *vp;
2469 	struct file *fp;
2470 	int error;
2471 
2472 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2473 		return (error);
2474 	vp = fp->f_data;
2475 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2476 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2477 #ifdef FFS_SOFTUPDATES
2478 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2479 		error = softdep_fsync(vp);
2480 #endif
2481 
2482 	VOP_UNLOCK(vp, p);
2483 	FRELE(fp, p);
2484 	return (error);
2485 }
2486 
2487 /*
2488  * Rename files.  Source and destination must either both be directories,
2489  * or both not be directories.  If target is a directory, it must be empty.
2490  */
2491 int
2492 sys_rename(struct proc *p, void *v, register_t *retval)
2493 {
2494 	struct sys_rename_args /* {
2495 		syscallarg(const char *) from;
2496 		syscallarg(const char *) to;
2497 	} */ *uap = v;
2498 
2499 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2500 	    SCARG(uap, to)));
2501 }
2502 
2503 int
2504 sys_renameat(struct proc *p, void *v, register_t *retval)
2505 {
2506 	struct sys_renameat_args /* {
2507 		syscallarg(int) fromfd;
2508 		syscallarg(const char *) from;
2509 		syscallarg(int) tofd;
2510 		syscallarg(const char *) to;
2511 	} */ *uap = v;
2512 
2513 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2514 	    SCARG(uap, tofd), SCARG(uap, to)));
2515 }
2516 
2517 int
2518 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2519     const char *to)
2520 {
2521 	struct vnode *tvp, *fvp, *tdvp;
2522 	struct nameidata fromnd, tond;
2523 	int error;
2524 	int flags;
2525 
2526 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2527 	    fromfd, from, p);
2528 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2529 	if ((error = namei(&fromnd)) != 0)
2530 		return (error);
2531 	fvp = fromnd.ni_vp;
2532 
2533 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2534 	/*
2535 	 * rename("foo/", "bar/");  is  OK
2536 	 */
2537 	if (fvp->v_type == VDIR)
2538 		flags |= STRIPSLASHES;
2539 
2540 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2541 	tond.ni_pledge = PLEDGE_CPATH;
2542 	if ((error = namei(&tond)) != 0) {
2543 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2544 		vrele(fromnd.ni_dvp);
2545 		vrele(fvp);
2546 		goto out1;
2547 	}
2548 	tdvp = tond.ni_dvp;
2549 	tvp = tond.ni_vp;
2550 	if (tvp != NULL) {
2551 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2552 			error = ENOTDIR;
2553 			goto out;
2554 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2555 			error = EISDIR;
2556 			goto out;
2557 		}
2558 	}
2559 	if (fvp == tdvp)
2560 		error = EINVAL;
2561 	/*
2562 	 * If source is the same as the destination (that is the
2563 	 * same inode number)
2564 	 */
2565 	if (fvp == tvp)
2566 		error = -1;
2567 out:
2568 	if (!error) {
2569 		if (tvp) {
2570 			(void)uvm_vnp_uncache(tvp);
2571 		}
2572 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2573 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2574 	} else {
2575 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2576 		if (tdvp == tvp)
2577 			vrele(tdvp);
2578 		else
2579 			vput(tdvp);
2580 		if (tvp)
2581 			vput(tvp);
2582 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2583 		vrele(fromnd.ni_dvp);
2584 		vrele(fvp);
2585 	}
2586 	vrele(tond.ni_startdir);
2587 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2588 out1:
2589 	if (fromnd.ni_startdir)
2590 		vrele(fromnd.ni_startdir);
2591 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2592 	if (error == -1)
2593 		return (0);
2594 	return (error);
2595 }
2596 
2597 /*
2598  * Make a directory file.
2599  */
2600 int
2601 sys_mkdir(struct proc *p, void *v, register_t *retval)
2602 {
2603 	struct sys_mkdir_args /* {
2604 		syscallarg(const char *) path;
2605 		syscallarg(mode_t) mode;
2606 	} */ *uap = v;
2607 
2608 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
2609 }
2610 
2611 int
2612 sys_mkdirat(struct proc *p, void *v, register_t *retval)
2613 {
2614 	struct sys_mkdirat_args /* {
2615 		syscallarg(int) fd;
2616 		syscallarg(const char *) path;
2617 		syscallarg(mode_t) mode;
2618 	} */ *uap = v;
2619 
2620 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
2621 	    SCARG(uap, mode)));
2622 }
2623 
2624 int
2625 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
2626 {
2627 	struct vnode *vp;
2628 	struct vattr vattr;
2629 	int error;
2630 	struct nameidata nd;
2631 
2632 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
2633 	    fd, path, p);
2634 	nd.ni_pledge = PLEDGE_CPATH;
2635 	if ((error = namei(&nd)) != 0)
2636 		return (error);
2637 	vp = nd.ni_vp;
2638 	if (vp != NULL) {
2639 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2640 		if (nd.ni_dvp == vp)
2641 			vrele(nd.ni_dvp);
2642 		else
2643 			vput(nd.ni_dvp);
2644 		vrele(vp);
2645 		return (EEXIST);
2646 	}
2647 	VATTR_NULL(&vattr);
2648 	vattr.va_type = VDIR;
2649 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2650 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2651 	if (!error)
2652 		vput(nd.ni_vp);
2653 	return (error);
2654 }
2655 
2656 /*
2657  * Remove a directory file.
2658  */
2659 int
2660 sys_rmdir(struct proc *p, void *v, register_t *retval)
2661 {
2662 	struct sys_rmdir_args /* {
2663 		syscallarg(const char *) path;
2664 	} */ *uap = v;
2665 
2666 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
2667 }
2668 
2669 /*
2670  * Read a block of directory entries in a file system independent format.
2671  */
2672 int
2673 sys_getdents(struct proc *p, void *v, register_t *retval)
2674 {
2675 	struct sys_getdents_args /* {
2676 		syscallarg(int) fd;
2677 		syscallarg(void *) buf;
2678 		syscallarg(size_t) buflen;
2679 	} */ *uap = v;
2680 	struct vnode *vp;
2681 	struct file *fp;
2682 	struct uio auio;
2683 	struct iovec aiov;
2684 	size_t buflen;
2685 	int error, eofflag;
2686 
2687 	buflen = SCARG(uap, buflen);
2688 
2689 	if (buflen > INT_MAX)
2690 		return EINVAL;
2691 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2692 		return (error);
2693 	if ((fp->f_flag & FREAD) == 0) {
2694 		error = EBADF;
2695 		goto bad;
2696 	}
2697 	if (fp->f_offset < 0) {
2698 		error = EINVAL;
2699 		goto bad;
2700 	}
2701 	vp = fp->f_data;
2702 	if (vp->v_type != VDIR) {
2703 		error = EINVAL;
2704 		goto bad;
2705 	}
2706 	aiov.iov_base = SCARG(uap, buf);
2707 	aiov.iov_len = buflen;
2708 	auio.uio_iov = &aiov;
2709 	auio.uio_iovcnt = 1;
2710 	auio.uio_rw = UIO_READ;
2711 	auio.uio_segflg = UIO_USERSPACE;
2712 	auio.uio_procp = p;
2713 	auio.uio_resid = buflen;
2714 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2715 	auio.uio_offset = fp->f_offset;
2716 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
2717 	fp->f_offset = auio.uio_offset;
2718 	VOP_UNLOCK(vp, p);
2719 	if (error)
2720 		goto bad;
2721 	*retval = buflen - auio.uio_resid;
2722 bad:
2723 	FRELE(fp, p);
2724 	return (error);
2725 }
2726 
2727 /*
2728  * Set the mode mask for creation of filesystem nodes.
2729  */
2730 int
2731 sys_umask(struct proc *p, void *v, register_t *retval)
2732 {
2733 	struct sys_umask_args /* {
2734 		syscallarg(mode_t) newmask;
2735 	} */ *uap = v;
2736 	struct filedesc *fdp;
2737 
2738 	fdp = p->p_fd;
2739 	*retval = fdp->fd_cmask;
2740 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2741 	return (0);
2742 }
2743 
2744 /*
2745  * Void all references to file by ripping underlying filesystem
2746  * away from vnode.
2747  */
2748 int
2749 sys_revoke(struct proc *p, void *v, register_t *retval)
2750 {
2751 	struct sys_revoke_args /* {
2752 		syscallarg(const char *) path;
2753 	} */ *uap = v;
2754 	struct vnode *vp;
2755 	struct vattr vattr;
2756 	int error;
2757 	struct nameidata nd;
2758 
2759 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2760 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
2761 	if ((error = namei(&nd)) != 0)
2762 		return (error);
2763 	vp = nd.ni_vp;
2764 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
2765 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
2766 		error = ENOTTY;
2767 		goto out;
2768 	}
2769 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2770 		goto out;
2771 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2772 	    (error = suser(p, 0)))
2773 		goto out;
2774 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
2775 		VOP_REVOKE(vp, REVOKEALL);
2776 out:
2777 	vrele(vp);
2778 	return (error);
2779 }
2780 
2781 /*
2782  * Convert a user file descriptor to a kernel file entry.
2783  *
2784  * On return *fpp is FREF:ed.
2785  */
2786 int
2787 getvnode(struct proc *p, int fd, struct file **fpp)
2788 {
2789 	struct file *fp;
2790 	struct vnode *vp;
2791 
2792 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
2793 		return (EBADF);
2794 
2795 	if (fp->f_type != DTYPE_VNODE)
2796 		return (EINVAL);
2797 
2798 	vp = fp->f_data;
2799 	if (vp->v_type == VBAD)
2800 		return (EBADF);
2801 
2802 	FREF(fp);
2803 	*fpp = fp;
2804 
2805 	return (0);
2806 }
2807 
2808 /*
2809  * Positional read system call.
2810  */
2811 int
2812 sys_pread(struct proc *p, void *v, register_t *retval)
2813 {
2814 	struct sys_pread_args /* {
2815 		syscallarg(int) fd;
2816 		syscallarg(void *) buf;
2817 		syscallarg(size_t) nbyte;
2818 		syscallarg(int) pad;
2819 		syscallarg(off_t) offset;
2820 	} */ *uap = v;
2821 	struct iovec iov;
2822 	struct filedesc *fdp = p->p_fd;
2823 	struct file *fp;
2824 	struct vnode *vp;
2825 	off_t offset;
2826 	int fd = SCARG(uap, fd);
2827 
2828 	if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL)
2829 		return (EBADF);
2830 
2831 	vp = fp->f_data;
2832 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2833 	    (vp->v_flag & VISTTY)) {
2834 		return (ESPIPE);
2835 	}
2836 
2837 	iov.iov_base = SCARG(uap, buf);
2838 	iov.iov_len = SCARG(uap, nbyte);
2839 
2840 	offset = SCARG(uap, offset);
2841 	if (offset < 0 && vp->v_type != VCHR)
2842 		return (EINVAL);
2843 
2844 	FREF(fp);
2845 
2846 	/* dofilereadv() will FRELE the descriptor for us */
2847 	return (dofilereadv(p, fd, fp, &iov, 1, 0, &offset, retval));
2848 }
2849 
2850 /*
2851  * Positional scatter read system call.
2852  */
2853 int
2854 sys_preadv(struct proc *p, void *v, register_t *retval)
2855 {
2856 	struct sys_preadv_args /* {
2857 		syscallarg(int) fd;
2858 		syscallarg(const struct iovec *) iovp;
2859 		syscallarg(int) iovcnt;
2860 		syscallarg(int) pad;
2861 		syscallarg(off_t) offset;
2862 	} */ *uap = v;
2863 	struct filedesc *fdp = p->p_fd;
2864 	struct file *fp;
2865 	struct vnode *vp;
2866 	off_t offset;
2867 	int fd = SCARG(uap, fd);
2868 
2869 	if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL)
2870 		return (EBADF);
2871 
2872 	vp = fp->f_data;
2873 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2874 	    (vp->v_flag & VISTTY)) {
2875 		return (ESPIPE);
2876 	}
2877 
2878 	offset = SCARG(uap, offset);
2879 	if (offset < 0 && vp->v_type != VCHR)
2880 		return (EINVAL);
2881 
2882 	FREF(fp);
2883 
2884 	/* dofilereadv() will FRELE the descriptor for us */
2885 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
2886 	    &offset, retval));
2887 }
2888 
2889 /*
2890  * Positional write system call.
2891  */
2892 int
2893 sys_pwrite(struct proc *p, void *v, register_t *retval)
2894 {
2895 	struct sys_pwrite_args /* {
2896 		syscallarg(int) fd;
2897 		syscallarg(const void *) buf;
2898 		syscallarg(size_t) nbyte;
2899 		syscallarg(int) pad;
2900 		syscallarg(off_t) offset;
2901 	} */ *uap = v;
2902 	struct iovec iov;
2903 	struct filedesc *fdp = p->p_fd;
2904 	struct file *fp;
2905 	struct vnode *vp;
2906 	off_t offset;
2907 	int fd = SCARG(uap, fd);
2908 
2909 	if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL)
2910 		return (EBADF);
2911 
2912 	vp = fp->f_data;
2913 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2914 	    (vp->v_flag & VISTTY)) {
2915 		return (ESPIPE);
2916 	}
2917 
2918 	iov.iov_base = (void *)SCARG(uap, buf);
2919 	iov.iov_len = SCARG(uap, nbyte);
2920 
2921 	offset = SCARG(uap, offset);
2922 	if (offset < 0 && vp->v_type != VCHR)
2923 		return (EINVAL);
2924 
2925 	FREF(fp);
2926 
2927 	/* dofilewritev() will FRELE the descriptor for us */
2928 	return (dofilewritev(p, fd, fp, &iov, 1, 0, &offset, retval));
2929 }
2930 
2931 /*
2932  * Positional gather write system call.
2933  */
2934 int
2935 sys_pwritev(struct proc *p, void *v, register_t *retval)
2936 {
2937 	struct sys_pwritev_args /* {
2938 		syscallarg(int) fd;
2939 		syscallarg(const struct iovec *) iovp;
2940 		syscallarg(int) iovcnt;
2941 		syscallarg(int) pad;
2942 		syscallarg(off_t) offset;
2943 	} */ *uap = v;
2944 	struct filedesc *fdp = p->p_fd;
2945 	struct file *fp;
2946 	struct vnode *vp;
2947 	off_t offset;
2948 	int fd = SCARG(uap, fd);
2949 
2950 	if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL)
2951 		return (EBADF);
2952 
2953 	vp = fp->f_data;
2954 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
2955 	    (vp->v_flag & VISTTY)) {
2956 		return (ESPIPE);
2957 	}
2958 
2959 	offset = SCARG(uap, offset);
2960 	if (offset < 0 && vp->v_type != VCHR)
2961 		return (EINVAL);
2962 
2963 	FREF(fp);
2964 
2965 	/* dofilewritev() will FRELE the descriptor for us */
2966 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2967 	    1, &offset, retval));
2968 }
2969 
2970