xref: /openbsd-src/sys/kern/kern_descrip.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: kern_descrip.c,v 1.200 2020/02/26 13:54:52 visa Exp $	*/
2 /*	$NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/filedesc.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/proc.h>
46 #include <sys/file.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/stat.h>
50 #include <sys/ioctl.h>
51 #include <sys/fcntl.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/syslog.h>
55 #include <sys/ucred.h>
56 #include <sys/unistd.h>
57 #include <sys/resourcevar.h>
58 #include <sys/mount.h>
59 #include <sys/syscallargs.h>
60 #include <sys/event.h>
61 #include <sys/pool.h>
62 #include <sys/ktrace.h>
63 #include <sys/pledge.h>
64 
65 #include <sys/pipe.h>
66 
67 /*
68  * Descriptor management.
69  *
70  * We need to block interrupts as long as `fhdlk' is being taken
71  * with and without the KERNEL_LOCK().
72  */
/* Mutex taken around every walk or modification of `filehead'. */
struct mutex fhdlk = MUTEX_INITIALIZER(IPL_MPFLOOR);
struct filelist filehead;	/* head of list of open files */
int numfiles;			/* actual number of open files */

/* Bitmap bookkeeping helpers for the per-process descriptor table. */
static __inline void fd_used(struct filedesc *, int);
static __inline void fd_unused(struct filedesc *, int);
static __inline int find_next_zero(u_int *, int, u_int);
static __inline int fd_inuse(struct filedesc *, int);
/* Shared tail of dup(), dup2()/dup3() and fcntl(F_DUPFD*). */
int finishdup(struct proc *, struct file *, int, int, register_t *, int);
int find_last_set(struct filedesc *, int);
int dodup3(struct proc *, int, int, int, register_t *);

/* `dupflags' bits understood by finishdup(). */
#define DUPF_CLOEXEC	0x01	/* set UF_EXCLOSE on the new descriptor */
#define DUPF_DUP2	0x02	/* caller picked the target descriptor */

struct pool file_pool;		/* backs `struct file' allocations */
struct pool fdesc_pool;		/* backs `struct filedesc0' allocations */
90 
91 void
92 filedesc_init(void)
93 {
94 	pool_init(&file_pool, sizeof(struct file), 0, IPL_MPFLOOR,
95 	    PR_WAITOK, "filepl", NULL);
96 	pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, IPL_NONE,
97 	    PR_WAITOK, "fdescpl", NULL);
98 	LIST_INIT(&filehead);
99 }
100 
101 static __inline int
102 find_next_zero (u_int *bitmap, int want, u_int bits)
103 {
104 	int i, off, maxoff;
105 	u_int sub;
106 
107 	if (want > bits)
108 		return -1;
109 
110 	off = want >> NDENTRYSHIFT;
111 	i = want & NDENTRYMASK;
112 	if (i) {
113 		sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
114 		if (sub != ~0)
115 			goto found;
116 		off++;
117 	}
118 
119 	maxoff = NDLOSLOTS(bits);
120 	while (off < maxoff) {
121 		if ((sub = bitmap[off]) != ~0)
122 			goto found;
123 		off++;
124 	}
125 
126 	return -1;
127 
128  found:
129 	return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
130 }
131 
132 int
133 find_last_set(struct filedesc *fd, int last)
134 {
135 	int off, i;
136 	u_int *bitmap = fd->fd_lomap;
137 
138 	off = (last - 1) >> NDENTRYSHIFT;
139 
140 	while (off >= 0 && !bitmap[off])
141 		off--;
142 	if (off < 0)
143 		return 0;
144 
145 	i = ((off + 1) << NDENTRYSHIFT) - 1;
146 	if (i >= last)
147 		i = last - 1;
148 
149 	while (i > 0 && !fd_inuse(fd, i))
150 		i--;
151 	return i;
152 }
153 
154 static __inline int
155 fd_inuse(struct filedesc *fdp, int fd)
156 {
157 	u_int off = fd >> NDENTRYSHIFT;
158 
159 	if (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK)))
160 		return 1;
161 
162 	return 0;
163 }
164 
165 static __inline void
166 fd_used(struct filedesc *fdp, int fd)
167 {
168 	u_int off = fd >> NDENTRYSHIFT;
169 
170 	fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
171 	if (fdp->fd_lomap[off] == ~0)
172 		fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
173 
174 	if (fd > fdp->fd_lastfile)
175 		fdp->fd_lastfile = fd;
176 	fdp->fd_openfd++;
177 }
178 
179 static __inline void
180 fd_unused(struct filedesc *fdp, int fd)
181 {
182 	u_int off = fd >> NDENTRYSHIFT;
183 
184 	if (fd < fdp->fd_freefile)
185 		fdp->fd_freefile = fd;
186 
187 	if (fdp->fd_lomap[off] == ~0)
188 		fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
189 	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
190 
191 #ifdef DIAGNOSTIC
192 	if (fd > fdp->fd_lastfile)
193 		panic("fd_unused: fd_lastfile inconsistent");
194 #endif
195 	if (fd == fdp->fd_lastfile)
196 		fdp->fd_lastfile = find_last_set(fdp, fd);
197 	fdp->fd_openfd--;
198 }
199 
200 struct file *
201 fd_iterfile(struct file *fp, struct proc *p)
202 {
203 	struct file *nfp;
204 	unsigned int count;
205 
206 	mtx_enter(&fhdlk);
207 	if (fp == NULL)
208 		nfp = LIST_FIRST(&filehead);
209 	else
210 		nfp = LIST_NEXT(fp, f_list);
211 
212 	/* don't refcount when f_count == 0 to avoid race in fdrop() */
213 	while (nfp != NULL) {
214 		count = nfp->f_count;
215 		if (count == 0) {
216 			nfp = LIST_NEXT(nfp, f_list);
217 			continue;
218 		}
219 		if (atomic_cas_uint(&nfp->f_count, count, count + 1) == count)
220 			break;
221 	}
222 	mtx_leave(&fhdlk);
223 
224 	if (fp != NULL)
225 		FRELE(fp, p);
226 
227 	return nfp;
228 }
229 
230 struct file *
231 fd_getfile(struct filedesc *fdp, int fd)
232 {
233 	struct file *fp;
234 
235 	vfs_stall_barrier();
236 
237 	if ((u_int)fd >= fdp->fd_nfiles)
238 		return (NULL);
239 
240 	mtx_enter(&fdp->fd_fplock);
241 	fp = fdp->fd_ofiles[fd];
242 	if (fp != NULL)
243 		atomic_inc_int(&fp->f_count);
244 	mtx_leave(&fdp->fd_fplock);
245 
246 	return (fp);
247 }
248 
249 struct file *
250 fd_getfile_mode(struct filedesc *fdp, int fd, int mode)
251 {
252 	struct file *fp;
253 
254 	KASSERT(mode != 0);
255 
256 	fp = fd_getfile(fdp, fd);
257 	if (fp == NULL)
258 		return (NULL);
259 
260 	if ((fp->f_flag & mode) == 0) {
261 		FRELE(fp, curproc);
262 		return (NULL);
263 	}
264 
265 	return (fp);
266 }
267 
268 int
269 fd_checkclosed(struct filedesc *fdp, int fd, struct file *fp)
270 {
271 	int closed;
272 
273 	mtx_enter(&fdp->fd_fplock);
274 	KASSERT(fd < fdp->fd_nfiles);
275 	closed = (fdp->fd_ofiles[fd] != fp);
276 	mtx_leave(&fdp->fd_fplock);
277 	return (closed);
278 }
279 
280 /*
281  * System calls on descriptors.
282  */
283 
284 /*
285  * Duplicate a file descriptor.
286  */
287 int
288 sys_dup(struct proc *p, void *v, register_t *retval)
289 {
290 	struct sys_dup_args /* {
291 		syscallarg(int) fd;
292 	} */ *uap = v;
293 	struct filedesc *fdp = p->p_fd;
294 	int old = SCARG(uap, fd);
295 	struct file *fp;
296 	int new;
297 	int error;
298 
299 restart:
300 	if ((fp = fd_getfile(fdp, old)) == NULL)
301 		return (EBADF);
302 	fdplock(fdp);
303 	if ((error = fdalloc(p, 0, &new)) != 0) {
304 		FRELE(fp, p);
305 		if (error == ENOSPC) {
306 			fdexpand(p);
307 			fdpunlock(fdp);
308 			goto restart;
309 		}
310 		fdpunlock(fdp);
311 		return (error);
312 	}
313 	/* No need for FRELE(), finishdup() uses current ref. */
314 	return (finishdup(p, fp, old, new, retval, 0));
315 }
316 
317 /*
318  * Duplicate a file descriptor to a particular value.
319  */
320 int
321 sys_dup2(struct proc *p, void *v, register_t *retval)
322 {
323 	struct sys_dup2_args /* {
324 		syscallarg(int) from;
325 		syscallarg(int) to;
326 	} */ *uap = v;
327 
328 	return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval));
329 }
330 
331 int
332 sys_dup3(struct proc *p, void *v, register_t *retval)
333 {
334 	struct sys_dup3_args /* {
335 		syscallarg(int) from;
336 		syscallarg(int) to;
337 		syscallarg(int) flags;
338 	} */ *uap = v;
339 
340 	if (SCARG(uap, from) == SCARG(uap, to))
341 		return (EINVAL);
342 	if (SCARG(uap, flags) & ~O_CLOEXEC)
343 		return (EINVAL);
344 	return (dodup3(p, SCARG(uap, from), SCARG(uap, to),
345 	    SCARG(uap, flags), retval));
346 }
347 
348 int
349 dodup3(struct proc *p, int old, int new, int flags, register_t *retval)
350 {
351 	struct filedesc *fdp = p->p_fd;
352 	struct file *fp;
353 	int dupflags, error, i;
354 
355 restart:
356 	if ((fp = fd_getfile(fdp, old)) == NULL)
357 		return (EBADF);
358 	if (old == new) {
359 		/*
360 		 * NOTE! This doesn't clear the close-on-exec flag. This might
361 		 * or might not be the intended behavior from the start, but
362 		 * this is what everyone else does.
363 		 */
364 		*retval = new;
365 		FRELE(fp, p);
366 		return (0);
367 	}
368 	if ((u_int)new >= lim_cur(RLIMIT_NOFILE) ||
369 	    (u_int)new >= maxfiles) {
370 		FRELE(fp, p);
371 		return (EBADF);
372 	}
373 	fdplock(fdp);
374 	if (new >= fdp->fd_nfiles) {
375 		if ((error = fdalloc(p, new, &i)) != 0) {
376 			FRELE(fp, p);
377 			if (error == ENOSPC) {
378 				fdexpand(p);
379 				fdpunlock(fdp);
380 				goto restart;
381 			}
382 			fdpunlock(fdp);
383 			return (error);
384 		}
385 		if (new != i)
386 			panic("dup2: fdalloc");
387 		fd_unused(fdp, new);
388 	}
389 
390 	dupflags = DUPF_DUP2;
391 	if (flags & O_CLOEXEC)
392 		dupflags |= DUPF_CLOEXEC;
393 
394 	/* No need for FRELE(), finishdup() uses current ref. */
395 	return (finishdup(p, fp, old, new, retval, dupflags));
396 }
397 
/*
 * The file control system call.
 *
 * The command is first checked with pledge_fcntl().  A reference to
 * the file behind `fd' is then taken with fd_getfile() and, for every
 * command except the F_DUPFD variants, released again at the `out'
 * label.  The F_DUPFD variants return directly: the reference is either
 * handed to finishdup() or dropped before the return.
 *
 * Returns 0 on success or an errno value; command results are passed
 * back through `retval'.
 */
int
sys_fcntl(struct proc *p, void *v, register_t *retval)
{
	struct sys_fcntl_args /* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int i, prev, tmp, newmin, flg = F_POSIX;
	struct flock fl;
	int error = 0;

	error = pledge_fcntl(p, SCARG(uap, cmd));
	if (error)
		return (error);

restart:
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	switch (SCARG(uap, cmd)) {

	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		/* `arg' is the lowest descriptor number acceptable. */
		newmin = (long)SCARG(uap, arg);
		if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) ||
		    (u_int)newmin >= maxfiles) {
			error = EINVAL;
			break;
		}
		fdplock(fdp);
		if ((error = fdalloc(p, newmin, &i)) != 0) {
			FRELE(fp, p);
			if (error == ENOSPC) {
				/* table full: grow it and start over */
				fdexpand(p);
				fdpunlock(fdp);
				goto restart;
			}
			fdpunlock(fdp);
		} else {
			int dupflags = 0;

			if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC)
				dupflags |= DUPF_CLOEXEC;

			/* No need for FRELE(), finishdup() uses current ref. */
			error = finishdup(p, fp, fd, i, retval, dupflags);
		}
		/* The reference is already gone on both paths above. */
		return (error);

	case F_GETFD:
		/* Report the close-on-exec flag as 1 or 0. */
		fdplock(fdp);
		*retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
		fdpunlock(fdp);
		break;

	case F_SETFD:
		/* Only bit 0 of `arg' is looked at. */
		fdplock(fdp);
		if ((long)SCARG(uap, arg) & 1)
			fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
		else
			fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
		fdpunlock(fdp);
		break;

	case F_GETFL:
		*retval = OFLAGS(fp->f_flag);
		break;

	case F_ISATTY:
		vp = fp->f_data;
	        if (fp->f_type == DTYPE_VNODE && (vp->v_flag & VISTTY))
			*retval = 1;
		else {
			*retval = 0;
			error = ENOTTY;
		}
		break;

	case F_SETFL:
		/*
		 * Replace the FCNTLFLAGS bits of f_flag with a
		 * compare-and-swap loop, then push the resulting
		 * FNONBLOCK and FASYNC state down through fo_ioctl.
		 */
		do {
			tmp = prev = fp->f_flag;
			tmp &= ~FCNTLFLAGS;
			tmp |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
		} while (atomic_cas_uint(&fp->f_flag, prev, tmp) != prev);
		tmp = fp->f_flag & FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		if (error)
			break;
		tmp = fp->f_flag & FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		if (!error)
			break;
		/* FIOASYNC failed: clear FNONBLOCK again, ignoring errors. */
		atomic_clearbits_int(&fp->f_flag, FNONBLOCK);
		tmp = 0;
		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case F_GETOWN:
		tmp = 0;
		error = (*fp->f_ops->fo_ioctl)
			(fp, FIOGETOWN, (caddr_t)&tmp, p);
		*retval = tmp;
		break;

	case F_SETOWN:
		tmp = (long)SCARG(uap, arg);
		error = ((*fp->f_ops->fo_ioctl)
			(fp, FIOSETOWN, (caddr_t)&tmp, p));
		break;

	case F_SETLKW:
		/* Same as F_SETLK, but with F_WAIT added to the flags. */
		flg |= F_WAIT;
		/* FALLTHROUGH */

	case F_SETLK:
		error = pledge_flock(p);
		if (error != 0)
			break;

		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			break;
		}
		vp = fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			break;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrflock(p, &fl);
#endif
		/* Make a SEEK_CUR start relative to the file offset. */
		if (fl.l_whence == SEEK_CUR) {
			off_t offset = foffset(fp);

			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = offset - fl.l_len;
			} else
				fl.l_start += offset;
		}
		switch (fl.l_type) {

		case F_RDLCK:
			/* A read lock needs the file open for reading. */
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				goto out;
			}
			/* closef() checks this flag before unlocking. */
			atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK);
			error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg);
			break;

		case F_WRLCK:
			/* A write lock needs the file open for writing. */
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				goto out;
			}
			atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK);
			error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg);
			break;

		case F_UNLCK:
			error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX);
			goto out;

		default:
			error = EINVAL;
			goto out;
		}

		/* Reached only after a lock request (F_RDLCK/F_WRLCK). */
		if (fd_checkclosed(fdp, fd, fp)) {
			/*
			 * We have lost the race with close() or dup2();
			 * unlock, pretend that we've won the race and that
			 * lock had been removed by close()
			 */
			fl.l_whence = SEEK_SET;
			fl.l_start = 0;
			fl.l_len = 0;
			VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX);
			fl.l_type = F_UNLCK;
		}
		goto out;


	case F_GETLK:
		error = pledge_flock(p);
		if (error != 0)
			break;

		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			break;
		}
		vp = fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			break;
		/* Same SEEK_CUR handling as for F_SETLK above. */
		if (fl.l_whence == SEEK_CUR) {
			off_t offset = foffset(fp);

			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = offset - fl.l_len;
			} else
				fl.l_start += offset;
		}
		if (fl.l_type != F_RDLCK &&
		    fl.l_type != F_WRLCK &&
		    fl.l_type != F_UNLCK &&
		    fl.l_type != 0) {
			error = EINVAL;
			break;
		}
		error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX);
		if (error)
			break;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrflock(p, &fl);
#endif
		/* Hand the updated lock description back to userland. */
		error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
		    sizeof (fl)));
		break;

	default:
		error = EINVAL;
		break;
	}
out:
	/* Drop the reference taken by fd_getfile() above. */
	FRELE(fp, p);
	return (error);
}
643 
644 /*
645  * Common code for dup, dup2, and fcntl(F_DUPFD).
646  */
647 int
648 finishdup(struct proc *p, struct file *fp, int old, int new,
649     register_t *retval, int dupflags)
650 {
651 	struct file *oldfp;
652 	struct filedesc *fdp = p->p_fd;
653 	int error;
654 
655 	fdpassertlocked(fdp);
656 	KASSERT(fp->f_iflags & FIF_INSERTED);
657 
658 	if (fp->f_count >= FDUP_MAX_COUNT) {
659 		error = EDEADLK;
660 		goto fail;
661 	}
662 
663 	oldfp = fd_getfile(fdp, new);
664 	if ((dupflags & DUPF_DUP2) && oldfp == NULL) {
665 		if (fd_inuse(fdp, new)) {
666 			error = EBUSY;
667 			goto fail;
668 		}
669 		fd_used(fdp, new);
670 	}
671 
672 	/*
673 	 * Use `fd_fplock' to synchronize with fd_getfile() so that
674 	 * the function no longer creates a new reference to the old file.
675 	 */
676 	mtx_enter(&fdp->fd_fplock);
677 	fdp->fd_ofiles[new] = fp;
678 	mtx_leave(&fdp->fd_fplock);
679 
680 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
681 	if (dupflags & DUPF_CLOEXEC)
682 		fdp->fd_ofileflags[new] |= UF_EXCLOSE;
683 	*retval = new;
684 
685 	if (oldfp != NULL) {
686 		knote_fdclose(p, new);
687 		fdpunlock(fdp);
688 		closef(oldfp, p);
689 	} else {
690 		fdpunlock(fdp);
691 	}
692 
693 	return (0);
694 
695 fail:
696 	fdpunlock(fdp);
697 	FRELE(fp, p);
698 	return (error);
699 }
700 
701 void
702 fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
703 {
704 	struct file *fq;
705 
706 	fdpassertlocked(fdp);
707 
708 	mtx_enter(&fhdlk);
709 	if ((fp->f_iflags & FIF_INSERTED) == 0) {
710 		fp->f_iflags |= FIF_INSERTED;
711 		if ((fq = fdp->fd_ofiles[0]) != NULL) {
712 			LIST_INSERT_AFTER(fq, fp, f_list);
713 		} else {
714 			LIST_INSERT_HEAD(&filehead, fp, f_list);
715 		}
716 	}
717 	mtx_leave(&fhdlk);
718 
719 	mtx_enter(&fdp->fd_fplock);
720 	KASSERT(fdp->fd_ofiles[fd] == NULL);
721 	fdp->fd_ofiles[fd] = fp;
722 	mtx_leave(&fdp->fd_fplock);
723 
724 	fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
725 }
726 
727 void
728 fdremove(struct filedesc *fdp, int fd)
729 {
730 	fdpassertlocked(fdp);
731 
732 	/*
733 	 * Use `fd_fplock' to synchronize with fd_getfile() so that
734 	 * the function no longer creates a new reference to the file.
735 	 */
736 	mtx_enter(&fdp->fd_fplock);
737 	fdp->fd_ofiles[fd] = NULL;
738 	mtx_leave(&fdp->fd_fplock);
739 
740 	fdp->fd_ofileflags[fd] = 0;
741 
742 	fd_unused(fdp, fd);
743 }
744 
745 int
746 fdrelease(struct proc *p, int fd)
747 {
748 	struct filedesc *fdp = p->p_fd;
749 	struct file *fp;
750 
751 	fdpassertlocked(fdp);
752 
753 	fp = fd_getfile(fdp, fd);
754 	if (fp == NULL) {
755 		fdpunlock(fdp);
756 		return (EBADF);
757 	}
758 	fdremove(fdp, fd);
759 	knote_fdclose(p, fd);
760 	fdpunlock(fdp);
761 	return (closef(fp, p));
762 }
763 
764 /*
765  * Close a file descriptor.
766  */
767 int
768 sys_close(struct proc *p, void *v, register_t *retval)
769 {
770 	struct sys_close_args /* {
771 		syscallarg(int) fd;
772 	} */ *uap = v;
773 	int fd = SCARG(uap, fd), error;
774 	struct filedesc *fdp = p->p_fd;
775 
776 	fdplock(fdp);
777 	/* fdrelease unlocks fdp. */
778 	error = fdrelease(p, fd);
779 
780 	return (error);
781 }
782 
783 /*
784  * Return status information about a file descriptor.
785  */
786 int
787 sys_fstat(struct proc *p, void *v, register_t *retval)
788 {
789 	struct sys_fstat_args /* {
790 		syscallarg(int) fd;
791 		syscallarg(struct stat *) sb;
792 	} */ *uap = v;
793 	int fd = SCARG(uap, fd);
794 	struct filedesc *fdp = p->p_fd;
795 	struct file *fp;
796 	struct stat ub;
797 	int error;
798 
799 	if ((fp = fd_getfile(fdp, fd)) == NULL)
800 		return (EBADF);
801 	error = (*fp->f_ops->fo_stat)(fp, &ub, p);
802 	FRELE(fp, p);
803 	if (error == 0) {
804 		/*
805 		 * Don't let non-root see generation numbers
806 		 * (for NFS security)
807 		 */
808 		if (suser(p))
809 			ub.st_gen = 0;
810 		error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb),
811 		    sizeof (ub));
812 	}
813 #ifdef KTRACE
814 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
815 		ktrstat(p, &ub);
816 #endif
817 	return (error);
818 }
819 
820 /*
821  * Return pathconf information about a file descriptor.
822  */
823 int
824 sys_fpathconf(struct proc *p, void *v, register_t *retval)
825 {
826 	struct sys_fpathconf_args /* {
827 		syscallarg(int) fd;
828 		syscallarg(int) name;
829 	} */ *uap = v;
830 	int fd = SCARG(uap, fd);
831 	struct filedesc *fdp = p->p_fd;
832 	struct file *fp;
833 	struct vnode *vp;
834 	int error;
835 
836 	if ((fp = fd_getfile(fdp, fd)) == NULL)
837 		return (EBADF);
838 	switch (fp->f_type) {
839 	case DTYPE_PIPE:
840 	case DTYPE_SOCKET:
841 		if (SCARG(uap, name) != _PC_PIPE_BUF) {
842 			error = EINVAL;
843 			break;
844 		}
845 		*retval = PIPE_BUF;
846 		error = 0;
847 		break;
848 
849 	case DTYPE_VNODE:
850 		vp = fp->f_data;
851 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
852 		error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
853 		VOP_UNLOCK(vp);
854 		break;
855 
856 	default:
857 		error = EOPNOTSUPP;
858 		break;
859 	}
860 	FRELE(fp, p);
861 	return (error);
862 }
863 
864 /*
865  * Allocate a file descriptor for the process.
866  */
867 int
868 fdalloc(struct proc *p, int want, int *result)
869 {
870 	struct filedesc *fdp = p->p_fd;
871 	int lim, last, i;
872 	u_int new, off;
873 
874 	fdpassertlocked(fdp);
875 
876 	/*
877 	 * Search for a free descriptor starting at the higher
878 	 * of want or fd_freefile.  If that fails, consider
879 	 * expanding the ofile array.
880 	 */
881 restart:
882 	lim = min((int)lim_cur(RLIMIT_NOFILE), maxfiles);
883 	last = min(fdp->fd_nfiles, lim);
884 	if ((i = want) < fdp->fd_freefile)
885 		i = fdp->fd_freefile;
886 	off = i >> NDENTRYSHIFT;
887 	new = find_next_zero(fdp->fd_himap, off,
888 	    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
889 	if (new != -1) {
890 		i = find_next_zero(&fdp->fd_lomap[new],
891 				   new > off ? 0 : i & NDENTRYMASK,
892 				   NDENTRIES);
893 		if (i == -1) {
894 			/*
895 			 * Free file descriptor in this block was
896 			 * below want, try again with higher want.
897 			 */
898 			want = (new + 1) << NDENTRYSHIFT;
899 			goto restart;
900 		}
901 		i += (new << NDENTRYSHIFT);
902 		if (i < last) {
903 			fd_used(fdp, i);
904 			if (want <= fdp->fd_freefile)
905 				fdp->fd_freefile = i;
906 			*result = i;
907 			fdp->fd_ofileflags[i] = 0;
908 			if (ISSET(p->p_p->ps_flags, PS_PLEDGE))
909 				fdp->fd_ofileflags[i] |= UF_PLEDGED;
910 			return (0);
911 		}
912 	}
913 	if (fdp->fd_nfiles >= lim)
914 		return (EMFILE);
915 
916 	return (ENOSPC);
917 }
918 
919 void
920 fdexpand(struct proc *p)
921 {
922 	struct filedesc *fdp = p->p_fd;
923 	int nfiles, oldnfiles;
924 	size_t copylen;
925 	struct file **newofile, **oldofile;
926 	char *newofileflags;
927 	u_int *newhimap, *newlomap;
928 
929 	fdpassertlocked(fdp);
930 
931 	oldnfiles = fdp->fd_nfiles;
932 	oldofile = fdp->fd_ofiles;
933 
934 	/*
935 	 * No space in current array.
936 	 */
937 	if (fdp->fd_nfiles < NDEXTENT)
938 		nfiles = NDEXTENT;
939 	else
940 		nfiles = 2 * fdp->fd_nfiles;
941 
942 	newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK);
943 	/*
944 	 * Allocate all required chunks before calling free(9) to make
945 	 * sure that ``fd_ofiles'' stays valid if we go to sleep.
946 	 */
947 	if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
948 		newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int),
949 		    M_FILEDESC, M_WAITOK);
950 		newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int),
951 		    M_FILEDESC, M_WAITOK);
952 	}
953 	newofileflags = (char *) &newofile[nfiles];
954 
955 	/*
956 	 * Copy the existing ofile and ofileflags arrays
957 	 * and zero the new portion of each array.
958 	 */
959 	copylen = sizeof(struct file *) * fdp->fd_nfiles;
960 	memcpy(newofile, fdp->fd_ofiles, copylen);
961 	memset((char *)newofile + copylen, 0,
962 	    nfiles * sizeof(struct file *) - copylen);
963 	copylen = sizeof(char) * fdp->fd_nfiles;
964 	memcpy(newofileflags, fdp->fd_ofileflags, copylen);
965 	memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen);
966 
967 	if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
968 		copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int);
969 		memcpy(newhimap, fdp->fd_himap, copylen);
970 		memset((char *)newhimap + copylen, 0,
971 		    NDHISLOTS(nfiles) * sizeof(u_int) - copylen);
972 
973 		copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int);
974 		memcpy(newlomap, fdp->fd_lomap, copylen);
975 		memset((char *)newlomap + copylen, 0,
976 		    NDLOSLOTS(nfiles) * sizeof(u_int) - copylen);
977 
978 		if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
979 			free(fdp->fd_himap, M_FILEDESC,
980 			    NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int));
981 			free(fdp->fd_lomap, M_FILEDESC,
982 			    NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int));
983 		}
984 		fdp->fd_himap = newhimap;
985 		fdp->fd_lomap = newlomap;
986 	}
987 
988 	mtx_enter(&fdp->fd_fplock);
989 	fdp->fd_ofiles = newofile;
990 	mtx_leave(&fdp->fd_fplock);
991 
992 	fdp->fd_ofileflags = newofileflags;
993 	fdp->fd_nfiles = nfiles;
994 
995 	if (oldnfiles > NDFILE)
996 		free(oldofile, M_FILEDESC, oldnfiles * OFILESIZE);
997 }
998 
999 /*
1000  * Create a new open file structure and allocate
1001  * a file descriptor for the process that refers to it.
1002  */
1003 int
1004 falloc(struct proc *p, struct file **resultfp, int *resultfd)
1005 {
1006 	struct file *fp;
1007 	int error, i;
1008 
1009 	KASSERT(resultfp != NULL);
1010 	KASSERT(resultfd != NULL);
1011 
1012 	fdpassertlocked(p->p_fd);
1013 restart:
1014 	if ((error = fdalloc(p, 0, &i)) != 0) {
1015 		if (error == ENOSPC) {
1016 			fdexpand(p);
1017 			goto restart;
1018 		}
1019 		return (error);
1020 	}
1021 
1022 	fp = fnew(p);
1023 	if (fp == NULL) {
1024 		fd_unused(p->p_fd, i);
1025 		return (ENFILE);
1026 	}
1027 
1028 	FREF(fp);
1029 	*resultfp = fp;
1030 	*resultfd = i;
1031 
1032 	return (0);
1033 }
1034 
1035 struct file *
1036 fnew(struct proc *p)
1037 {
1038 	struct file *fp;
1039 	int nfiles;
1040 
1041 	nfiles = atomic_inc_int_nv(&numfiles);
1042 	if (nfiles > maxfiles) {
1043 		atomic_dec_int(&numfiles);
1044 		tablefull("file");
1045 		return (NULL);
1046 	}
1047 
1048 	fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
1049 	/*
1050 	 * We need to block interrupts as long as `f_mtx' is being taken
1051 	 * with and without the KERNEL_LOCK().
1052 	 */
1053 	mtx_init(&fp->f_mtx, IPL_MPFLOOR);
1054 	fp->f_count = 1;
1055 	fp->f_cred = p->p_ucred;
1056 	crhold(fp->f_cred);
1057 
1058 	return (fp);
1059 }
1060 
1061 /*
1062  * Build a new filedesc structure.
1063  */
1064 struct filedesc *
1065 fdinit(void)
1066 {
1067 	struct filedesc0 *newfdp;
1068 
1069 	newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO);
1070 	rw_init(&newfdp->fd_fd.fd_lock, "fdlock");
1071 	mtx_init(&newfdp->fd_fd.fd_fplock, IPL_MPFLOOR);
1072 	LIST_INIT(&newfdp->fd_fd.fd_kqlist);
1073 
1074 	/* Create the file descriptor table. */
1075 	newfdp->fd_fd.fd_refcnt = 1;
1076 	newfdp->fd_fd.fd_cmask = S_IWGRP|S_IWOTH;
1077 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1078 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1079 	newfdp->fd_fd.fd_nfiles = NDFILE;
1080 	newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
1081 	newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
1082 
1083 	newfdp->fd_fd.fd_freefile = 0;
1084 	newfdp->fd_fd.fd_lastfile = 0;
1085 
1086 	return (&newfdp->fd_fd);
1087 }
1088 
1089 /*
1090  * Share a filedesc structure.
1091  */
1092 struct filedesc *
1093 fdshare(struct process *pr)
1094 {
1095 	pr->ps_fd->fd_refcnt++;
1096 	return (pr->ps_fd);
1097 }
1098 
1099 /*
1100  * Copy a filedesc structure.
1101  */
1102 struct filedesc *
1103 fdcopy(struct process *pr)
1104 {
1105 	struct filedesc *newfdp, *fdp = pr->ps_fd;
1106 	int i;
1107 
1108 	newfdp = fdinit();
1109 
1110 	fdplock(fdp);
1111 	if (fdp->fd_cdir) {
1112 		vref(fdp->fd_cdir);
1113 		newfdp->fd_cdir = fdp->fd_cdir;
1114 	}
1115 	if (fdp->fd_rdir) {
1116 		vref(fdp->fd_rdir);
1117 		newfdp->fd_rdir = fdp->fd_rdir;
1118 	}
1119 
1120 	/*
1121 	 * If the number of open files fits in the internal arrays
1122 	 * of the open file structure, use them, otherwise allocate
1123 	 * additional memory for the number of descriptors currently
1124 	 * in use.
1125 	 */
1126 	if (fdp->fd_lastfile >= NDFILE) {
1127 		/*
1128 		 * Compute the smallest multiple of NDEXTENT needed
1129 		 * for the file descriptors currently in use,
1130 		 * allowing the table to shrink.
1131 		 */
1132 		i = fdp->fd_nfiles;
1133 		while (i >= 2 * NDEXTENT && i > fdp->fd_lastfile * 2)
1134 			i /= 2;
1135 		newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC,
1136 		    M_WAITOK | M_ZERO);
1137 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1138 		newfdp->fd_nfiles = i;
1139 	}
1140 	if (NDHISLOTS(newfdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
1141 		newfdp->fd_himap = mallocarray(NDHISLOTS(newfdp->fd_nfiles),
1142 		    sizeof(u_int), M_FILEDESC, M_WAITOK | M_ZERO);
1143 		newfdp->fd_lomap = mallocarray(NDLOSLOTS(newfdp->fd_nfiles),
1144 		    sizeof(u_int), M_FILEDESC, M_WAITOK | M_ZERO);
1145 	}
1146 	newfdp->fd_freefile = fdp->fd_freefile;
1147 	newfdp->fd_flags = fdp->fd_flags;
1148 	newfdp->fd_cmask = fdp->fd_cmask;
1149 
1150 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1151 		struct file *fp = fdp->fd_ofiles[i];
1152 
1153 		if (fp != NULL) {
1154 			/*
1155 			 * XXX Gruesome hack. If count gets too high, fail
1156 			 * to copy an fd, since fdcopy()'s callers do not
1157 			 * permit it to indicate failure yet.
1158 			 * Meanwhile, kqueue files have to be
1159 			 * tied to the process that opened them to enforce
1160 			 * their internal consistency, so close them here.
1161 			 */
1162 			if (fp->f_count >= FDUP_MAX_COUNT ||
1163 			    fp->f_type == DTYPE_KQUEUE) {
1164 				if (i < newfdp->fd_freefile)
1165 					newfdp->fd_freefile = i;
1166 				continue;
1167 			}
1168 
1169 			FREF(fp);
1170 			newfdp->fd_ofiles[i] = fp;
1171 			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
1172 			fd_used(newfdp, i);
1173 		}
1174 	}
1175 	fdpunlock(fdp);
1176 
1177 	return (newfdp);
1178 }
1179 
1180 /*
1181  * Release a filedesc structure.
1182  */
1183 void
1184 fdfree(struct proc *p)
1185 {
1186 	struct filedesc *fdp = p->p_fd;
1187 	struct file *fp;
1188 	int fd;
1189 
1190 	if (--fdp->fd_refcnt > 0)
1191 		return;
1192 	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
1193 		fp = fdp->fd_ofiles[fd];
1194 		if (fp != NULL) {
1195 			fdp->fd_ofiles[fd] = NULL;
1196 			knote_fdclose(p, fd);
1197 			 /* closef() expects a refcount of 2 */
1198 			FREF(fp);
1199 			(void) closef(fp, p);
1200 		}
1201 	}
1202 	p->p_fd = NULL;
1203 	if (fdp->fd_nfiles > NDFILE)
1204 		free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE);
1205 	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
1206 		free(fdp->fd_himap, M_FILEDESC,
1207 		    NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int));
1208 		free(fdp->fd_lomap, M_FILEDESC,
1209 		    NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int));
1210 	}
1211 	if (fdp->fd_cdir)
1212 		vrele(fdp->fd_cdir);
1213 	if (fdp->fd_rdir)
1214 		vrele(fdp->fd_rdir);
1215 	pool_put(&fdesc_pool, fdp);
1216 }
1217 
1218 /*
1219  * Internal form of close.
1220  * Decrement reference count on file structure.
1221  * Note: p may be NULL when closing a file
1222  * that was being passed in a message.
1223  *
1224  * The fp must have its usecount bumped and will be FRELEd here.
1225  */
1226 int
1227 closef(struct file *fp, struct proc *p)
1228 {
1229 	struct filedesc *fdp;
1230 
1231 	if (fp == NULL)
1232 		return (0);
1233 
1234 	KASSERTMSG(fp->f_count >= 2, "count (%u) < 2", fp->f_count);
1235 
1236 	atomic_dec_int(&fp->f_count);
1237 
1238 	/*
1239 	 * POSIX record locking dictates that any close releases ALL
1240 	 * locks owned by this process.  This is handled by setting
1241 	 * a flag in the unlock to free ONLY locks obeying POSIX
1242 	 * semantics, and not to free BSD-style file locks.
1243 	 * If the descriptor was in a message, POSIX-style locks
1244 	 * aren't passed with the descriptor.
1245 	 */
1246 
1247 	if (p && ((fdp = p->p_fd) != NULL) &&
1248 	    (fdp->fd_flags & FD_ADVLOCK) &&
1249 	    fp->f_type == DTYPE_VNODE) {
1250 		struct vnode *vp = fp->f_data;
1251 		struct flock lf;
1252 
1253 		lf.l_whence = SEEK_SET;
1254 		lf.l_start = 0;
1255 		lf.l_len = 0;
1256 		lf.l_type = F_UNLCK;
1257 		(void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX);
1258 	}
1259 
1260 	return (FRELE(fp, p));
1261 }
1262 
1263 int
1264 fdrop(struct file *fp, struct proc *p)
1265 {
1266 	int error;
1267 
1268 	KASSERTMSG(fp->f_count == 0, "count (%u) != 0", fp->f_count);
1269 
1270 	mtx_enter(&fhdlk);
1271 	if (fp->f_iflags & FIF_INSERTED)
1272 		LIST_REMOVE(fp, f_list);
1273 	mtx_leave(&fhdlk);
1274 
1275 	if (fp->f_ops)
1276 		error = (*fp->f_ops->fo_close)(fp, p);
1277 	else
1278 		error = 0;
1279 
1280 	crfree(fp->f_cred);
1281 	atomic_dec_int(&numfiles);
1282 	pool_put(&file_pool, fp);
1283 
1284 	return (error);
1285 }
1286 
1287 /*
1288  * Apply an advisory lock on a file descriptor.
1289  *
1290  * Just attempt to get a record lock of the requested type on
1291  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1292  */
1293 int
1294 sys_flock(struct proc *p, void *v, register_t *retval)
1295 {
1296 	struct sys_flock_args /* {
1297 		syscallarg(int) fd;
1298 		syscallarg(int) how;
1299 	} */ *uap = v;
1300 	int fd = SCARG(uap, fd);
1301 	int how = SCARG(uap, how);
1302 	struct filedesc *fdp = p->p_fd;
1303 	struct file *fp;
1304 	struct vnode *vp;
1305 	struct flock lf;
1306 	int error;
1307 
1308 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1309 		return (EBADF);
1310 	if (fp->f_type != DTYPE_VNODE) {
1311 		error = EOPNOTSUPP;
1312 		goto out;
1313 	}
1314 	vp = fp->f_data;
1315 	lf.l_whence = SEEK_SET;
1316 	lf.l_start = 0;
1317 	lf.l_len = 0;
1318 	if (how & LOCK_UN) {
1319 		lf.l_type = F_UNLCK;
1320 		fp->f_iflags &= ~FIF_HASLOCK;
1321 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1322 		goto out;
1323 	}
1324 	if (how & LOCK_EX)
1325 		lf.l_type = F_WRLCK;
1326 	else if (how & LOCK_SH)
1327 		lf.l_type = F_RDLCK;
1328 	else {
1329 		error = EINVAL;
1330 		goto out;
1331 	}
1332 	fp->f_iflags |= FIF_HASLOCK;
1333 	if (how & LOCK_NB)
1334 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1335 	else
1336 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
1337 out:
1338 	FRELE(fp, p);
1339 	return (error);
1340 }
1341 
1342 /*
1343  * File Descriptor pseudo-device driver (/dev/fd/).
1344  *
1345  * Opening minor device N dup()s the file (if any) connected to file
1346  * descriptor N belonging to the calling process.  Note that this driver
1347  * consists of only the ``open()'' routine, because all subsequent
1348  * references to this file will be direct to the other driver.
1349  */
1350 int
1351 filedescopen(dev_t dev, int mode, int type, struct proc *p)
1352 {
1353 
1354 	/*
1355 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1356 	 * the file descriptor being sought for duplication. The error
1357 	 * return ensures that the vnode for this device will be released
1358 	 * by vn_open. Open will detect this special error and take the
1359 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1360 	 * will simply report the error.
1361 	 */
1362 	p->p_dupfd = minor(dev);
1363 	return (ENODEV);
1364 }
1365 
1366 /*
1367  * Duplicate the specified descriptor to a free descriptor.
1368  */
1369 int
1370 dupfdopen(struct proc *p, int indx, int mode)
1371 {
1372 	struct filedesc *fdp = p->p_fd;
1373 	int dupfd = p->p_dupfd;
1374 	struct file *wfp;
1375 
1376 	fdpassertlocked(fdp);
1377 
1378 	/*
1379 	 * Assume that the filename was user-specified; applications do
1380 	 * not tend to open /dev/fd/# when they can just call dup()
1381 	 */
1382 	if ((p->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) {
1383 		if (p->p_descfd == 255)
1384 			return (EPERM);
1385 		if (p->p_descfd != dupfd)
1386 			return (EPERM);
1387 	}
1388 
1389 	/*
1390 	 * If the to-be-dup'd fd number is greater than the allowed number
1391 	 * of file descriptors, or the fd to be dup'd has already been
1392 	 * closed, reject. Note, there is no need to check for new == old
1393 	 * because fd_getfile will return NULL if the file at indx is
1394 	 * newly created by falloc.
1395 	 */
1396 	if ((wfp = fd_getfile(fdp, dupfd)) == NULL)
1397 		return (EBADF);
1398 
1399 	/*
1400 	 * Check that the mode the file is being opened for is a
1401 	 * subset of the mode of the existing descriptor.
1402 	 */
1403 	if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1404 		FRELE(wfp, p);
1405 		return (EACCES);
1406 	}
1407 	if (wfp->f_count >= FDUP_MAX_COUNT) {
1408 		FRELE(wfp, p);
1409 		return (EDEADLK);
1410 	}
1411 
1412 	KASSERT(wfp->f_iflags & FIF_INSERTED);
1413 
1414 	mtx_enter(&fdp->fd_fplock);
1415 	KASSERT(fdp->fd_ofiles[indx] == NULL);
1416 	fdp->fd_ofiles[indx] = wfp;
1417 	mtx_leave(&fdp->fd_fplock);
1418 
1419 	fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) |
1420 	    (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE);
1421 
1422 	return (0);
1423 }
1424 
1425 /*
1426  * Close any files on exec?
1427  */
1428 void
1429 fdcloseexec(struct proc *p)
1430 {
1431 	struct filedesc *fdp = p->p_fd;
1432 	int fd;
1433 
1434 	fdplock(fdp);
1435 	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
1436 		fdp->fd_ofileflags[fd] &= ~UF_PLEDGED;
1437 		if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) {
1438 			/* fdrelease() unlocks fdp. */
1439 			(void) fdrelease(p, fd);
1440 			fdplock(fdp);
1441 		}
1442 	}
1443 	fdpunlock(fdp);
1444 }
1445 
1446 int
1447 sys_closefrom(struct proc *p, void *v, register_t *retval)
1448 {
1449 	struct sys_closefrom_args *uap = v;
1450 	struct filedesc *fdp = p->p_fd;
1451 	u_int startfd, i;
1452 
1453 	startfd = SCARG(uap, fd);
1454 	fdplock(fdp);
1455 
1456 	if (startfd > fdp->fd_lastfile) {
1457 		fdpunlock(fdp);
1458 		return (EBADF);
1459 	}
1460 
1461 	for (i = startfd; i <= fdp->fd_lastfile; i++) {
1462 		/* fdrelease() unlocks fdp. */
1463 		fdrelease(p, i);
1464 		fdplock(fdp);
1465 	}
1466 
1467 	fdpunlock(fdp);
1468 	return (0);
1469 }
1470 
1471 int
1472 sys_getdtablecount(struct proc *p, void *v, register_t *retval)
1473 {
1474 	*retval = p->p_fd->fd_openfd;
1475 	return (0);
1476 }
1477