xref: /openbsd-src/sys/kern/kern_descrip.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: kern_descrip.c,v 1.34 2001/07/28 17:12:12 gluk Exp $	*/
2 /*	$NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
42  */
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/kernel.h>
48 #include <sys/vnode.h>
49 #include <sys/proc.h>
50 #include <sys/file.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/stat.h>
54 #include <sys/ioctl.h>
55 #include <sys/fcntl.h>
56 #include <sys/malloc.h>
57 #include <sys/syslog.h>
58 #include <sys/ucred.h>
59 #include <sys/unistd.h>
60 #include <sys/resourcevar.h>
61 #include <sys/conf.h>
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64 #include <sys/event.h>
65 #include <sys/pool.h>
66 
67 #include <vm/vm.h>
68 
69 #include <sys/pipe.h>
70 
71 /*
72  * Descriptor management.
73  */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */

/*
 * Local helpers.  fd_used/fd_unused keep the per-process descriptor
 * bitmaps and the fd_lastfile/fd_freefile hints up to date;
 * find_next_zero and find_last_set search those bitmaps.
 */
static __inline void fd_used __P((struct filedesc *, int));
static __inline void fd_unused __P((struct filedesc *, int));
static __inline int find_next_zero __P((u_int *, int, u_int));
int finishdup __P((struct filedesc *, int, int, register_t *));
int find_last_set __P((struct filedesc *, int));

/* Both pools are set up by filedesc_init(). */
struct pool file_pool;		/* items are sizeof(struct file) */
struct pool fdesc_pool;		/* items are sizeof(struct filedesc0) */
85 
86 void
87 filedesc_init()
88 {
89 	pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
90 		0, pool_page_alloc_nointr, pool_page_free_nointr, M_PROC);
91 	pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
92 		0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
93 }
94 
95 static __inline int
96 find_next_zero (u_int *bitmap, int want, u_int bits)
97 {
98 	int i, off, maxoff;
99 	u_int sub;
100 
101 	if (want > bits)
102 		return -1;
103 
104 	off = want >> NDENTRYSHIFT;
105 	i = want & NDENTRYMASK;
106 	if (i) {
107 		sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
108 		if (sub != ~0)
109 			goto found;
110 		off++;
111 	}
112 
113 	maxoff = NDLOSLOTS(bits);
114 	while (off < maxoff) {
115 		if ((sub = bitmap[off]) != ~0)
116 			goto found;
117 		off++;
118 	}
119 
120 	return -1;
121 
122  found:
123 	return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
124 }
125 
126 int
127 find_last_set(struct filedesc *fd, int last)
128 {
129 	int off, i;
130 	struct file **ofiles = fd->fd_ofiles;
131 	u_int *bitmap = fd->fd_lomap;
132 
133 	off = (last - 1) >> NDENTRYSHIFT;
134 
135 	while (!bitmap[off] && off >= 0)
136 		off--;
137 	if (off < 0)
138 		return 0;
139 
140 	i = ((off + 1) << NDENTRYSHIFT) - 1;
141 	if (i >= last)
142 		i = last - 1;
143 
144 	while (i > 0 && ofiles[i] == NULL)
145 		i--;
146 	return i;
147 }
148 
149 static __inline void
150 fd_used(fdp, fd)
151 	register struct filedesc *fdp;
152 	register int fd;
153 {
154 	u_int off = fd >> NDENTRYSHIFT;
155 
156 	fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
157 	if (fdp->fd_lomap[off] == ~0)
158 		fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
159 
160 	if (fd > fdp->fd_lastfile)
161 		fdp->fd_lastfile = fd;
162 }
163 
164 static __inline void
165 fd_unused(fdp, fd)
166 	register struct filedesc *fdp;
167 	register int fd;
168 {
169 	u_int off = fd >> NDENTRYSHIFT;
170 
171 	if (fd < fdp->fd_freefile)
172 		fdp->fd_freefile = fd;
173 
174 	if (fdp->fd_lomap[off] == ~0)
175 		fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
176 	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
177 
178 #ifdef DIAGNOSTIC
179 	if (fd > fdp->fd_lastfile)
180 		panic("fd_unused: fd_lastfile inconsistent");
181 #endif
182 	if (fd == fdp->fd_lastfile)
183 		fdp->fd_lastfile = find_last_set(fdp, fd);
184 }
185 
186 /*
187  * System calls on descriptors.
188  */
189 
190 /*
191  * Duplicate a file descriptor.
192  */
193 /* ARGSUSED */
194 int
195 sys_dup(p, v, retval)
196 	struct proc *p;
197 	void *v;
198 	register_t *retval;
199 {
200 	struct sys_dup_args /* {
201 		syscallarg(u_int) fd;
202 	} */ *uap = v;
203 	register struct filedesc *fdp = p->p_fd;
204 	register int old = SCARG(uap, fd);
205 	int new;
206 	int error;
207 
208 	if ((u_int)old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
209 		return (EBADF);
210 	if ((error = fdalloc(p, 0, &new)) != 0)
211 		return (error);
212 	return (finishdup(fdp, old, new, retval));
213 }
214 
215 /*
216  * Duplicate a file descriptor to a particular value.
217  */
218 /* ARGSUSED */
219 int
220 sys_dup2(p, v, retval)
221 	struct proc *p;
222 	void *v;
223 	register_t *retval;
224 {
225 	struct sys_dup2_args /* {
226 		syscallarg(u_int) from;
227 		syscallarg(u_int) to;
228 	} */ *uap = v;
229 	struct filedesc *fdp = p->p_fd;
230 	int old = SCARG(uap, from), new = SCARG(uap, to);
231 	int i, error;
232 
233 	if ((u_int)old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL ||
234 	    (u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
235 	    (u_int)new >= maxfiles)
236 		return (EBADF);
237 	if (old == new) {
238 		*retval = new;
239 		return (0);
240 	}
241 	if (new >= fdp->fd_nfiles) {
242 		if ((error = fdalloc(p, new, &i)) != 0)
243 			return (error);
244 		if (new != i)
245 			panic("dup2: fdalloc");
246 	} else {
247 		(void) fdrelease(p, new);
248 	}
249 	return (finishdup(fdp, old, new, retval));
250 }
251 
/*
 * The file control system call.
 *
 * Looks up the open file behind `fd' (EBADF if the number is out of range
 * or the slot is empty) and dispatches on `cmd'.  Commands not listed
 * below return EINVAL.  `arg' is interpreted per command: as an integer
 * for F_DUPFD, F_SETFD, F_SETFL and F_SETOWN, and as a user pointer to a
 * struct flock for F_GETLK, F_SETLK and F_SETLKW.
 */
/* ARGSUSED */
int
sys_fcntl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_fcntl_args /* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	/* flg: flags handed to VOP_ADVLOCK; F_SETLKW adds F_WAIT below. */
	int i, tmp, error, flg = F_POSIX;
	struct flock fl;
	int newmin;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	switch (SCARG(uap, cmd)) {

	case F_DUPFD:
		/*
		 * arg is the lowest acceptable new descriptor number; it
		 * must be below both the soft limit and maxfiles.
		 */
		newmin = (long)SCARG(uap, arg);
		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    (u_int)newmin >= maxfiles)
			return (EINVAL);
		if ((error = fdalloc(p, newmin, &i)) != 0)
			return (error);
		return (finishdup(fdp, fd, i, retval));

	case F_GETFD:
		/* Report the close-on-exec flag as 0 or 1. */
		*retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
		return (0);

	case F_SETFD:
		/* Only bit 0 of arg is examined. */
		if ((long)SCARG(uap, arg) & 1)
			fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
		else
			fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
		return (0);

	case F_GETFL:
		*retval = OFLAGS(fp->f_flag);
		return (0);

	case F_SETFL:
		/*
		 * Replace the FCNTLFLAGS bits of f_flag, then tell the
		 * object about the resulting non-blocking and async
		 * settings.  f_flag has already been updated if the
		 * FIONBIO ioctl fails.  If FIOASYNC fails, non-blocking
		 * mode is switched back off before the error is returned.
		 */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
		tmp = fp->f_flag & FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		if (error)
			return (error);
		tmp = fp->f_flag & FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		if (!error)
			return (0);
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		return (error);

	case F_GETOWN:
		/* Sockets keep the owner in so_pgid and return it as is. */
		if (fp->f_type == DTYPE_SOCKET) {
			*retval = ((struct socket *)fp->f_data)->so_pgid;
			return (0);
		}
		/*
		 * Everything else is asked via TIOCGPGRP and the answer is
		 * negated.  The negation happens even when the ioctl
		 * failed.
		 */
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCGPGRP, (caddr_t)retval, p);
		*retval = -*retval;
		return (error);

	case F_SETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			/* Store arg unchanged plus the caller's uids. */
			so->so_pgid = (long)SCARG(uap, arg);
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			return (0);
		}
		/*
		 * Non-sockets: a non-positive arg is negated and used as
		 * is; a positive arg is looked up with pfind() and replaced
		 * by that process's group id (ESRCH if not found).  The
		 * result is passed to TIOCSPGRP.
		 */
		if ((long)SCARG(uap, arg) <= 0) {
			SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg));
		} else {
			struct proc *p1 = pfind((long)SCARG(uap, arg));
			if (p1 == 0)
				return (ESRCH);
			SCARG(uap, arg) = (void *)(long)p1->p_pgrp->pg_id;
		}
		return ((*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p));

	case F_SETLKW:
		/* Same as F_SETLK, but the request is allowed to wait. */
		flg |= F_WAIT;
		/* FALLTHROUGH */

	case F_SETLK:
		/* Record locks are only supported on vnodes. */
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			return (error);
		/* Make SEEK_CUR requests relative to the file offset. */
		if (fl.l_whence == SEEK_CUR) {
			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = fp->f_offset - fl.l_len;
			} else
				fl.l_start += fp->f_offset;
		}
		switch (fl.l_type) {

		case F_RDLCK:
			/* A read lock needs a descriptor open for reading. */
			if ((fp->f_flag & FREAD) == 0)
				return (EBADF);
			/* closef() checks P_ADVLOCK before unlocking. */
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_WRLCK:
			/* A write lock needs a descriptor open for writing. */
			if ((fp->f_flag & FWRITE) == 0)
				return (EBADF);
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_UNLCK:
			/* Unlock ignores flg and always passes F_POSIX. */
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
				F_POSIX));

		default:
			return (EINVAL);
		}

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			return (error);
		/* Same SEEK_CUR adjustment as for F_SETLK above. */
		if (fl.l_whence == SEEK_CUR) {
			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = fp->f_offset - fl.l_len;
			} else
				fl.l_start += fp->f_offset;
		}
		/* An l_type of 0 is accepted here in addition to the usual. */
		if (fl.l_type != F_RDLCK &&
		    fl.l_type != F_WRLCK &&
		    fl.l_type != F_UNLCK &&
		    fl.l_type != 0)
			return (EINVAL);
		error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
		if (error)
			return (error);
		/* Hand the (possibly updated) structure back to the caller. */
		return (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
		    sizeof (fl)));

	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}
426 
427 /*
428  * Common code for dup, dup2, and fcntl(F_DUPFD).
429  */
430 int
431 finishdup(fdp, old, new, retval)
432 	register struct filedesc *fdp;
433 	register int old, new;
434 	register_t *retval;
435 {
436 	register struct file *fp;
437 
438 	fp = fdp->fd_ofiles[old];
439 	if (fp->f_count == LONG_MAX-2)
440 		return (EDEADLK);
441 	fdp->fd_ofiles[new] = fp;
442 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
443 	fp->f_count++;
444 	fd_used(fdp, new);
445 	*retval = new;
446 	return (0);
447 }
448 
449 void
450 fdremove(fdp, fd)
451 	struct filedesc *fdp;
452 	int fd;
453 {
454 	fdp->fd_ofiles[fd] = NULL;
455 	fd_unused(fdp, fd);
456 }
457 
458 int
459 fdrelease(p, fd)
460 	struct proc *p;
461 	int fd;
462 {
463 	register struct filedesc *fdp = p->p_fd;
464 	register struct file **fpp, *fp;
465 	register char *pf;
466 
467 	fpp = &fdp->fd_ofiles[fd];
468 	fp = *fpp;
469 	if (fp == NULL)
470 		return (EBADF);
471 	pf = &fdp->fd_ofileflags[fd];
472 	if (*pf & UF_MAPPED) {
473 		/* XXX: USELESS? XXXCDC check it */
474 		p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
475 	}
476 	*fpp = NULL;
477 	*pf = 0;
478 	fd_unused(fdp, fd);
479 	if (fd < fdp->fd_knlistsize)
480 		knote_fdclose(p, fd);
481 	return (closef(fp, p));
482 }
483 
484 /*
485  * Close a file descriptor.
486  */
487 /* ARGSUSED */
488 int
489 sys_close(p, v, retval)
490 	struct proc *p;
491 	void *v;
492 	register_t *retval;
493 {
494 	struct sys_close_args /* {
495 		syscallarg(int) fd;
496 	} */ *uap = v;
497 	int fd = SCARG(uap, fd);
498 	register struct filedesc *fdp = p->p_fd;
499 
500 	if ((u_int)fd >= fdp->fd_nfiles)
501 		return (EBADF);
502 	return (fdrelease(p, fd));
503 }
504 
505 /*
506  * Return status information about a file descriptor.
507  */
508 /* ARGSUSED */
509 int
510 sys_fstat(p, v, retval)
511 	struct proc *p;
512 	void *v;
513 	register_t *retval;
514 {
515 	struct sys_fstat_args /* {
516 		syscallarg(int) fd;
517 		syscallarg(struct stat *) sb;
518 	} */ *uap = v;
519 	int fd = SCARG(uap, fd);
520 	struct filedesc *fdp = p->p_fd;
521 	struct file *fp;
522 	struct stat ub;
523 	int error;
524 
525 	if ((u_int)fd >= fdp->fd_nfiles ||
526 	    (fp = fdp->fd_ofiles[fd]) == NULL)
527 		return (EBADF);
528 	error = (*fp->f_ops->fo_stat)(fp, &ub, p);
529 	if (error == 0) {
530 		/* Don't let non-root see generation numbers
531 		   (for NFS security) */
532 		if (suser(p->p_ucred, &p->p_acflag))
533 			ub.st_gen = 0;
534 		error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb),
535 		    sizeof (ub));
536 	}
537 	return (error);
538 }
539 
540 /*
541  * Return pathconf information about a file descriptor.
542  */
543 /* ARGSUSED */
544 int
545 sys_fpathconf(p, v, retval)
546 	struct proc *p;
547 	void *v;
548 	register_t *retval;
549 {
550 	register struct sys_fpathconf_args /* {
551 		syscallarg(int) fd;
552 		syscallarg(int) name;
553 	} */ *uap = v;
554 	int fd = SCARG(uap, fd);
555 	struct filedesc *fdp = p->p_fd;
556 	struct file *fp;
557 	struct vnode *vp;
558 
559 	if ((u_int)fd >= fdp->fd_nfiles ||
560 	    (fp = fdp->fd_ofiles[fd]) == NULL)
561 		return (EBADF);
562 	switch (fp->f_type) {
563 	case DTYPE_PIPE:
564 	case DTYPE_SOCKET:
565 		if (SCARG(uap, name) != _PC_PIPE_BUF)
566 			return (EINVAL);
567 		*retval = PIPE_BUF;
568 		return (0);
569 
570 	case DTYPE_VNODE:
571 		vp = (struct vnode *)fp->f_data;
572 		return (VOP_PATHCONF(vp, SCARG(uap, name), retval));
573 
574 	default:
575 		return (EOPNOTSUPP);
576 	}
577 	/*NOTREACHED*/
578 }
579 
/*
 * Allocate a file descriptor for the process.
 *
 * p:      process whose descriptor table is searched (and grown if needed).
 * want:   lowest descriptor number the caller will accept.
 * result: receives the allocated descriptor number on success.
 *
 * Returns 0 on success or EMFILE when the table already holds as many
 * slots as the smaller of the RLIMIT_NOFILE soft limit and maxfiles.
 * On success the descriptor is already marked used in the bitmaps; the
 * caller still has to fill in fd_ofiles[*result].
 */
int fdexpand;		/* counts how many times a table has been grown */

int
fdalloc(p, want, result)
	struct proc *p;
	int want;
	int *result;
{
	register struct filedesc *fdp = p->p_fd;
	register int i;
	int lim, last, nfiles;
	struct file **newofile;
	char *newofileflags;
	u_int *newhimap, *newlomap, new, off;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	for (;;) {
		/* Descriptors at or above `last' are not usable yet. */
		last = min(fdp->fd_nfiles, lim);
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		/*
		 * Two-level search: the high map is searched first for a
		 * low-map word that is not completely full (`new' is that
		 * word's index), then that word is searched for a clear bit.
		 */
		off = i >> NDENTRYSHIFT;
		new = find_next_zero(fdp->fd_himap, off,
		    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
		if (new != -1) {
			/*
			 * Within the starting word only bits at or above i
			 * qualify; in any later word every bit qualifies.
			 */
			i = find_next_zero(&fdp->fd_lomap[new],
					   new > off ? 0 : i & NDENTRYMASK,
					   NDENTRIES);
			if (i == -1) {
				/* free file descriptor in this block was
				 * below want, try again with higher want.
				 */
				want = (new + 1) << NDENTRYSHIFT;
				continue;
			}
			/* Convert the in-word position to a descriptor. */
			i += (new << NDENTRYSHIFT);
			if (i < last) {
				fd_used(fdp, i);
				/*
				 * The free hint is only advanced when the
				 * search was not pushed past it by `want'.
				 */
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (fdp->fd_nfiles >= lim)
			return (EMFILE);
		/* Grow to NDEXTENT first, then double, never beyond lim. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		nfiles = min(lim, nfiles);
		/*
		 * One allocation holds both arrays: the file pointers
		 * first, the per-descriptor flag bytes right behind them.
		 */
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
		newofileflags = (char *) &newofile[nfiles];

		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		bcopy(fdp->fd_ofiles, newofile,
			(i = sizeof(struct file *) * fdp->fd_nfiles));
		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
		bcopy(fdp->fd_ofileflags, newofileflags,
			(i = sizeof(char) * fdp->fd_nfiles));
		bzero(newofileflags + i, nfiles * sizeof(char) - i);

		/*
		 * A table of NDFILE entries is the array embedded in
		 * struct filedesc0 (see fdinit) and must not be freed.
		 */
		if (fdp->fd_nfiles > NDFILE)
			free(fdp->fd_ofiles, M_FILEDESC);

		/*
		 * The bitmaps are only reallocated when the new size needs
		 * more high-map words than the old one.
		 */
		if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
			newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int),
			    M_FILEDESC, M_WAITOK);
			newlomap = malloc( NDLOSLOTS(nfiles) * sizeof(u_int),
			    M_FILEDESC, M_WAITOK);

			bcopy(fdp->fd_himap, newhimap,
			    (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)));
			bzero((char *)newhimap + i,
			    NDHISLOTS(nfiles) * sizeof(u_int) - i);

			bcopy(fdp->fd_lomap, newlomap,
			    (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)));
			bzero((char *)newlomap + i,
			    NDLOSLOTS(nfiles) * sizeof(u_int) - i);

			/* Same rule as above: embedded maps are not freed. */
			if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
				free(fdp->fd_himap, M_FILEDESC);
				free(fdp->fd_lomap, M_FILEDESC);
			}
			fdp->fd_himap = newhimap;
			fdp->fd_lomap = newlomap;
		}
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		/* Loop back and retry the search in the larger table. */
	}
}
688 
689 /*
690  * Check to see whether n user file descriptors
691  * are available to the process p.
692  */
693 int
694 fdavail(p, n)
695 	struct proc *p;
696 	register int n;
697 {
698 	register struct filedesc *fdp = p->p_fd;
699 	register struct file **fpp;
700 	register int i, lim;
701 
702 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
703 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
704 		return (1);
705 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
706 	for (i = min(lim, fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
707 		if (*fpp == NULL && --n <= 0)
708 			return (1);
709 	return (0);
710 }
711 
712 /*
713  * Create a new open file structure and allocate
714  * a file decriptor for the process that refers to it.
715  */
716 int
717 falloc(p, resultfp, resultfd)
718 	register struct proc *p;
719 	struct file **resultfp;
720 	int *resultfd;
721 {
722 	register struct file *fp, *fq;
723 	int error, i;
724 
725 	if ((error = fdalloc(p, 0, &i)) != 0)
726 		return (error);
727 	if (nfiles >= maxfiles) {
728 		fd_unused(p->p_fd, i);
729 		tablefull("file");
730 		return (ENFILE);
731 	}
732 	/*
733 	 * Allocate a new file descriptor.
734 	 * If the process has file descriptor zero open, add to the list
735 	 * of open files at that point, otherwise put it at the front of
736 	 * the list of open files.
737 	 */
738 	nfiles++;
739 	fp = pool_get(&file_pool, PR_WAITOK);
740 	bzero(fp, sizeof(struct file));
741 	if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
742 		LIST_INSERT_AFTER(fq, fp, f_list);
743 	} else {
744 		LIST_INSERT_HEAD(&filehead, fp, f_list);
745 	}
746 	p->p_fd->fd_ofiles[i] = fp;
747 	fp->f_count = 1;
748 	fp->f_cred = p->p_ucred;
749 	crhold(fp->f_cred);
750 	if (resultfp)
751 		*resultfp = fp;
752 	if (resultfd)
753 		*resultfd = i;
754 	return (0);
755 }
756 
757 /*
758  * Free a file descriptor.
759  */
760 void
761 ffree(fp)
762 	register struct file *fp;
763 {
764 	LIST_REMOVE(fp, f_list);
765 	crfree(fp->f_cred);
766 #ifdef DIAGNOSTIC
767 	fp->f_count = 0;
768 #endif
769 	nfiles--;
770 	pool_put(&file_pool, fp);
771 }
772 
773 /*
774  * Build a new filedesc structure.
775  */
776 struct filedesc *
777 fdinit(p)
778 	struct proc *p;
779 {
780 	register struct filedesc0 *newfdp;
781 	register struct filedesc *fdp = p->p_fd;
782 	extern int cmask;
783 
784 	newfdp = pool_get(&fdesc_pool, PR_WAITOK);
785 	bzero(newfdp, sizeof(struct filedesc0));
786 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
787 	VREF(newfdp->fd_fd.fd_cdir);
788 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
789 	if (newfdp->fd_fd.fd_rdir)
790 		VREF(newfdp->fd_fd.fd_rdir);
791 
792 	/* Create the file descriptor table. */
793 	newfdp->fd_fd.fd_refcnt = 1;
794 	newfdp->fd_fd.fd_cmask = cmask;
795 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
796 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
797 	newfdp->fd_fd.fd_nfiles = NDFILE;
798 	newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
799 	newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
800 	newfdp->fd_fd.fd_knlistsize = -1;
801 
802 	newfdp->fd_fd.fd_freefile = 0;
803 	newfdp->fd_fd.fd_lastfile = 0;
804 
805 	return (&newfdp->fd_fd);
806 }
807 
808 /*
809  * Share a filedesc structure.
810  */
811 struct filedesc *
812 fdshare(p)
813 	struct proc *p;
814 {
815 	p->p_fd->fd_refcnt++;
816 	return (p->p_fd);
817 }
818 
819 /*
820  * Copy a filedesc structure.
821  */
822 struct filedesc *
823 fdcopy(p)
824 	struct proc *p;
825 {
826 	struct filedesc *newfdp, *fdp = p->p_fd;
827 	struct file **fpp;
828 	int i;
829 
830 	newfdp = pool_get(&fdesc_pool, PR_WAITOK);
831 	bcopy(fdp, newfdp, sizeof(struct filedesc));
832 	VREF(newfdp->fd_cdir);
833 	if (newfdp->fd_rdir)
834 		VREF(newfdp->fd_rdir);
835 	newfdp->fd_refcnt = 1;
836 
837 	/*
838 	 * If the number of open files fits in the internal arrays
839 	 * of the open file structure, use them, otherwise allocate
840 	 * additional memory for the number of descriptors currently
841 	 * in use.
842 	 */
843 	if (newfdp->fd_lastfile < NDFILE) {
844 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
845 		newfdp->fd_ofileflags =
846 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
847 		i = NDFILE;
848 	} else {
849 		/*
850 		 * Compute the smallest multiple of NDEXTENT needed
851 		 * for the file descriptors currently in use,
852 		 * allowing the table to shrink.
853 		 */
854 		i = newfdp->fd_nfiles;
855 		while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
856 			i /= 2;
857 		newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
858 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
859 	}
860 	if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
861 		newfdp->fd_himap =
862 			((struct filedesc0 *) newfdp)->fd_dhimap;
863 		newfdp->fd_lomap =
864 			((struct filedesc0 *) newfdp)->fd_dlomap;
865 	} else {
866 		newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int),
867 		    M_FILEDESC, M_WAITOK);
868 		newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int),
869 		    M_FILEDESC, M_WAITOK);
870 	}
871 	newfdp->fd_nfiles = i;
872 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
873 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
874 	bcopy(fdp->fd_himap, newfdp->fd_himap, NDHISLOTS(i) * sizeof(u_int));
875 	bcopy(fdp->fd_lomap, newfdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int));
876 
877 	/*
878 	 * kq descriptors cannot be copied.
879 	 */
880 	if (newfdp->fd_knlistsize != -1) {
881 		fpp = newfdp->fd_ofiles;
882 		for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++)
883 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
884 				fdremove(newfdp, i);
885 		newfdp->fd_knlist = NULL;
886 		newfdp->fd_knlistsize = -1;
887 		newfdp->fd_knhash = NULL;
888 		newfdp->fd_knhashmask = 0;
889 	}
890 
891 	fpp = newfdp->fd_ofiles;
892 	for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++)
893 		if (*fpp != NULL) {
894 			/*
895 			 * XXX Gruesome hack. If count gets too high, fail
896 			 * to copy an fd, since fdcopy()'s callers do not
897 			 * permit it to indicate failure yet.
898 			 */
899 			if ((*fpp)->f_count == LONG_MAX-2)
900 				fdremove(newfdp, i);
901 			else
902 				(*fpp)->f_count++;
903 		}
904 	return (newfdp);
905 }
906 
907 /*
908  * Release a filedesc structure.
909  */
910 void
911 fdfree(p)
912 	struct proc *p;
913 {
914 	register struct filedesc *fdp = p->p_fd;
915 	register struct file **fpp, *fp;
916 	register int i;
917 
918 	if (--fdp->fd_refcnt > 0)
919 		return;
920 	fpp = fdp->fd_ofiles;
921 	for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
922 		fp = *fpp;
923 		if (fp != NULL) {
924 			*fpp = NULL;
925 			(void) closef(fp, p);
926 		}
927 	}
928 	p->p_fd = NULL;
929 	if (fdp->fd_nfiles > NDFILE)
930 		free(fdp->fd_ofiles, M_FILEDESC);
931 	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
932 		free(fdp->fd_himap, M_FILEDESC);
933 		free(fdp->fd_lomap, M_FILEDESC);
934 	}
935 	vrele(fdp->fd_cdir);
936 	if (fdp->fd_rdir)
937 		vrele(fdp->fd_rdir);
938 	if (fdp->fd_knlist)
939 		FREE(fdp->fd_knlist, M_TEMP);
940 	if (fdp->fd_knhash)
941 		FREE(fdp->fd_knhash, M_TEMP);
942 	pool_put(&fdesc_pool, fdp);
943 }
944 
945 /*
946  * Internal form of close.
947  * Decrement reference count on file structure.
948  * Note: p may be NULL when closing a file
949  * that was being passed in a message.
950  */
951 int
952 closef(fp, p)
953 	register struct file *fp;
954 	register struct proc *p;
955 {
956 	struct vnode *vp;
957 	struct flock lf;
958 	int error;
959 
960 	if (fp == NULL)
961 		return (0);
962 	/*
963 	 * POSIX record locking dictates that any close releases ALL
964 	 * locks owned by this process.  This is handled by setting
965 	 * a flag in the unlock to free ONLY locks obeying POSIX
966 	 * semantics, and not to free BSD-style file locks.
967 	 * If the descriptor was in a message, POSIX-style locks
968 	 * aren't passed with the descriptor.
969 	 */
970 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
971 		lf.l_whence = SEEK_SET;
972 		lf.l_start = 0;
973 		lf.l_len = 0;
974 		lf.l_type = F_UNLCK;
975 		vp = (struct vnode *)fp->f_data;
976 		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
977 	}
978 	if (--fp->f_count > 0)
979 		return (0);
980 	if (fp->f_count < 0)
981 		panic("closef: count < 0");
982 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
983 		lf.l_whence = SEEK_SET;
984 		lf.l_start = 0;
985 		lf.l_len = 0;
986 		lf.l_type = F_UNLCK;
987 		vp = (struct vnode *)fp->f_data;
988 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
989 	}
990 	if (fp->f_ops)
991 		error = (*fp->f_ops->fo_close)(fp, p);
992 	else
993 		error = 0;
994 	ffree(fp);
995 	return (error);
996 }
997 
998 /*
999  * Apply an advisory lock on a file descriptor.
1000  *
1001  * Just attempt to get a record lock of the requested type on
1002  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1003  */
1004 /* ARGSUSED */
1005 int
1006 sys_flock(p, v, retval)
1007 	struct proc *p;
1008 	void *v;
1009 	register_t *retval;
1010 {
1011 	register struct sys_flock_args /* {
1012 		syscallarg(int) fd;
1013 		syscallarg(int) how;
1014 	} */ *uap = v;
1015 	int fd = SCARG(uap, fd);
1016 	int how = SCARG(uap, how);
1017 	register struct filedesc *fdp = p->p_fd;
1018 	register struct file *fp;
1019 	struct vnode *vp;
1020 	struct flock lf;
1021 
1022 	if ((u_int)fd >= fdp->fd_nfiles ||
1023 	    (fp = fdp->fd_ofiles[fd]) == NULL)
1024 		return (EBADF);
1025 	if (fp->f_type != DTYPE_VNODE)
1026 		return (EOPNOTSUPP);
1027 	vp = (struct vnode *)fp->f_data;
1028 	lf.l_whence = SEEK_SET;
1029 	lf.l_start = 0;
1030 	lf.l_len = 0;
1031 	if (how & LOCK_UN) {
1032 		lf.l_type = F_UNLCK;
1033 		fp->f_flag &= ~FHASLOCK;
1034 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1035 	}
1036 	if (how & LOCK_EX)
1037 		lf.l_type = F_WRLCK;
1038 	else if (how & LOCK_SH)
1039 		lf.l_type = F_RDLCK;
1040 	else
1041 		return (EINVAL);
1042 	fp->f_flag |= FHASLOCK;
1043 	if (how & LOCK_NB)
1044 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1045 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1046 }
1047 
1048 /*
1049  * File Descriptor pseudo-device driver (/dev/fd/).
1050  *
1051  * Opening minor device N dup()s the file (if any) connected to file
1052  * descriptor N belonging to the calling process.  Note that this driver
1053  * consists of only the ``open()'' routine, because all subsequent
1054  * references to this file will be direct to the other driver.
1055  */
1056 /* ARGSUSED */
1057 int
1058 filedescopen(dev, mode, type, p)
1059 	dev_t dev;
1060 	int mode, type;
1061 	struct proc *p;
1062 {
1063 
1064 	/*
1065 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1066 	 * the file descriptor being sought for duplication. The error
1067 	 * return ensures that the vnode for this device will be released
1068 	 * by vn_open. Open will detect this special error and take the
1069 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1070 	 * will simply report the error.
1071 	 */
1072 	p->p_dupfd = minor(dev);
1073 	return (ENODEV);
1074 }
1075 
1076 /*
1077  * Duplicate the specified descriptor to a free descriptor.
1078  */
1079 int
1080 dupfdopen(fdp, indx, dfd, mode, error)
1081 	register struct filedesc *fdp;
1082 	register int indx, dfd;
1083 	int mode;
1084 	int error;
1085 {
1086 	register struct file *wfp;
1087 	struct file *fp;
1088 
1089 	/*
1090 	 * If the to-be-dup'd fd number is greater than the allowed number
1091 	 * of file descriptors, or the fd to be dup'd has already been
1092 	 * closed, reject.  Note, check for new == old is necessary as
1093 	 * falloc could allocate an already closed to-be-dup'd descriptor
1094 	 * as the new descriptor.
1095 	 */
1096 	fp = fdp->fd_ofiles[indx];
1097 	if ((u_int)dfd >= fdp->fd_nfiles ||
1098 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1099 		return (EBADF);
1100 
1101 	/*
1102 	 * There are two cases of interest here.
1103 	 *
1104 	 * For ENODEV simply dup (dfd) to file descriptor
1105 	 * (indx) and return.
1106 	 *
1107 	 * For ENXIO steal away the file structure from (dfd) and
1108 	 * store it in (indx).  (dfd) is effectively closed by
1109 	 * this operation.
1110 	 *
1111 	 * Any other error code is just returned.
1112 	 */
1113 	switch (error) {
1114 	case ENODEV:
1115 		/*
1116 		 * Check that the mode the file is being opened for is a
1117 		 * subset of the mode of the existing descriptor.
1118 		 */
1119 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1120 			return (EACCES);
1121 		if (wfp->f_count == LONG_MAX-2)
1122 			return (EDEADLK);
1123 		fdp->fd_ofiles[indx] = wfp;
1124 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1125 		wfp->f_count++;
1126 		fd_used(fdp, indx);
1127 		return (0);
1128 
1129 	case ENXIO:
1130 		/*
1131 		 * Steal away the file pointer from dfd, and stuff it into indx.
1132 		 */
1133 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1134 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1135 		fdp->fd_ofiles[dfd] = NULL;
1136 		fdp->fd_ofileflags[dfd] = 0;
1137 		/*
1138 		 * Complete the clean up of the filedesc structure by
1139 		 * recomputing the various hints.
1140 		 */
1141 		fd_used(fdp, indx);
1142 		fd_unused(fdp, dfd);
1143 		return (0);
1144 
1145 	default:
1146 		return (error);
1147 	}
1148 	/* NOTREACHED */
1149 }
1150 
1151 /*
1152  * Close any files on exec?
1153  */
1154 void
1155 fdcloseexec(p)
1156 	struct proc *p;
1157 {
1158 	register struct filedesc *fdp = p->p_fd;
1159 	register int fd;
1160 
1161 	for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1162 		if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1163 			(void) fdrelease(p, fd);
1164 }
1165