xref: /netbsd-src/sys/kern/sys_generic.c (revision dc306354b0b29af51801a7632f1e95265a68cd81)
1 /*	$NetBSD: sys_generic.c,v 1.44 1998/08/04 04:03:15 perry Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
41  */
42 
43 #include "opt_ktrace.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61 
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64 
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67 
68 /*
69  * Read system call.
70  */
71 /* ARGSUSED */
72 int
73 sys_read(p, v, retval)
74 	struct proc *p;
75 	void *v;
76 	register_t *retval;
77 {
78 	register struct sys_read_args /* {
79 		syscallarg(int) fd;
80 		syscallarg(void *) buf;
81 		syscallarg(size_t) nbyte;
82 	} */ *uap = v;
83 	int fd = SCARG(uap, fd);
84 	register struct file *fp;
85 	register struct filedesc *fdp = p->p_fd;
86 
87 	if ((u_int)fd >= fdp->fd_nfiles ||
88 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
89 	    (fp->f_flag & FREAD) == 0)
90 		return (EBADF);
91 
92 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
93 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
94 }
95 
96 int
97 dofileread(p, fd, fp, buf, nbyte, offset, flags, retval)
98 	struct proc *p;
99 	int fd;
100 	struct file *fp;
101 	void *buf;
102 	size_t nbyte;
103 	off_t *offset;
104 	int flags;
105 	register_t *retval;
106 {
107 	struct uio auio;
108 	struct iovec aiov;
109 	long cnt, error = 0;
110 #ifdef KTRACE
111 	struct iovec ktriov;
112 #endif
113 
114 	aiov.iov_base = (caddr_t)buf;
115 	aiov.iov_len = nbyte;
116 	auio.uio_iov = &aiov;
117 	auio.uio_iovcnt = 1;
118 	auio.uio_resid = nbyte;
119 	auio.uio_rw = UIO_READ;
120 	auio.uio_segflg = UIO_USERSPACE;
121 	auio.uio_procp = p;
122 
123 	/*
124 	 * Reads return ssize_t because -1 is returned on error.  Therefore
125 	 * we must restrict the length to SSIZE_MAX to avoid garbage return
126 	 * values.
127 	 */
128 	if (auio.uio_resid > SSIZE_MAX)
129 		return (EINVAL);
130 
131 #ifdef KTRACE
132 	/*
133 	 * if tracing, save a copy of iovec
134 	 */
135 	if (KTRPOINT(p, KTR_GENIO))
136 		ktriov = aiov;
137 #endif
138 	cnt = auio.uio_resid;
139 	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
140 	if (error)
141 		if (auio.uio_resid != cnt && (error == ERESTART ||
142 		    error == EINTR || error == EWOULDBLOCK))
143 			error = 0;
144 	cnt -= auio.uio_resid;
145 #ifdef KTRACE
146 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
147 		ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error);
148 #endif
149 	*retval = cnt;
150 	return (error);
151 }
152 
153 /*
154  * Scatter read system call.
155  */
156 int
157 sys_readv(p, v, retval)
158 	struct proc *p;
159 	void *v;
160 	register_t *retval;
161 {
162 	register struct sys_readv_args /* {
163 		syscallarg(int) fd;
164 		syscallarg(const struct iovec *) iovp;
165 		syscallarg(int) iovcnt;
166 	} */ *uap = v;
167 	int fd = SCARG(uap, fd);
168 	register struct file *fp;
169 	register struct filedesc *fdp = p->p_fd;
170 
171 	if ((u_int)fd >= fdp->fd_nfiles ||
172 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
173 	    (fp->f_flag & FREAD) == 0)
174 		return (EBADF);
175 
176 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
177 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
178 }
179 
180 int
181 dofilereadv(p, fd, fp, iovp, iovcnt, offset, flags, retval)
182 	struct proc *p;
183 	int fd;
184 	struct file *fp;
185 	const struct iovec *iovp;
186 	int iovcnt;
187 	off_t *offset;
188 	int flags;
189 	register_t *retval;
190 {
191 	struct uio auio;
192 	register struct iovec *iov;
193 	struct iovec *needfree;
194 	struct iovec aiov[UIO_SMALLIOV];
195 	long i, cnt, error = 0;
196 	u_int iovlen;
197 #ifdef KTRACE
198 	struct iovec *ktriov = NULL;
199 #endif
200 
201 	/* note: can't use iovlen until iovcnt is validated */
202 	iovlen = iovcnt * sizeof(struct iovec);
203 	if ((u_int)iovcnt > UIO_SMALLIOV) {
204 		if ((u_int)iovcnt > IOV_MAX)
205 			return (EINVAL);
206 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
207 		needfree = iov;
208 	} else if ((u_int)iovcnt > 0) {
209 		iov = aiov;
210 		needfree = NULL;
211 	} else
212 		return (EINVAL);
213 
214 	auio.uio_iov = iov;
215 	auio.uio_iovcnt = iovcnt;
216 	auio.uio_rw = UIO_READ;
217 	auio.uio_segflg = UIO_USERSPACE;
218 	auio.uio_procp = p;
219 	error = copyin(iovp, iov, iovlen);
220 	if (error)
221 		goto done;
222 	auio.uio_resid = 0;
223 	for (i = 0; i < iovcnt; i++) {
224 		auio.uio_resid += iov->iov_len;
225 		/*
226 		 * Reads return ssize_t because -1 is returned on error.
227 		 * Therefore we must restrict the length to SSIZE_MAX to
228 		 * avoid garbage return values.
229 		 */
230 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
231 			error = EINVAL;
232 			goto done;
233 		}
234 		iov++;
235 	}
236 #ifdef KTRACE
237 	/*
238 	 * if tracing, save a copy of iovec
239 	 */
240 	if (KTRPOINT(p, KTR_GENIO))  {
241 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
242 		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
243 	}
244 #endif
245 	cnt = auio.uio_resid;
246 	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
247 	if (error)
248 		if (auio.uio_resid != cnt && (error == ERESTART ||
249 		    error == EINTR || error == EWOULDBLOCK))
250 			error = 0;
251 	cnt -= auio.uio_resid;
252 #ifdef KTRACE
253 	if (KTRPOINT(p, KTR_GENIO))
254 		if (error == 0) {
255 			ktrgenio(p->p_tracep, fd, UIO_READ, ktriov, cnt,
256 			    error);
257 		FREE(ktriov, M_TEMP);
258 	}
259 #endif
260 	*retval = cnt;
261 done:
262 	if (needfree)
263 		FREE(needfree, M_IOV);
264 	return (error);
265 }
266 
267 /*
268  * Write system call
269  */
270 int
271 sys_write(p, v, retval)
272 	struct proc *p;
273 	void *v;
274 	register_t *retval;
275 {
276 	register struct sys_write_args /* {
277 		syscallarg(int) fd;
278 		syscallarg(const void *) buf;
279 		syscallarg(size_t) nbyte;
280 	} */ *uap = v;
281 	int fd = SCARG(uap, fd);
282 	register struct file *fp;
283 	register struct filedesc *fdp = p->p_fd;
284 
285 	if ((u_int)fd >= fdp->fd_nfiles ||
286 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
287 	    (fp->f_flag & FWRITE) == 0)
288 		return (EBADF);
289 
290 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
291 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
292 }
293 
294 int
295 dofilewrite(p, fd, fp, buf, nbyte, offset, flags, retval)
296 	struct proc *p;
297 	int fd;
298 	struct file *fp;
299 	const void *buf;
300 	size_t nbyte;
301 	off_t *offset;
302 	int flags;
303 	register_t *retval;
304 {
305 	struct uio auio;
306 	struct iovec aiov;
307 	long cnt, error = 0;
308 #ifdef KTRACE
309 	struct iovec ktriov;
310 #endif
311 
312 	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
313 	aiov.iov_len = nbyte;
314 	auio.uio_iov = &aiov;
315 	auio.uio_iovcnt = 1;
316 	auio.uio_resid = nbyte;
317 	auio.uio_rw = UIO_WRITE;
318 	auio.uio_segflg = UIO_USERSPACE;
319 	auio.uio_procp = p;
320 
321 	/*
322 	 * Writes return ssize_t because -1 is returned on error.  Therefore
323 	 * we must restrict the length to SSIZE_MAX to avoid garbage return
324 	 * values.
325 	 */
326 	if (auio.uio_resid > SSIZE_MAX)
327 		return (EINVAL);
328 
329 #ifdef KTRACE
330 	/*
331 	 * if tracing, save a copy of iovec
332 	 */
333 	if (KTRPOINT(p, KTR_GENIO))
334 		ktriov = aiov;
335 #endif
336 	cnt = auio.uio_resid;
337 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
338 	if (error) {
339 		if (auio.uio_resid != cnt && (error == ERESTART ||
340 		    error == EINTR || error == EWOULDBLOCK))
341 			error = 0;
342 		if (error == EPIPE)
343 			psignal(p, SIGPIPE);
344 	}
345 	cnt -= auio.uio_resid;
346 #ifdef KTRACE
347 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
348 		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktriov, cnt, error);
349 #endif
350 	*retval = cnt;
351 	return (error);
352 }
353 
354 /*
355  * Gather write system call
356  */
357 int
358 sys_writev(p, v, retval)
359 	struct proc *p;
360 	void *v;
361 	register_t *retval;
362 {
363 	register struct sys_writev_args /* {
364 		syscallarg(int) fd;
365 		syscallarg(const struct iovec *) iovp;
366 		syscallarg(int) iovcnt;
367 	} */ *uap = v;
368 	int fd = SCARG(uap, fd);
369 	register struct file *fp;
370 	register struct filedesc *fdp = p->p_fd;
371 
372 	if ((u_int)fd >= fdp->fd_nfiles ||
373 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
374 	    (fp->f_flag & FWRITE) == 0)
375 		return (EBADF);
376 
377 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
378 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
379 }
380 
381 int
382 dofilewritev(p, fd, fp, iovp, iovcnt, offset, flags, retval)
383 	struct proc *p;
384 	int fd;
385 	struct file *fp;
386 	const struct iovec *iovp;
387 	int iovcnt;
388 	off_t *offset;
389 	int flags;
390 	register_t *retval;
391 {
392 	struct uio auio;
393 	register struct iovec *iov;
394 	struct iovec *needfree;
395 	struct iovec aiov[UIO_SMALLIOV];
396 	long i, cnt, error = 0;
397 	u_int iovlen;
398 #ifdef KTRACE
399 	struct iovec *ktriov = NULL;
400 #endif
401 
402 	/* note: can't use iovlen until iovcnt is validated */
403 	iovlen = iovcnt * sizeof(struct iovec);
404 	if ((u_int)iovcnt > UIO_SMALLIOV) {
405 		if ((u_int)iovcnt > IOV_MAX)
406 			return (EINVAL);
407 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
408 		needfree = iov;
409 	} else if ((u_int)iovcnt > 0) {
410 		iov = aiov;
411 		needfree = NULL;
412 	} else
413 		return (EINVAL);
414 
415 	auio.uio_iov = iov;
416 	auio.uio_iovcnt = iovcnt;
417 	auio.uio_rw = UIO_WRITE;
418 	auio.uio_segflg = UIO_USERSPACE;
419 	auio.uio_procp = p;
420 	error = copyin(iovp, iov, iovlen);
421 	if (error)
422 		goto done;
423 	auio.uio_resid = 0;
424 	for (i = 0; i < iovcnt; i++) {
425 		auio.uio_resid += iov->iov_len;
426 		/*
427 		 * Writes return ssize_t because -1 is returned on error.
428 		 * Therefore we must restrict the length to SSIZE_MAX to
429 		 * avoid garbage return values.
430 		 */
431 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
432 			error = EINVAL;
433 			goto done;
434 		}
435 		iov++;
436 	}
437 #ifdef KTRACE
438 	/*
439 	 * if tracing, save a copy of iovec
440 	 */
441 	if (KTRPOINT(p, KTR_GENIO))  {
442 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
443 		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
444 	}
445 #endif
446 	cnt = auio.uio_resid;
447 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
448 	if (error) {
449 		if (auio.uio_resid != cnt && (error == ERESTART ||
450 		    error == EINTR || error == EWOULDBLOCK))
451 			error = 0;
452 		if (error == EPIPE)
453 			psignal(p, SIGPIPE);
454 	}
455 	cnt -= auio.uio_resid;
456 #ifdef KTRACE
457 	if (KTRPOINT(p, KTR_GENIO))
458 		if (error == 0) {
459 			ktrgenio(p->p_tracep, fd, UIO_WRITE, ktriov, cnt,
460 			    error);
461 		FREE(ktriov, M_TEMP);
462 	}
463 #endif
464 	*retval = cnt;
465 done:
466 	if (needfree)
467 		FREE(needfree, M_IOV);
468 	return (error);
469 }
470 
471 /*
472  * Ioctl system call
473  */
474 /* ARGSUSED */
475 int
476 sys_ioctl(p, v, retval)
477 	struct proc *p;
478 	void *v;
479 	register_t *retval;
480 {
481 	register struct sys_ioctl_args /* {
482 		syscallarg(int) fd;
483 		syscallarg(u_long) com;
484 		syscallarg(caddr_t) data;
485 	} */ *uap = v;
486 	register struct file *fp;
487 	register struct filedesc *fdp;
488 	register u_long com;
489 	register int error;
490 	register u_int size;
491 	caddr_t data, memp;
492 	int tmp;
493 #define STK_PARAMS	128
494 	char stkbuf[STK_PARAMS];
495 
496 	fdp = p->p_fd;
497 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
498 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
499 		return (EBADF);
500 
501 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
502 		return (EBADF);
503 
504 	switch (com = SCARG(uap, com)) {
505 	case FIONCLEX:
506 		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
507 		return (0);
508 	case FIOCLEX:
509 		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
510 		return (0);
511 	}
512 
513 	/*
514 	 * Interpret high order word to find amount of data to be
515 	 * copied to/from the user's address space.
516 	 */
517 	size = IOCPARM_LEN(com);
518 	if (size > IOCPARM_MAX)
519 		return (ENOTTY);
520 	memp = NULL;
521 	if (size > sizeof(stkbuf)) {
522 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
523 		data = memp;
524 	} else
525 		data = stkbuf;
526 	if (com&IOC_IN) {
527 		if (size) {
528 			error = copyin(SCARG(uap, data), data, size);
529 			if (error) {
530 				if (memp)
531 					free(memp, M_IOCTLOPS);
532 				return (error);
533 			}
534 		} else
535 			*(caddr_t *)data = SCARG(uap, data);
536 	} else if ((com&IOC_OUT) && size)
537 		/*
538 		 * Zero the buffer so the user always
539 		 * gets back something deterministic.
540 		 */
541 		memset(data, 0, size);
542 	else if (com&IOC_VOID)
543 		*(caddr_t *)data = SCARG(uap, data);
544 
545 	switch (com) {
546 
547 	case FIONBIO:
548 		if ((tmp = *(int *)data) != 0)
549 			fp->f_flag |= FNONBLOCK;
550 		else
551 			fp->f_flag &= ~FNONBLOCK;
552 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
553 		break;
554 
555 	case FIOASYNC:
556 		if ((tmp = *(int *)data) != 0)
557 			fp->f_flag |= FASYNC;
558 		else
559 			fp->f_flag &= ~FASYNC;
560 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
561 		break;
562 
563 	case FIOSETOWN:
564 		tmp = *(int *)data;
565 		if (fp->f_type == DTYPE_SOCKET) {
566 			((struct socket *)fp->f_data)->so_pgid = tmp;
567 			error = 0;
568 			break;
569 		}
570 		if (tmp <= 0) {
571 			tmp = -tmp;
572 		} else {
573 			struct proc *p1 = pfind(tmp);
574 			if (p1 == 0) {
575 				error = ESRCH;
576 				break;
577 			}
578 			tmp = p1->p_pgrp->pg_id;
579 		}
580 		error = (*fp->f_ops->fo_ioctl)
581 			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
582 		break;
583 
584 	case FIOGETOWN:
585 		if (fp->f_type == DTYPE_SOCKET) {
586 			error = 0;
587 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
588 			break;
589 		}
590 		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
591 		*(int *)data = -*(int *)data;
592 		break;
593 
594 	default:
595 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
596 		/*
597 		 * Copy any data to user, size was
598 		 * already set and checked above.
599 		 */
600 		if (error == 0 && (com&IOC_OUT) && size)
601 			error = copyout(data, SCARG(uap, data), size);
602 		break;
603 	}
604 	if (memp)
605 		free(memp, M_IOCTLOPS);
606 	return (error);
607 }
608 
609 int	selwait, nselcoll;
610 
611 /*
612  * Select system call.
613  */
614 int
615 sys_select(p, v, retval)
616 	register struct proc *p;
617 	void *v;
618 	register_t *retval;
619 {
620 	register struct sys_select_args /* {
621 		syscallarg(int) nd;
622 		syscallarg(fd_set *) in;
623 		syscallarg(fd_set *) ou;
624 		syscallarg(fd_set *) ex;
625 		syscallarg(struct timeval *) tv;
626 	} */ *uap = v;
627 	caddr_t bits;
628 	char smallbits[howmany(FD_SETSIZE, NFDBITS) * sizeof(fd_mask) * 6];
629 	struct timeval atv;
630 	int s, ncoll, error = 0, timo;
631 	size_t ni;
632 
633 	if (SCARG(uap, nd) < 0)
634 		return (EINVAL);
635 	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
636 		/* forgiving; slightly wrong */
637 		SCARG(uap, nd) = p->p_fd->fd_nfiles;
638 	}
639 	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
640 	if (ni * 6 > sizeof(smallbits))
641 		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
642 	else
643 		bits = smallbits;
644 
645 #define	getbits(name, x) \
646 	if (SCARG(uap, name)) { \
647 		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
648 		if (error) \
649 			goto done; \
650 	} else \
651 		memset(bits + ni * x, 0, ni);
652 	getbits(in, 0);
653 	getbits(ou, 1);
654 	getbits(ex, 2);
655 #undef	getbits
656 
657 	if (SCARG(uap, tv)) {
658 		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
659 			sizeof(atv));
660 		if (error)
661 			goto done;
662 		if (itimerfix(&atv)) {
663 			error = EINVAL;
664 			goto done;
665 		}
666 		s = splclock();
667 		timeradd(&atv, &time, &atv);
668 		timo = hzto(&atv);
669 		/*
670 		 * Avoid inadvertently sleeping forever.
671 		 */
672 		if (timo == 0)
673 			timo = 1;
674 		splx(s);
675 	} else
676 		timo = 0;
677 retry:
678 	ncoll = nselcoll;
679 	p->p_flag |= P_SELECT;
680 	error = selscan(p, (fd_mask *)(bits + ni * 0),
681 			   (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
682 	if (error || *retval)
683 		goto done;
684 	s = splhigh();
685 	if (timo && timercmp(&time, &atv, >=)) {
686 		splx(s);
687 		goto done;
688 	}
689 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
690 		splx(s);
691 		goto retry;
692 	}
693 	p->p_flag &= ~P_SELECT;
694 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
695 	splx(s);
696 	if (error == 0)
697 		goto retry;
698 done:
699 	p->p_flag &= ~P_SELECT;
700 	/* select is not restarted after signals... */
701 	if (error == ERESTART)
702 		error = EINTR;
703 	if (error == EWOULDBLOCK)
704 		error = 0;
705 	if (error == 0) {
706 #define	putbits(name, x) \
707 		if (SCARG(uap, name)) { \
708 			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
709 			if (error) \
710 				goto out; \
711 		}
712 		putbits(in, 3);
713 		putbits(ou, 4);
714 		putbits(ex, 5);
715 #undef putbits
716 	}
717 out:
718 	if (ni * 6 > sizeof(smallbits))
719 		free(bits, M_TEMP);
720 	return (error);
721 }
722 
723 int
724 selscan(p, ibitp, obitp, nfd, retval)
725 	struct proc *p;
726 	fd_mask *ibitp, *obitp;
727 	int nfd;
728 	register_t *retval;
729 {
730 	register struct filedesc *fdp = p->p_fd;
731 	register int msk, i, j, fd;
732 	register fd_mask ibits, obits;
733 	struct file *fp;
734 	int n = 0;
735 	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
736 			       POLLWRNORM | POLLHUP | POLLERR,
737 			       POLLRDBAND };
738 
739 	for (msk = 0; msk < 3; msk++) {
740 		for (i = 0; i < nfd; i += NFDBITS) {
741 			ibits = *ibitp++;
742 			obits = 0;
743 			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
744 				ibits &= ~(1 << j);
745 				fp = fdp->fd_ofiles[fd];
746 				if (fp == NULL)
747 					return (EBADF);
748 				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
749 					obits |= (1 << j);
750 					n++;
751 				}
752 			}
753 			*obitp++ = obits;
754 		}
755 	}
756 	*retval = n;
757 	return (0);
758 }
759 
760 /*
761  * Poll system call.
762  */
763 int
764 sys_poll(p, v, retval)
765 	register struct proc *p;
766 	void *v;
767 	register_t *retval;
768 {
769 	register struct sys_poll_args /* {
770 		syscallarg(struct pollfd *) fds;
771 		syscallarg(u_int) nfds;
772 		syscallarg(int) timeout;
773 	} */ *uap = v;
774 	caddr_t bits;
775 	char smallbits[32 * sizeof(struct pollfd)];
776 	struct timeval atv;
777 	int s, ncoll, error = 0, timo;
778 	size_t ni;
779 
780 	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
781 		/* forgiving; slightly wrong */
782 		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
783 	}
784 	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
785 	if (ni > sizeof(smallbits))
786 		bits = malloc(ni, M_TEMP, M_WAITOK);
787 	else
788 		bits = smallbits;
789 
790 	error = copyin(SCARG(uap, fds), bits, ni);
791 	if (error)
792 		goto done;
793 
794 	if (SCARG(uap, timeout) != INFTIM) {
795 		atv.tv_sec = SCARG(uap, timeout) / 1000;
796 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
797 		if (itimerfix(&atv)) {
798 			error = EINVAL;
799 			goto done;
800 		}
801 		s = splclock();
802 		timeradd(&atv, &time, &atv);
803 		timo = hzto(&atv);
804 		/*
805 		 * Avoid inadvertently sleeping forever.
806 		 */
807 		if (timo == 0)
808 			timo = 1;
809 		splx(s);
810 	} else
811 		timo = 0;
812 retry:
813 	ncoll = nselcoll;
814 	p->p_flag |= P_SELECT;
815 	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
816 	if (error || *retval)
817 		goto done;
818 	s = splhigh();
819 	if (timo && timercmp(&time, &atv, >=)) {
820 		splx(s);
821 		goto done;
822 	}
823 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
824 		splx(s);
825 		goto retry;
826 	}
827 	p->p_flag &= ~P_SELECT;
828 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
829 	splx(s);
830 	if (error == 0)
831 		goto retry;
832 done:
833 	p->p_flag &= ~P_SELECT;
834 	/* poll is not restarted after signals... */
835 	if (error == ERESTART)
836 		error = EINTR;
837 	if (error == EWOULDBLOCK)
838 		error = 0;
839 	if (error == 0) {
840 		error = copyout(bits, SCARG(uap, fds), ni);
841 		if (error)
842 			goto out;
843 	}
844 out:
845 	if (ni > sizeof(smallbits))
846 		free(bits, M_TEMP);
847 	return (error);
848 }
849 
850 int
851 pollscan(p, fds, nfd, retval)
852 	struct proc *p;
853 	struct pollfd *fds;
854 	int nfd;
855 	register_t *retval;
856 {
857 	register struct filedesc *fdp = p->p_fd;
858 	int i;
859 	struct file *fp;
860 	int n = 0;
861 
862 	for (i = 0; i < nfd; i++, fds++) {
863 		if ((u_int)fds->fd >= fdp->fd_nfiles) {
864 			fds->revents = POLLNVAL;
865 			n++;
866 		} else {
867 			fp = fdp->fd_ofiles[fds->fd];
868 			if (fp == 0) {
869 				fds->revents = POLLNVAL;
870 				n++;
871 			} else {
872 				fds->revents = (*fp->f_ops->fo_poll)(fp,
873 				    fds->events | POLLERR | POLLHUP, p);
874 				if (fds->revents != 0)
875 					n++;
876 			}
877 		}
878 	}
879 	*retval = n;
880 	return (0);
881 }
882 
883 /*ARGSUSED*/
884 int
885 seltrue(dev, events, p)
886 	dev_t dev;
887 	int events;
888 	struct proc *p;
889 {
890 
891 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
892 }
893 
894 /*
895  * Record a select request.
896  */
897 void
898 selrecord(selector, sip)
899 	struct proc *selector;
900 	struct selinfo *sip;
901 {
902 	struct proc *p;
903 	pid_t mypid;
904 
905 	mypid = selector->p_pid;
906 	if (sip->si_pid == mypid)
907 		return;
908 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
909 	    p->p_wchan == (caddr_t)&selwait)
910 		sip->si_flags |= SI_COLL;
911 	else
912 		sip->si_pid = mypid;
913 }
914 
915 /*
916  * Do a wakeup when a selectable event occurs.
917  */
918 void
919 selwakeup(sip)
920 	register struct selinfo *sip;
921 {
922 	register struct proc *p;
923 	int s;
924 
925 	if (sip->si_pid == 0)
926 		return;
927 	if (sip->si_flags & SI_COLL) {
928 		nselcoll++;
929 		sip->si_flags &= ~SI_COLL;
930 		wakeup((caddr_t)&selwait);
931 	}
932 	p = pfind(sip->si_pid);
933 	sip->si_pid = 0;
934 	if (p != NULL) {
935 		s = splhigh();
936 		if (p->p_wchan == (caddr_t)&selwait) {
937 			if (p->p_stat == SSLEEP)
938 				setrunnable(p);
939 			else
940 				unsleep(p);
941 		} else if (p->p_flag & P_SELECT)
942 			p->p_flag &= ~P_SELECT;
943 		splx(s);
944 	}
945 }
946