xref: /netbsd-src/sys/kern/sys_generic.c (revision 76dfffe33547c37f8bdd446e3e4ab0f3c16cea4b)
1 /*	$NetBSD: sys_generic.c,v 1.30 1996/09/07 21:47:23 mycroft Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/ioctl.h>
47 #include <sys/file.h>
48 #include <sys/proc.h>
49 #include <sys/socketvar.h>
50 #include <sys/signalvar.h>
51 #include <sys/uio.h>
52 #include <sys/kernel.h>
53 #include <sys/stat.h>
54 #include <sys/malloc.h>
55 #include <sys/poll.h>
56 #ifdef KTRACE
57 #include <sys/ktrace.h>
58 #endif
59 
60 #include <sys/mount.h>
61 #include <sys/syscallargs.h>
62 
63 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
64 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
65 
66 /*
67  * Read system call.
68  */
69 /* ARGSUSED */
70 int
71 sys_read(p, v, retval)
72 	struct proc *p;
73 	void *v;
74 	register_t *retval;
75 {
76 	register struct sys_read_args /* {
77 		syscallarg(int) fd;
78 		syscallarg(char *) buf;
79 		syscallarg(u_int) nbyte;
80 	} */ *uap = v;
81 	register struct file *fp;
82 	register struct filedesc *fdp = p->p_fd;
83 	struct uio auio;
84 	struct iovec aiov;
85 	long cnt, error = 0;
86 #ifdef KTRACE
87 	struct iovec ktriov;
88 #endif
89 
90 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
91 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
92 	    (fp->f_flag & FREAD) == 0)
93 		return (EBADF);
94 	aiov.iov_base = (caddr_t)SCARG(uap, buf);
95 	aiov.iov_len = SCARG(uap, nbyte);
96 	auio.uio_iov = &aiov;
97 	auio.uio_iovcnt = 1;
98 	auio.uio_resid = SCARG(uap, nbyte);
99 	auio.uio_rw = UIO_READ;
100 	auio.uio_segflg = UIO_USERSPACE;
101 	auio.uio_procp = p;
102 	if (auio.uio_resid < 0)
103 		return EINVAL;
104 #ifdef KTRACE
105 	/*
106 	 * if tracing, save a copy of iovec
107 	 */
108 	if (KTRPOINT(p, KTR_GENIO))
109 		ktriov = aiov;
110 #endif
111 	cnt = SCARG(uap, nbyte);
112 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
113 	if (error)
114 		if (auio.uio_resid != cnt && (error == ERESTART ||
115 		    error == EINTR || error == EWOULDBLOCK))
116 			error = 0;
117 	cnt -= auio.uio_resid;
118 #ifdef KTRACE
119 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
120 		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, &ktriov,
121 		    cnt, error);
122 #endif
123 	*retval = cnt;
124 	return (error);
125 }
126 
127 /*
128  * Scatter read system call.
129  */
130 int
131 sys_readv(p, v, retval)
132 	struct proc *p;
133 	void *v;
134 	register_t *retval;
135 {
136 	register struct sys_readv_args /* {
137 		syscallarg(int) fd;
138 		syscallarg(struct iovec *) iovp;
139 		syscallarg(u_int) iovcnt;
140 	} */ *uap = v;
141 	register struct file *fp;
142 	register struct filedesc *fdp = p->p_fd;
143 	struct uio auio;
144 	register struct iovec *iov;
145 	struct iovec *needfree;
146 	struct iovec aiov[UIO_SMALLIOV];
147 	long i, cnt, error = 0;
148 	u_int iovlen;
149 #ifdef KTRACE
150 	struct iovec *ktriov = NULL;
151 #endif
152 
153 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
154 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
155 	    (fp->f_flag & FREAD) == 0)
156 		return (EBADF);
157 	/* note: can't use iovlen until iovcnt is validated */
158 	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
159 	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
160 		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
161 			return (EINVAL);
162 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
163 		needfree = iov;
164 	} else {
165 		iov = aiov;
166 		needfree = NULL;
167 	}
168 	auio.uio_iov = iov;
169 	auio.uio_iovcnt = SCARG(uap, iovcnt);
170 	auio.uio_rw = UIO_READ;
171 	auio.uio_segflg = UIO_USERSPACE;
172 	auio.uio_procp = p;
173 	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
174 	if (error)
175 		goto done;
176 	auio.uio_resid = 0;
177 	for (i = 0; i < SCARG(uap, iovcnt); i++) {
178 #if 0
179 		/* Cannot happen iov_len is unsigned */
180 		if (iov->iov_len < 0) {
181 			error = EINVAL;
182 			goto done;
183 		}
184 #endif
185 		auio.uio_resid += iov->iov_len;
186 		if (auio.uio_resid < 0) {
187 			error = EINVAL;
188 			goto done;
189 		}
190 		iov++;
191 	}
192 #ifdef KTRACE
193 	/*
194 	 * if tracing, save a copy of iovec
195 	 */
196 	if (KTRPOINT(p, KTR_GENIO))  {
197 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
198 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
199 	}
200 #endif
201 	cnt = auio.uio_resid;
202 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
203 	if (error)
204 		if (auio.uio_resid != cnt && (error == ERESTART ||
205 		    error == EINTR || error == EWOULDBLOCK))
206 			error = 0;
207 	cnt -= auio.uio_resid;
208 #ifdef KTRACE
209 	if (ktriov != NULL) {
210 		if (error == 0)
211 			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, ktriov,
212 			    cnt, error);
213 		FREE(ktriov, M_TEMP);
214 	}
215 #endif
216 	*retval = cnt;
217 done:
218 	if (needfree)
219 		FREE(needfree, M_IOV);
220 	return (error);
221 }
222 
223 /*
224  * Write system call
225  */
226 int
227 sys_write(p, v, retval)
228 	struct proc *p;
229 	void *v;
230 	register_t *retval;
231 {
232 	register struct sys_write_args /* {
233 		syscallarg(int) fd;
234 		syscallarg(char *) buf;
235 		syscallarg(u_int) nbyte;
236 	} */ *uap = v;
237 	register struct file *fp;
238 	register struct filedesc *fdp = p->p_fd;
239 	struct uio auio;
240 	struct iovec aiov;
241 	long cnt, error = 0;
242 #ifdef KTRACE
243 	struct iovec ktriov;
244 #endif
245 
246 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
247 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
248 	    (fp->f_flag & FWRITE) == 0)
249 		return (EBADF);
250 	aiov.iov_base = (caddr_t)SCARG(uap, buf);
251 	aiov.iov_len = SCARG(uap, nbyte);
252 	auio.uio_iov = &aiov;
253 	auio.uio_iovcnt = 1;
254 	auio.uio_resid = SCARG(uap, nbyte);
255 	auio.uio_rw = UIO_WRITE;
256 	auio.uio_segflg = UIO_USERSPACE;
257 	auio.uio_procp = p;
258 	if (auio.uio_resid < 0)
259 		return EINVAL;
260 #ifdef KTRACE
261 	/*
262 	 * if tracing, save a copy of iovec
263 	 */
264 	if (KTRPOINT(p, KTR_GENIO))
265 		ktriov = aiov;
266 #endif
267 	cnt = SCARG(uap, nbyte);
268 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
269 	if (error) {
270 		if (auio.uio_resid != cnt && (error == ERESTART ||
271 		    error == EINTR || error == EWOULDBLOCK))
272 			error = 0;
273 		if (error == EPIPE)
274 			psignal(p, SIGPIPE);
275 	}
276 	cnt -= auio.uio_resid;
277 #ifdef KTRACE
278 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
279 		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
280 		    &ktriov, cnt, error);
281 #endif
282 	*retval = cnt;
283 	return (error);
284 }
285 
286 /*
287  * Gather write system call
288  */
289 int
290 sys_writev(p, v, retval)
291 	struct proc *p;
292 	void *v;
293 	register_t *retval;
294 {
295 	register struct sys_writev_args /* {
296 		syscallarg(int) fd;
297 		syscallarg(struct iovec *) iovp;
298 		syscallarg(u_int) iovcnt;
299 	} */ *uap = v;
300 	register struct file *fp;
301 	register struct filedesc *fdp = p->p_fd;
302 	struct uio auio;
303 	register struct iovec *iov;
304 	struct iovec *needfree;
305 	struct iovec aiov[UIO_SMALLIOV];
306 	long i, cnt, error = 0;
307 	u_int iovlen;
308 #ifdef KTRACE
309 	struct iovec *ktriov = NULL;
310 #endif
311 
312 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
313 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
314 	    (fp->f_flag & FWRITE) == 0)
315 		return (EBADF);
316 	/* note: can't use iovlen until iovcnt is validated */
317 	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
318 	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
319 		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
320 			return (EINVAL);
321 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
322 		needfree = iov;
323 	} else {
324 		iov = aiov;
325 		needfree = NULL;
326 	}
327 	auio.uio_iov = iov;
328 	auio.uio_iovcnt = SCARG(uap, iovcnt);
329 	auio.uio_rw = UIO_WRITE;
330 	auio.uio_segflg = UIO_USERSPACE;
331 	auio.uio_procp = p;
332 	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
333 	if (error)
334 		goto done;
335 	auio.uio_resid = 0;
336 	for (i = 0; i < SCARG(uap, iovcnt); i++) {
337 #if 0
338 		/* Cannot happen iov_len is unsigned */
339 		if (iov->iov_len < 0) {
340 			error = EINVAL;
341 			goto done;
342 		}
343 #endif
344 		auio.uio_resid += iov->iov_len;
345 		if (auio.uio_resid < 0) {
346 			error = EINVAL;
347 			goto done;
348 		}
349 		iov++;
350 	}
351 #ifdef KTRACE
352 	/*
353 	 * if tracing, save a copy of iovec
354 	 */
355 	if (KTRPOINT(p, KTR_GENIO))  {
356 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
357 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
358 	}
359 #endif
360 	cnt = auio.uio_resid;
361 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
362 	if (error) {
363 		if (auio.uio_resid != cnt && (error == ERESTART ||
364 		    error == EINTR || error == EWOULDBLOCK))
365 			error = 0;
366 		if (error == EPIPE)
367 			psignal(p, SIGPIPE);
368 	}
369 	cnt -= auio.uio_resid;
370 #ifdef KTRACE
371 	if (ktriov != NULL) {
372 		if (error == 0)
373 			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
374 				ktriov, cnt, error);
375 		FREE(ktriov, M_TEMP);
376 	}
377 #endif
378 	*retval = cnt;
379 done:
380 	if (needfree)
381 		FREE(needfree, M_IOV);
382 	return (error);
383 }
384 
385 /*
386  * Ioctl system call
387  */
388 /* ARGSUSED */
389 int
390 sys_ioctl(p, v, retval)
391 	struct proc *p;
392 	void *v;
393 	register_t *retval;
394 {
395 	register struct sys_ioctl_args /* {
396 		syscallarg(int) fd;
397 		syscallarg(u_long) com;
398 		syscallarg(caddr_t) data;
399 	} */ *uap = v;
400 	register struct file *fp;
401 	register struct filedesc *fdp;
402 	register u_long com;
403 	register int error;
404 	register u_int size;
405 	caddr_t data, memp;
406 	int tmp;
407 #define STK_PARAMS	128
408 	char stkbuf[STK_PARAMS];
409 
410 	fdp = p->p_fd;
411 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
412 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
413 		return (EBADF);
414 
415 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
416 		return (EBADF);
417 
418 	switch (com = SCARG(uap, com)) {
419 	case FIONCLEX:
420 		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
421 		return (0);
422 	case FIOCLEX:
423 		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
424 		return (0);
425 	}
426 
427 	/*
428 	 * Interpret high order word to find amount of data to be
429 	 * copied to/from the user's address space.
430 	 */
431 	size = IOCPARM_LEN(com);
432 	if (size > IOCPARM_MAX)
433 		return (ENOTTY);
434 	memp = NULL;
435 	if (size > sizeof (stkbuf)) {
436 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
437 		data = memp;
438 	} else
439 		data = stkbuf;
440 	if (com&IOC_IN) {
441 		if (size) {
442 			error = copyin(SCARG(uap, data), data, (u_int)size);
443 			if (error) {
444 				if (memp)
445 					free(memp, M_IOCTLOPS);
446 				return (error);
447 			}
448 		} else
449 			*(caddr_t *)data = SCARG(uap, data);
450 	} else if ((com&IOC_OUT) && size)
451 		/*
452 		 * Zero the buffer so the user always
453 		 * gets back something deterministic.
454 		 */
455 		bzero(data, size);
456 	else if (com&IOC_VOID)
457 		*(caddr_t *)data = SCARG(uap, data);
458 
459 	switch (com) {
460 
461 	case FIONBIO:
462 		if ((tmp = *(int *)data) != 0)
463 			fp->f_flag |= FNONBLOCK;
464 		else
465 			fp->f_flag &= ~FNONBLOCK;
466 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
467 		break;
468 
469 	case FIOASYNC:
470 		if ((tmp = *(int *)data) != 0)
471 			fp->f_flag |= FASYNC;
472 		else
473 			fp->f_flag &= ~FASYNC;
474 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
475 		break;
476 
477 	case FIOSETOWN:
478 		tmp = *(int *)data;
479 		if (fp->f_type == DTYPE_SOCKET) {
480 			((struct socket *)fp->f_data)->so_pgid = tmp;
481 			error = 0;
482 			break;
483 		}
484 		if (tmp <= 0) {
485 			tmp = -tmp;
486 		} else {
487 			struct proc *p1 = pfind(tmp);
488 			if (p1 == 0) {
489 				error = ESRCH;
490 				break;
491 			}
492 			tmp = p1->p_pgrp->pg_id;
493 		}
494 		error = (*fp->f_ops->fo_ioctl)
495 			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
496 		break;
497 
498 	case FIOGETOWN:
499 		if (fp->f_type == DTYPE_SOCKET) {
500 			error = 0;
501 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
502 			break;
503 		}
504 		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
505 		*(int *)data = -*(int *)data;
506 		break;
507 
508 	default:
509 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
510 		/*
511 		 * Copy any data to user, size was
512 		 * already set and checked above.
513 		 */
514 		if (error == 0 && (com&IOC_OUT) && size)
515 			error = copyout(data, SCARG(uap, data), (u_int)size);
516 		break;
517 	}
518 	if (memp)
519 		free(memp, M_IOCTLOPS);
520 	return (error);
521 }
522 
/* Its address is the sleep channel used by select and poll waiters. */
int	selwait;
/* Bumped by selwakeup() each time it finds a collision recorded. */
int	nselcoll;
524 
525 /*
526  * Select system call.
527  */
528 int
529 sys_select(p, v, retval)
530 	register struct proc *p;
531 	void *v;
532 	register_t *retval;
533 {
534 	register struct sys_select_args /* {
535 		syscallarg(u_int) nd;
536 		syscallarg(fd_set *) in;
537 		syscallarg(fd_set *) ou;
538 		syscallarg(fd_set *) ex;
539 		syscallarg(struct timeval *) tv;
540 	} */ *uap = v;
541 	caddr_t bits;
542 	char smallbits[howmany(FD_SETSIZE, NFDBITS) * sizeof(fd_mask) * 6];
543 	struct timeval atv;
544 	int s, ncoll, error = 0, timo;
545 	size_t ni;
546 
547 	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
548 		/* forgiving; slightly wrong */
549 		SCARG(uap, nd) = p->p_fd->fd_nfiles;
550 	}
551 	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
552 	if (ni * 6 > sizeof(smallbits))
553 		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
554 	else
555 		bits = smallbits;
556 
557 #define	getbits(name, x) \
558 	if (SCARG(uap, name)) { \
559 		error = copyin((caddr_t)SCARG(uap, name), bits + ni * x, ni); \
560 		if (error) \
561 			goto done; \
562 	} else \
563 		bzero(bits + ni * x, ni);
564 	getbits(in, 0);
565 	getbits(ou, 1);
566 	getbits(ex, 2);
567 #undef	getbits
568 
569 	if (SCARG(uap, tv)) {
570 		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
571 			sizeof (atv));
572 		if (error)
573 			goto done;
574 		if (itimerfix(&atv)) {
575 			error = EINVAL;
576 			goto done;
577 		}
578 		s = splclock();
579 		timeradd(&atv, &time, &atv);
580 		timo = hzto(&atv);
581 		/*
582 		 * Avoid inadvertently sleeping forever.
583 		 */
584 		if (timo == 0)
585 			timo = 1;
586 		splx(s);
587 	} else
588 		timo = 0;
589 retry:
590 	ncoll = nselcoll;
591 	p->p_flag |= P_SELECT;
592 	error = selscan(p, (fd_mask *)(bits + ni * 0),
593 			   (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
594 	if (error || *retval)
595 		goto done;
596 	s = splhigh();
597 	if (timo && timercmp(&time, &atv, >=)) {
598 		splx(s);
599 		goto done;
600 	}
601 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
602 		splx(s);
603 		goto retry;
604 	}
605 	p->p_flag &= ~P_SELECT;
606 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
607 	splx(s);
608 	if (error == 0)
609 		goto retry;
610 done:
611 	p->p_flag &= ~P_SELECT;
612 	/* select is not restarted after signals... */
613 	if (error == ERESTART)
614 		error = EINTR;
615 	if (error == EWOULDBLOCK)
616 		error = 0;
617 	if (error == 0) {
618 #define	putbits(name, x) \
619 		if (SCARG(uap, name)) { \
620 			error = copyout(bits + ni * x, (caddr_t)SCARG(uap, name), ni); \
621 			if (error) \
622 				goto out; \
623 		}
624 		putbits(in, 3);
625 		putbits(ou, 4);
626 		putbits(ex, 5);
627 #undef putbits
628 	}
629 out:
630 	if (ni * 6 > sizeof(smallbits))
631 		free(bits, M_TEMP);
632 	return (error);
633 }
634 
635 int
636 selscan(p, ibitp, obitp, nfd, retval)
637 	struct proc *p;
638 	fd_mask *ibitp, *obitp;
639 	int nfd;
640 	register_t *retval;
641 {
642 	register struct filedesc *fdp = p->p_fd;
643 	register int msk, i, j, fd;
644 	register fd_mask ibits, obits;
645 	struct file *fp;
646 	int n = 0;
647 	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
648 			       POLLWRNORM | POLLHUP | POLLERR,
649 			       POLLRDBAND };
650 
651 	for (msk = 0; msk < 3; msk++) {
652 		for (i = 0; i < nfd; i += NFDBITS) {
653 			ibits = *ibitp++;
654 			obits = 0;
655 			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
656 				ibits &= ~(1 << j);
657 				fp = fdp->fd_ofiles[fd];
658 				if (fp == NULL)
659 					return (EBADF);
660 				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
661 					obits |= (1 << j);
662 					n++;
663 				}
664 			}
665 			*obitp++ = obits;
666 		}
667 	}
668 	*retval = n;
669 	return (0);
670 }
671 
672 /*
673  * Poll system call.
674  */
675 int
676 sys_poll(p, v, retval)
677 	register struct proc *p;
678 	void *v;
679 	register_t *retval;
680 {
681 	register struct sys_poll_args /* {
682 		syscallarg(struct pollfd *) fds;
683 		syscallarg(u_int) nfds;
684 		syscallarg(int) timeout;
685 	} */ *uap = v;
686 	caddr_t bits;
687 	char smallbits[32 * sizeof(struct pollfd)];
688 	struct timeval atv;
689 	int s, ncoll, error = 0, timo;
690 	size_t ni;
691 
692 	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
693 		/* forgiving; slightly wrong */
694 		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
695 	}
696 	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
697 	if (ni > sizeof(smallbits))
698 		bits = malloc(ni, M_TEMP, M_WAITOK);
699 	else
700 		bits = smallbits;
701 
702 	error = copyin((caddr_t)SCARG(uap, fds), bits, ni);
703 	if (error)
704 		goto done;
705 
706 	if (SCARG(uap, timeout) != INFTIM) {
707 		atv.tv_sec = SCARG(uap, timeout) / 1000;
708 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
709 		if (itimerfix(&atv)) {
710 			error = EINVAL;
711 			goto done;
712 		}
713 		s = splclock();
714 		timeradd(&atv, &time, &atv);
715 		timo = hzto(&atv);
716 		/*
717 		 * Avoid inadvertently sleeping forever.
718 		 */
719 		if (timo == 0)
720 			timo = 1;
721 		splx(s);
722 	} else
723 		timo = 0;
724 retry:
725 	ncoll = nselcoll;
726 	p->p_flag |= P_SELECT;
727 	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
728 	if (error || *retval)
729 		goto done;
730 	s = splhigh();
731 	if (timo && timercmp(&time, &atv, >=)) {
732 		splx(s);
733 		goto done;
734 	}
735 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
736 		splx(s);
737 		goto retry;
738 	}
739 	p->p_flag &= ~P_SELECT;
740 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
741 	splx(s);
742 	if (error == 0)
743 		goto retry;
744 done:
745 	p->p_flag &= ~P_SELECT;
746 	/* poll is not restarted after signals... */
747 	if (error == ERESTART)
748 		error = EINTR;
749 	if (error == EWOULDBLOCK)
750 		error = 0;
751 	if (error == 0) {
752 		error = copyout(bits, (caddr_t)SCARG(uap, fds), ni);
753 		if (error)
754 			goto out;
755 	}
756 out:
757 	if (ni > sizeof(smallbits))
758 		free(bits, M_TEMP);
759 	return (error);
760 }
761 
762 int
763 pollscan(p, fds, nfd, retval)
764 	struct proc *p;
765 	struct pollfd *fds;
766 	int nfd;
767 	register_t *retval;
768 {
769 	register struct filedesc *fdp = p->p_fd;
770 	int i;
771 	struct file *fp;
772 	int n = 0;
773 
774 	for (i = 0; i < nfd; i++, fds++) {
775 		fp = fdp->fd_ofiles[fds->fd];
776 		if (fp == 0) {
777 			fds->revents = POLLNVAL;
778 			n++;
779 		} else {
780 			fds->revents = (*fp->f_ops->fo_poll)(fp,
781 			    fds->events | POLLERR | POLLHUP, p);
782 			if (fds->revents != 0)
783 				n++;
784 		}
785 	}
786 	*retval = n;
787 	return (0);
788 }
789 
/*
 * Poll routine that always reports ordinary read and write readiness:
 * returns those of POLLIN, POLLOUT, POLLRDNORM and POLLWRNORM that are
 * set in events.  dev and p are not used.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	const int ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;

	return (ready & events);
}
800 
801 /*
802  * Record a select request.
803  */
804 void
805 selrecord(selector, sip)
806 	struct proc *selector;
807 	struct selinfo *sip;
808 {
809 	struct proc *p;
810 	pid_t mypid;
811 
812 	mypid = selector->p_pid;
813 	if (sip->si_pid == mypid)
814 		return;
815 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
816 	    p->p_wchan == (caddr_t)&selwait)
817 		sip->si_flags |= SI_COLL;
818 	else
819 		sip->si_pid = mypid;
820 }
821 
822 /*
823  * Do a wakeup when a selectable event occurs.
824  */
825 void
826 selwakeup(sip)
827 	register struct selinfo *sip;
828 {
829 	register struct proc *p;
830 	int s;
831 
832 	if (sip->si_pid == 0)
833 		return;
834 	if (sip->si_flags & SI_COLL) {
835 		nselcoll++;
836 		sip->si_flags &= ~SI_COLL;
837 		wakeup((caddr_t)&selwait);
838 	}
839 	p = pfind(sip->si_pid);
840 	sip->si_pid = 0;
841 	if (p != NULL) {
842 		s = splhigh();
843 		if (p->p_wchan == (caddr_t)&selwait) {
844 			if (p->p_stat == SSLEEP)
845 				setrunnable(p);
846 			else
847 				unsleep(p);
848 		} else if (p->p_flag & P_SELECT)
849 			p->p_flag &= ~P_SELECT;
850 		splx(s);
851 	}
852 }
853