xref: /openbsd-src/sys/kern/sys_generic.c (revision 9b355cb2ae160e0b4c6c4c9a8414b8249bb78ff3)
1 /*	$OpenBSD: sys_generic.c,v 1.13 1998/07/28 00:12:58 millert Exp $	*/
2 /*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1996 Theo de Raadt
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * (c) UNIX System Laboratories, Inc.
9  * All or some portions of this file are derived from material licensed
10  * to the University of California by American Telephone and Telegraph
11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12  * the permission of UNIX System Laboratories, Inc.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61 
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64 
65 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *));
66 int seltrue __P((dev_t, int, struct proc *));
67 void pollscan __P((struct proc *, struct pollfd *, int, register_t *));
68 
69 /*
70  * Read system call.
71  */
72 /* ARGSUSED */
73 int
74 sys_read(p, v, retval)
75 	struct proc *p;
76 	void *v;
77 	register_t *retval;
78 {
79 	register struct sys_read_args /* {
80 		syscallarg(int) fd;
81 		syscallarg(char *) buf;
82 		syscallarg(u_int) nbyte;
83 	} */ *uap = v;
84 	register struct file *fp;
85 	register struct filedesc *fdp = p->p_fd;
86 	struct uio auio;
87 	struct iovec aiov;
88 	long cnt, error = 0;
89 #ifdef KTRACE
90 	struct iovec ktriov;
91 #endif
92 
93 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
94 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
95 	    (fp->f_flag & FREAD) == 0)
96 		return (EBADF);
97 	aiov.iov_base = (caddr_t)SCARG(uap, buf);
98 	aiov.iov_len = SCARG(uap, nbyte);
99 	auio.uio_iov = &aiov;
100 	auio.uio_iovcnt = 1;
101 	auio.uio_resid = SCARG(uap, nbyte);
102 	auio.uio_rw = UIO_READ;
103 	auio.uio_segflg = UIO_USERSPACE;
104 	auio.uio_procp = p;
105 #ifdef KTRACE
106 	/*
107 	 * if tracing, save a copy of iovec
108 	 */
109 	if (KTRPOINT(p, KTR_GENIO))
110 		ktriov = aiov;
111 #endif
112 	cnt = SCARG(uap, nbyte);
113 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
114 	if (error)
115 		if (auio.uio_resid != cnt && (error == ERESTART ||
116 		    error == EINTR || error == EWOULDBLOCK))
117 			error = 0;
118 	cnt -= auio.uio_resid;
119 #ifdef KTRACE
120 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
121 		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, &ktriov,
122 		    cnt, error);
123 #endif
124 	*retval = cnt;
125 	return (error);
126 }
127 
128 /*
129  * Scatter read system call.
130  */
131 int
132 sys_readv(p, v, retval)
133 	struct proc *p;
134 	void *v;
135 	register_t *retval;
136 {
137 	register struct sys_readv_args /* {
138 		syscallarg(int) fd;
139 		syscallarg(struct iovec *) iovp;
140 		syscallarg(u_int) iovcnt;
141 	} */ *uap = v;
142 	register struct file *fp;
143 	register struct filedesc *fdp = p->p_fd;
144 	struct uio auio;
145 	register struct iovec *iov;
146 	struct iovec *needfree;
147 	struct iovec aiov[UIO_SMALLIOV];
148 	long i, cnt, error = 0;
149 	u_int iovlen;
150 #ifdef KTRACE
151 	struct iovec *ktriov = NULL;
152 #endif
153 
154 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
155 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
156 	    (fp->f_flag & FREAD) == 0)
157 		return (EBADF);
158 	if (SCARG(uap, iovcnt) <= 0)
159 		return (EINVAL);
160 	/* note: can't use iovlen until iovcnt is validated */
161 	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
162 	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
163 		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
164 			return (EINVAL);
165 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
166 		needfree = iov;
167 	} else {
168 		iov = aiov;
169 		needfree = NULL;
170 	}
171 	auio.uio_iov = iov;
172 	auio.uio_iovcnt = SCARG(uap, iovcnt);
173 	auio.uio_rw = UIO_READ;
174 	auio.uio_segflg = UIO_USERSPACE;
175 	auio.uio_procp = p;
176 	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
177 	if (error)
178 		goto done;
179 	auio.uio_resid = 0;
180 	for (i = 0; i < SCARG(uap, iovcnt); i++) {
181 		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
182 			error = EINVAL;
183 			goto done;
184 		}
185 
186 		auio.uio_resid += iov->iov_len;
187 		iov++;
188 	}
189 #ifdef KTRACE
190 	/*
191 	 * if tracing, save a copy of iovec
192 	 */
193 	if (KTRPOINT(p, KTR_GENIO))  {
194 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
195 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
196 	}
197 #endif
198 	cnt = auio.uio_resid;
199 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
200 	if (error)
201 		if (auio.uio_resid != cnt && (error == ERESTART ||
202 		    error == EINTR || error == EWOULDBLOCK))
203 			error = 0;
204 	cnt -= auio.uio_resid;
205 #ifdef KTRACE
206 	if (ktriov != NULL) {
207 		if (error == 0)
208 			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, ktriov,
209 			    cnt, error);
210 		FREE(ktriov, M_TEMP);
211 	}
212 #endif
213 	*retval = cnt;
214 done:
215 	if (needfree)
216 		FREE(needfree, M_IOV);
217 	return (error);
218 }
219 
220 /*
221  * Write system call
222  */
223 int
224 sys_write(p, v, retval)
225 	struct proc *p;
226 	void *v;
227 	register_t *retval;
228 {
229 	register struct sys_write_args /* {
230 		syscallarg(int) fd;
231 		syscallarg(char *) buf;
232 		syscallarg(u_int) nbyte;
233 	} */ *uap = v;
234 	register struct file *fp;
235 	register struct filedesc *fdp = p->p_fd;
236 	struct uio auio;
237 	struct iovec aiov;
238 	long cnt, error = 0;
239 #ifdef KTRACE
240 	struct iovec ktriov;
241 #endif
242 
243 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
244 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
245 	    (fp->f_flag & FWRITE) == 0)
246 		return (EBADF);
247 	aiov.iov_base = (caddr_t)SCARG(uap, buf);
248 	aiov.iov_len = SCARG(uap, nbyte);
249 	auio.uio_iov = &aiov;
250 	auio.uio_iovcnt = 1;
251 	auio.uio_resid = SCARG(uap, nbyte);
252 	auio.uio_rw = UIO_WRITE;
253 	auio.uio_segflg = UIO_USERSPACE;
254 	auio.uio_procp = p;
255 #ifdef KTRACE
256 	/*
257 	 * if tracing, save a copy of iovec
258 	 */
259 	if (KTRPOINT(p, KTR_GENIO))
260 		ktriov = aiov;
261 #endif
262 	cnt = SCARG(uap, nbyte);
263 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
264 	if (error) {
265 		if (auio.uio_resid != cnt && (error == ERESTART ||
266 		    error == EINTR || error == EWOULDBLOCK))
267 			error = 0;
268 		if (error == EPIPE)
269 			psignal(p, SIGPIPE);
270 	}
271 	cnt -= auio.uio_resid;
272 #ifdef KTRACE
273 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
274 		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
275 		    &ktriov, cnt, error);
276 #endif
277 	*retval = cnt;
278 	return (error);
279 }
280 
281 /*
282  * Gather write system call
283  */
284 int
285 sys_writev(p, v, retval)
286 	struct proc *p;
287 	void *v;
288 	register_t *retval;
289 {
290 	register struct sys_writev_args /* {
291 		syscallarg(int) fd;
292 		syscallarg(struct iovec *) iovp;
293 		syscallarg(u_int) iovcnt;
294 	} */ *uap = v;
295 	register struct file *fp;
296 	register struct filedesc *fdp = p->p_fd;
297 	struct uio auio;
298 	register struct iovec *iov;
299 	struct iovec *needfree;
300 	struct iovec aiov[UIO_SMALLIOV];
301 	long i, cnt, error = 0;
302 	u_int iovlen;
303 #ifdef KTRACE
304 	struct iovec *ktriov = NULL;
305 #endif
306 
307 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
308 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
309 	    (fp->f_flag & FWRITE) == 0)
310 		return (EBADF);
311 	if (SCARG(uap, iovcnt) <= 0)
312 		return (EINVAL);
313 	/* note: can't use iovlen until iovcnt is validated */
314 	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
315 	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
316 		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
317 			return (EINVAL);
318 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
319 		needfree = iov;
320 	} else {
321 		iov = aiov;
322 		needfree = NULL;
323 	}
324 	auio.uio_iov = iov;
325 	auio.uio_iovcnt = SCARG(uap, iovcnt);
326 	auio.uio_rw = UIO_WRITE;
327 	auio.uio_segflg = UIO_USERSPACE;
328 	auio.uio_procp = p;
329 	error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen);
330 	if (error)
331 		goto done;
332 	auio.uio_resid = 0;
333 	for (i = 0; i < SCARG(uap, iovcnt); i++) {
334 		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
335 			error = EINVAL;
336 			goto done;
337 		}
338 
339 		auio.uio_resid += iov->iov_len;
340 		iov++;
341 	}
342 #ifdef KTRACE
343 	/*
344 	 * if tracing, save a copy of iovec
345 	 */
346 	if (KTRPOINT(p, KTR_GENIO))  {
347 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
348 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
349 	}
350 #endif
351 	cnt = auio.uio_resid;
352 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
353 	if (error) {
354 		if (auio.uio_resid != cnt && (error == ERESTART ||
355 		    error == EINTR || error == EWOULDBLOCK))
356 			error = 0;
357 		if (error == EPIPE)
358 			psignal(p, SIGPIPE);
359 	}
360 	cnt -= auio.uio_resid;
361 #ifdef KTRACE
362 	if (ktriov != NULL) {
363 		if (error == 0)
364 			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
365 				ktriov, cnt, error);
366 		FREE(ktriov, M_TEMP);
367 	}
368 #endif
369 	*retval = cnt;
370 done:
371 	if (needfree)
372 		FREE(needfree, M_IOV);
373 	return (error);
374 }
375 
376 /*
377  * Ioctl system call
378  */
379 /* ARGSUSED */
380 int
381 sys_ioctl(p, v, retval)
382 	struct proc *p;
383 	void *v;
384 	register_t *retval;
385 {
386 	register struct sys_ioctl_args /* {
387 		syscallarg(int) fd;
388 		syscallarg(u_long) com;
389 		syscallarg(caddr_t) data;
390 	} */ *uap = v;
391 	register struct file *fp;
392 	register struct filedesc *fdp;
393 	register u_long com;
394 	register int error;
395 	register u_int size;
396 	caddr_t data, memp;
397 	int tmp;
398 #define STK_PARAMS	128
399 	char stkbuf[STK_PARAMS];
400 
401 	fdp = p->p_fd;
402 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
403 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
404 		return (EBADF);
405 
406 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
407 		return (EBADF);
408 
409 	switch (com = SCARG(uap, com)) {
410 	case FIONCLEX:
411 		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
412 		return (0);
413 	case FIOCLEX:
414 		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
415 		return (0);
416 	}
417 
418 	/*
419 	 * Interpret high order word to find amount of data to be
420 	 * copied to/from the user's address space.
421 	 */
422 	size = IOCPARM_LEN(com);
423 	if (size > IOCPARM_MAX)
424 		return (ENOTTY);
425 	memp = NULL;
426 	if (size > sizeof (stkbuf)) {
427 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
428 		data = memp;
429 	} else
430 		data = stkbuf;
431 	if (com&IOC_IN) {
432 		if (size) {
433 			error = copyin(SCARG(uap, data), data, (u_int)size);
434 			if (error) {
435 				if (memp)
436 					free(memp, M_IOCTLOPS);
437 				return (error);
438 			}
439 		} else
440 			*(caddr_t *)data = SCARG(uap, data);
441 	} else if ((com&IOC_OUT) && size)
442 		/*
443 		 * Zero the buffer so the user always
444 		 * gets back something deterministic.
445 		 */
446 		bzero(data, size);
447 	else if (com&IOC_VOID)
448 		*(caddr_t *)data = SCARG(uap, data);
449 
450 	switch (com) {
451 
452 	case FIONBIO:
453 		if ((tmp = *(int *)data) != 0)
454 			fp->f_flag |= FNONBLOCK;
455 		else
456 			fp->f_flag &= ~FNONBLOCK;
457 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
458 		break;
459 
460 	case FIOASYNC:
461 		if ((tmp = *(int *)data) != 0)
462 			fp->f_flag |= FASYNC;
463 		else
464 			fp->f_flag &= ~FASYNC;
465 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
466 		break;
467 
468 	case FIOSETOWN:
469 		tmp = *(int *)data;
470 		if (fp->f_type == DTYPE_SOCKET) {
471 			struct socket *so = (struct socket *)fp->f_data;
472 
473 			so->so_pgid = tmp;
474 			so->so_siguid = p->p_cred->p_ruid;
475 			so->so_sigeuid = p->p_ucred->cr_uid;
476 			error = 0;
477 			break;
478 		}
479 		if (tmp <= 0) {
480 			tmp = -tmp;
481 		} else {
482 			struct proc *p1 = pfind(tmp);
483 			if (p1 == 0) {
484 				error = ESRCH;
485 				break;
486 			}
487 			tmp = p1->p_pgrp->pg_id;
488 		}
489 		error = (*fp->f_ops->fo_ioctl)
490 			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
491 		break;
492 
493 	case FIOGETOWN:
494 		if (fp->f_type == DTYPE_SOCKET) {
495 			error = 0;
496 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
497 			break;
498 		}
499 		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
500 		*(int *)data = -*(int *)data;
501 		break;
502 
503 	default:
504 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
505 		/*
506 		 * Copy any data to user, size was
507 		 * already set and checked above.
508 		 */
509 		if (error == 0 && (com&IOC_OUT) && size)
510 			error = copyout(data, SCARG(uap, data), (u_int)size);
511 		break;
512 	}
513 	if (memp)
514 		free(memp, M_IOCTLOPS);
515 	return (error);
516 }
517 
/* Its address is the sleep channel used by sys_select() and sys_poll(). */
int	selwait;
/* Select collision counter; incremented by selwakeup(). */
int	nselcoll;
519 
520 /*
521  * Select system call.
522  */
523 int
524 sys_select(p, v, retval)
525 	register struct proc *p;
526 	void *v;
527 	register_t *retval;
528 {
529 	register struct sys_select_args /* {
530 		syscallarg(int) nd;
531 		syscallarg(fd_set *) in;
532 		syscallarg(fd_set *) ou;
533 		syscallarg(fd_set *) ex;
534 		syscallarg(struct timeval *) tv;
535 	} */ *uap = v;
536 	fd_set bits[6], *pibits[3], *pobits[3];
537 	struct timeval atv;
538 	int s, ncoll, error = 0, timo;
539 	u_int ni;
540 
541 	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
542 		/* forgiving; slightly wrong */
543 		SCARG(uap, nd) = p->p_fd->fd_nfiles;
544 	}
545 	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
546 	if (SCARG(uap, nd) > FD_SETSIZE) {
547 		caddr_t mbits;
548 
549 		if ((mbits = malloc(ni * 6, M_TEMP, M_WAITOK)) == NULL) {
550 			error = EINVAL;
551 			goto cleanup;
552 		}
553 		bzero(mbits, ni * 6);
554 		pibits[0] = (fd_set *)&mbits[ni * 0];
555 		pibits[1] = (fd_set *)&mbits[ni * 1];
556 		pibits[2] = (fd_set *)&mbits[ni * 2];
557 		pobits[0] = (fd_set *)&mbits[ni * 3];
558 		pobits[1] = (fd_set *)&mbits[ni * 4];
559 		pobits[2] = (fd_set *)&mbits[ni * 5];
560 	} else {
561 		bzero((caddr_t)bits, sizeof(bits));
562 		pibits[0] = &bits[0];
563 		pibits[1] = &bits[1];
564 		pibits[2] = &bits[2];
565 		pobits[0] = &bits[3];
566 		pobits[1] = &bits[4];
567 		pobits[2] = &bits[5];
568 	}
569 
570 #define	getbits(name, x) \
571 	if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
572 	    (caddr_t)pibits[x], ni))) \
573 		goto done;
574 	getbits(in, 0);
575 	getbits(ou, 1);
576 	getbits(ex, 2);
577 #undef	getbits
578 
579 	if (SCARG(uap, tv)) {
580 		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
581 			sizeof (atv));
582 		if (error)
583 			goto done;
584 		if (itimerfix(&atv)) {
585 			error = EINVAL;
586 			goto done;
587 		}
588 		s = splclock();
589 		timeradd(&atv, &time, &atv);
590 		timo = hzto(&atv);
591 		/*
592 		 * Avoid inadvertently sleeping forever.
593 		 */
594 		if (timo == 0)
595 			timo = 1;
596 		splx(s);
597 	} else
598 		timo = 0;
599 retry:
600 	ncoll = nselcoll;
601 	p->p_flag |= P_SELECT;
602 	error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval);
603 	if (error || *retval)
604 		goto done;
605 	s = splhigh();
606 	/* this should be timercmp(&time, &atv, >=) */
607 	if (SCARG(uap, tv) && (time.tv_sec > atv.tv_sec ||
608 	    (time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec))) {
609 		splx(s);
610 		goto done;
611 	}
612 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
613 		splx(s);
614 		goto retry;
615 	}
616 	p->p_flag &= ~P_SELECT;
617 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
618 	splx(s);
619 	if (error == 0)
620 		goto retry;
621 done:
622 	p->p_flag &= ~P_SELECT;
623 	/* select is not restarted after signals... */
624 	if (error == ERESTART)
625 		error = EINTR;
626 	if (error == EWOULDBLOCK)
627 		error = 0;
628 #define	putbits(name, x) \
629 	if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \
630 	    (caddr_t)SCARG(uap, name), ni))) \
631 		error = error2;
632 	if (error == 0) {
633 		int error2;
634 
635 		putbits(in, 0);
636 		putbits(ou, 1);
637 		putbits(ex, 2);
638 #undef putbits
639 	}
640 
641 cleanup:
642 	if (pibits[0] != &bits[0])
643 		free(pibits[0], M_TEMP);
644 	return (error);
645 }
646 
647 int
648 selscan(p, ibits, obits, nfd, retval)
649 	struct proc *p;
650 	fd_set *ibits, *obits;
651 	int nfd;
652 	register_t *retval;
653 {
654 	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
655 	register struct filedesc *fdp = p->p_fd;
656 	register int msk, i, j, fd;
657 	register fd_mask bits;
658 	struct file *fp;
659 	int ni, n = 0;
660 	static int flag[3] = { FREAD, FWRITE, 0 };
661 
662 	/*
663 	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
664 	 * up to the next byte) otherwise the fd_set's are normal sized.
665 	 */
666 	ni = sizeof(fd_set);
667 	if (nfd > FD_SETSIZE)
668 		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);
669 
670 	for (msk = 0; msk < 3; msk++) {
671 		fd_set *pibits = (fd_set *)&cibits[msk*ni];
672 		fd_set *pobits = (fd_set *)&cobits[msk*ni];
673 
674 		for (i = 0; i < nfd; i += NFDBITS) {
675 			bits = pibits->fds_bits[i/NFDBITS];
676 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
677 				bits &= ~(1 << j);
678 				fp = fdp->fd_ofiles[fd];
679 				if (fp == NULL)
680 					return (EBADF);
681 				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
682 					FD_SET(fd, pobits);
683 					n++;
684 				}
685 			}
686 		}
687 	}
688 	*retval = n;
689 	return (0);
690 }
691 
/*
 * Select routine that ignores all of its arguments and always
 * reports "ready" by returning 1.
 */
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{
	int ready = 1;

	return (ready);
}
702 
703 /*
704  * Record a select request.
705  */
706 void
707 selrecord(selector, sip)
708 	struct proc *selector;
709 	struct selinfo *sip;
710 {
711 	struct proc *p;
712 	pid_t mypid;
713 
714 	mypid = selector->p_pid;
715 	if (sip->si_selpid == mypid)
716 		return;
717 	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
718 	    p->p_wchan == (caddr_t)&selwait)
719 		sip->si_flags |= SI_COLL;
720 	else
721 		sip->si_selpid = mypid;
722 }
723 
724 /*
725  * Do a wakeup when a selectable event occurs.
726  */
727 void
728 selwakeup(sip)
729 	register struct selinfo *sip;
730 {
731 	register struct proc *p;
732 	int s;
733 
734 	if (sip->si_selpid == 0)
735 		return;
736 	if (sip->si_flags & SI_COLL) {
737 		nselcoll++;
738 		sip->si_flags &= ~SI_COLL;
739 		wakeup((caddr_t)&selwait);
740 	}
741 	p = pfind(sip->si_selpid);
742 	sip->si_selpid = 0;
743 	if (p != NULL) {
744 		s = splhigh();
745 		if (p->p_wchan == (caddr_t)&selwait) {
746 			if (p->p_stat == SSLEEP)
747 				setrunnable(p);
748 			else
749 				unsleep(p);
750 		} else if (p->p_flag & P_SELECT)
751 			p->p_flag &= ~P_SELECT;
752 		splx(s);
753 	}
754 }
755 
756 void
757 pollscan(p, pl, nfd, retval)
758 	struct proc *p;
759 	struct pollfd *pl;
760 	int nfd;
761 	register_t *retval;
762 {
763 	register struct filedesc *fdp = p->p_fd;
764 	register int msk, i;
765 	struct file *fp;
766 	int n = 0;
767 	static int flag[3] = { FREAD, FWRITE, 0 };
768 	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };
769 
770 	/*
771 	 * XXX: We need to implement the rest of the flags.
772 	 */
773 	for (i = 0; i < nfd; i++) {
774 		fp = fdp->fd_ofiles[pl[i].fd];
775 		if (fp == NULL) {
776 			if (pl[i].events & POLLNVAL) {
777 				pl[i].revents |= POLLNVAL;
778 				n++;
779 			}
780 			continue;
781 		}
782 		for (msk = 0; msk < 3; msk++) {
783 			if (pl[i].events & pflag[msk]) {
784 				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
785 					pl[i].revents |= pflag[msk] &
786 					    pl[i].events;
787 					n++;
788 				}
789 			}
790 		}
791 	}
792 	*retval = n;
793 }
794 
795 /*
796  * We are using the same mechanism as select only we encode/decode args
797  * differently.
798  */
799 int
800 sys_poll(p, v, retval)
801 	register struct proc *p;
802 	void *v;
803 	register_t *retval;
804 {
805 	struct sys_poll_args *uap = v;
806 	size_t sz = sizeof(struct pollfd) * SCARG(uap, nfds);
807 	struct pollfd *pl;
808 	int msec = SCARG(uap, timeout);
809 	struct timeval atv;
810 	int timo, ncoll, i, s, error, error2;
811 	extern int nselcoll, selwait;
812 
813 	pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);
814 
815 	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
816 		goto bad;
817 
818 	for (i = 0; i < SCARG(uap, nfds); i++)
819 		pl[i].revents = 0;
820 
821 	if (msec != -1) {
822 		atv.tv_sec = msec / 1000;
823 		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;
824 
825 		if (itimerfix(&atv)) {
826 			error = EINVAL;
827 			goto done;
828 		}
829 		s = splclock();
830 		timeradd(&atv, &time, &atv);
831 		timo = hzto(&atv);
832 		/*
833 		 * Avoid inadvertently sleeping forever.
834 		 */
835 		if (timo == 0)
836 			timo = 1;
837 		splx(s);
838 	} else
839 		timo = 0;
840 
841 retry:
842 	ncoll = nselcoll;
843 	p->p_flag |= P_SELECT;
844 	pollscan(p, pl, SCARG(uap, nfds), retval);
845 	if (*retval)
846 		goto done;
847 	s = splhigh();
848 	if (timo && timercmp(&time, &atv, >=)) {
849 		splx(s);
850 		goto done;
851 	}
852 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
853 		splx(s);
854 		goto retry;
855 	}
856 	p->p_flag &= ~P_SELECT;
857 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
858 	splx(s);
859 	if (error == 0)
860 		goto retry;
861 
862 done:
863 	p->p_flag &= ~P_SELECT;
864 	/* poll is not restarted after signals... */
865 	if (error == ERESTART)
866 		error = EINTR;
867 	if (error == EWOULDBLOCK)
868 		error = 0;
869 	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
870 		error = error2;
871 bad:
872 	free((char *) pl, M_TEMP);
873 	return (error);
874 }
875