/*	$OpenBSD: sys_generic.c,v 1.26 2000/07/07 14:33:20 art Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *));
int seltrue __P((dev_t, int, struct proc *));
void pollscan __P((struct proc *, struct pollfd *, int, register_t *));

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

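/*
 * Common code for read(2): wrap the user's buffer in a single-segment
 * uio, cap the transfer at SSIZE_MAX, and hand it to the file's
 * fo_read routine.
 */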
int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

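/*
 * Common code for readv(2): copy in the user's iovec array (using an
 * on-stack copy for up to UIO_SMALLIOV entries), validate the total
 * length against SSIZE_MAX, and hand the resulting uio to fo_read.
 */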
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_READ, ktriov, cnt,
			    error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		FREE(needfree, M_IOV);
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Write system call
 */
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

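/*
 * Common code for write(2): wrap the user's buffer in a single-segment
 * uio, cap the transfer at SSIZE_MAX, call the file's fo_write routine,
 * and post SIGPIPE on EPIPE.
 */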
int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

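/*
 * Common code for writev(2): copy in and validate the user's iovec
 * array, then hand the resulting uio to the file's fo_write routine,
 * posting SIGPIPE on EPIPE.
 */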
int
dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_WRITE, ktriov, cnt,
			    error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		FREE(needfree, M_IOV);
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_set bits[6], *pibits[3], *pobits[3];
	struct timeval atv;
	int s, ncoll, error = 0, timo;
	u_int ni;

	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (SCARG(uap, nd) > FD_SETSIZE) {
		caddr_t mbits;

		if ((mbits = malloc(ni * 6, M_TEMP, M_WAITOK)) == NULL) {
			error = EINVAL;
			goto cleanup;
		}
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero((caddr_t)bits, sizeof(bits));
		pibits[0] = &bits[0];
		pibits[1] = &bits[1];
		pibits[2] = &bits[2];
		pobits[0] = &bits[3];
		pobits[1] = &bits[4];
		pobits[2] = &bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
	    (caddr_t)pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \
	    (caddr_t)SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

cleanup:
	if (pibits[0] != &bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

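/*
 * Scan the three input fd_sets, asking each referenced file's fo_select
 * routine whether it is ready; set the matching bit in the output sets
 * and return the number of ready descriptors via retval.
 */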
int
selscan(p, ibits, obits, nfd, retval)
	struct proc *p;
	fd_set *ibits, *obits;
	int nfd;
	register_t *retval;
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register fd_mask bits;
	struct file *fp;
	int ni, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };

	/*
	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
	 * up to the next byte) otherwise the fd_set's are normal sized.
	 */
	ni = sizeof(fd_set);
	if (nfd > FD_SETSIZE)
		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
			}
		}
	}
	*retval = n;
	return (0);
}

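/*
 * Generic select routine for devices that are always ready.
 */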
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}

/*
 * Record a select request.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

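/*
 * Scan the pollfd array, mapping POLLIN/POLLOUT/POLLERR requests onto
 * fo_select checks; flag bad descriptors with POLLNVAL and return the
 * number of descriptors with events via retval.
 */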
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0)
			continue;
		if (pl[i].fd >= fdp->fd_nfiles) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}

		fp = fdp->fd_ofiles[pl[i].fd];
		if (fp == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		for (x = msk = 0; msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		if (x)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	if (msec != -1) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}