xref: /netbsd-src/sys/kern/sys_generic.c (revision eb7c1594f145c931049e1fd9eb056a5987e87e59)
1 /*	$NetBSD: sys_generic.c,v 1.77 2003/08/07 16:31:54 agc Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.77 2003/08/07 16:31:54 agc Exp $");
41 
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60 
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64 
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67 
68 /*
69  * Read system call.
70  */
71 /* ARGSUSED */
72 int
73 sys_read(struct lwp *l, void *v, register_t *retval)
74 {
75 	struct sys_read_args /* {
76 		syscallarg(int)		fd;
77 		syscallarg(void *)	buf;
78 		syscallarg(size_t)	nbyte;
79 	} */ *uap = v;
80 	int		fd;
81 	struct file	*fp;
82 	struct proc	*p;
83 	struct filedesc	*fdp;
84 
85 	fd = SCARG(uap, fd);
86 	p = l->l_proc;
87 	fdp = p->p_fd;
88 
89 	if ((fp = fd_getfile(fdp, fd)) == NULL)
90 		return (EBADF);
91 
92 	if ((fp->f_flag & FREAD) == 0) {
93 		simple_unlock(&fp->f_slock);
94 		return (EBADF);
95 	}
96 
97 	FILE_USE(fp);
98 
99 	/* dofileread() will unuse the descriptor for us */
100 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
101 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
102 }
103 
/*
 * Common code for read(2)-style calls: transfer up to nbyte bytes from
 * fp into the user buffer buf, starting at *offset.
 *
 * The caller must have marked fp in use (FILE_USE); this routine
 * releases that reference (FILE_UNUSE) on every path before returning.
 * On success *retval holds the number of bytes actually read.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio	auio;
	struct iovec	aiov;
	size_t		cnt;
	int		error;
#ifdef KTRACE
	struct iovec	ktriov;
#endif
	error = 0;

	/* Describe the transfer with a single-element iovec. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A transfer interrupted after partial progress succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
159 
160 /*
161  * Scatter read system call.
162  */
163 int
164 sys_readv(struct lwp *l, void *v, register_t *retval)
165 {
166 	struct sys_readv_args /* {
167 		syscallarg(int)				fd;
168 		syscallarg(const struct iovec *)	iovp;
169 		syscallarg(int)				iovcnt;
170 	} */ *uap = v;
171 	int		fd;
172 	struct file	*fp;
173 	struct proc	*p;
174 	struct filedesc	*fdp;
175 
176 	fd = SCARG(uap, fd);
177 	p = l->l_proc;
178 	fdp = p->p_fd;
179 
180 	if ((fp = fd_getfile(fdp, fd)) == NULL)
181 		return (EBADF);
182 
183 	if ((fp->f_flag & FREAD) == 0) {
184 		simple_unlock(&fp->f_slock);
185 		return (EBADF);
186 	}
187 
188 	FILE_USE(fp);
189 
190 	/* dofilereadv() will unuse the descriptor for us */
191 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
192 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
193 }
194 
/*
 * Common code for readv(2)-style calls: scatter-read from fp starting
 * at *offset into the iovec array iovp (iovcnt entries, copied in from
 * user space).
 *
 * The caller must have marked fp in use (FILE_USE); this routine
 * releases that reference on every path before returning.  On success
 * *retval holds the number of bytes actually read.
 */
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio	auio;
	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
	int		i, error;
	size_t		cnt;
	u_int		iovlen;
#ifdef KTRACE
	struct iovec	*ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* Small vectors fit in the on-stack array. */
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Total the transfer size, guarding each step against overflow. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A transfer interrupted after partial progress succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
282 
283 /*
284  * Write system call
285  */
286 int
287 sys_write(struct lwp *l, void *v, register_t *retval)
288 {
289 	struct sys_write_args /* {
290 		syscallarg(int)			fd;
291 		syscallarg(const void *)	buf;
292 		syscallarg(size_t)		nbyte;
293 	} */ *uap = v;
294 	int		fd;
295 	struct file	*fp;
296 	struct proc	*p;
297 	struct filedesc	*fdp;
298 
299 	fd = SCARG(uap, fd);
300 	p = l->l_proc;
301 	fdp = p->p_fd;
302 
303 	if ((fp = fd_getfile(fdp, fd)) == NULL)
304 		return (EBADF);
305 
306 	if ((fp->f_flag & FWRITE) == 0) {
307 		simple_unlock(&fp->f_slock);
308 		return (EBADF);
309 	}
310 
311 	FILE_USE(fp);
312 
313 	/* dofilewrite() will unuse the descriptor for us */
314 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
315 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
316 }
317 
/*
 * Common code for write(2)-style calls: transfer up to nbyte bytes from
 * the user buffer buf to fp, starting at *offset.
 *
 * The caller must have marked fp in use (FILE_USE); this routine
 * releases that reference on every path before returning.  On success
 * *retval holds the number of bytes actually written.  A failed write
 * on a pipe/socket delivers SIGPIPE, mirroring POSIX semantics.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio	auio;
	struct iovec	aiov;
	size_t		cnt;
	int		error;
#ifdef KTRACE
	struct iovec	ktriov;
#endif

	error = 0;
	/* Describe the transfer with a single-element iovec. */
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A transfer interrupted after partial progress succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
376 
377 /*
378  * Gather write system call
379  */
380 int
381 sys_writev(struct lwp *l, void *v, register_t *retval)
382 {
383 	struct sys_writev_args /* {
384 		syscallarg(int)				fd;
385 		syscallarg(const struct iovec *)	iovp;
386 		syscallarg(int)				iovcnt;
387 	} */ *uap = v;
388 	int		fd;
389 	struct file	*fp;
390 	struct proc	*p;
391 	struct filedesc	*fdp;
392 
393 	fd = SCARG(uap, fd);
394 	p = l->l_proc;
395 	fdp = p->p_fd;
396 
397 	if ((fp = fd_getfile(fdp, fd)) == NULL)
398 		return (EBADF);
399 
400 	if ((fp->f_flag & FWRITE) == 0) {
401 		simple_unlock(&fp->f_slock);
402 		return (EBADF);
403 	}
404 
405 	FILE_USE(fp);
406 
407 	/* dofilewritev() will unuse the descriptor for us */
408 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
409 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
410 }
411 
412 int
413 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
414 	int iovcnt, off_t *offset, int flags, register_t *retval)
415 {
416 	struct uio	auio;
417 	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
418 	int		i, error;
419 	size_t		cnt;
420 	u_int		iovlen;
421 #ifdef KTRACE
422 	struct iovec	*ktriov;
423 #endif
424 
425 	error = 0;
426 #ifdef KTRACE
427 	ktriov = NULL;
428 #endif
429 	/* note: can't use iovlen until iovcnt is validated */
430 	iovlen = iovcnt * sizeof(struct iovec);
431 	if ((u_int)iovcnt > UIO_SMALLIOV) {
432 		if ((u_int)iovcnt > IOV_MAX) {
433 			error = EINVAL;
434 			goto out;
435 		}
436 		iov = malloc(iovlen, M_IOV, M_WAITOK);
437 		needfree = iov;
438 	} else if ((u_int)iovcnt > 0) {
439 		iov = aiov;
440 		needfree = NULL;
441 	} else {
442 		error = EINVAL;
443 		goto out;
444 	}
445 
446 	auio.uio_iov = iov;
447 	auio.uio_iovcnt = iovcnt;
448 	auio.uio_rw = UIO_WRITE;
449 	auio.uio_segflg = UIO_USERSPACE;
450 	auio.uio_procp = p;
451 	error = copyin(iovp, iov, iovlen);
452 	if (error)
453 		goto done;
454 	auio.uio_resid = 0;
455 	for (i = 0; i < iovcnt; i++) {
456 		auio.uio_resid += iov->iov_len;
457 		/*
458 		 * Writes return ssize_t because -1 is returned on error.
459 		 * Therefore we must restrict the length to SSIZE_MAX to
460 		 * avoid garbage return values.
461 		 */
462 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
463 			error = EINVAL;
464 			goto done;
465 		}
466 		iov++;
467 	}
468 #ifdef KTRACE
469 	/*
470 	 * if tracing, save a copy of iovec
471 	 */
472 	if (KTRPOINT(p, KTR_GENIO))  {
473 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
474 		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
475 	}
476 #endif
477 	cnt = auio.uio_resid;
478 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
479 	if (error) {
480 		if (auio.uio_resid != cnt && (error == ERESTART ||
481 		    error == EINTR || error == EWOULDBLOCK))
482 			error = 0;
483 		if (error == EPIPE)
484 			psignal(p, SIGPIPE);
485 	}
486 	cnt -= auio.uio_resid;
487 #ifdef KTRACE
488 	if (KTRPOINT(p, KTR_GENIO))
489 		if (error == 0) {
490 			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
491 		free(ktriov, M_TEMP);
492 	}
493 #endif
494 	*retval = cnt;
495  done:
496 	if (needfree)
497 		free(needfree, M_IOV);
498  out:
499 	FILE_UNUSE(fp, p);
500 	return (error);
501 }
502 
/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(caddr_t)	data;
	} */ *uap = v;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;
	u_long		com;
	int		error;
	u_int		size;
	caddr_t		data, memp;
	int		tmp;
	/* On-stack buffer for small ioctl parameter blocks. */
#define	STK_PARAMS	128
	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep the error-reporting switch below sane */
		goto out;
	}

	/* Close-on-exec flags live in the descriptor table, not the file. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Parameter block too big for the stack buffer. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(p, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-length IOC_IN passes the pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Track the non-blocking flag, then inform the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Track the async-I/O flag, then inform the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		/* Non-sockets: translate pid to pgrp id and use TIOCSPGRP. */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		/* Non-sockets report a pgrp id; negate it back to FIO form. */
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	/* Map driver pass-through return values to ENOTTY for the user. */
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
683 
684 int	selwait, nselcoll;
685 
/*
 * Select system call.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */ *uap = v;
	struct proc	*p;
	caddr_t		bits;
	/* Six set-sized areas: three input sets, three result sets. */
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	struct		timeval atv;
	int		s, ncoll, error, timo;
	size_t		ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* Bytes per descriptor set, rounded up to whole fd_mask words. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied set; a NULL pointer means an empty set. */
#define	getbits(name, x)						\
	if (SCARG(uap, name)) {						\
		error = copyin(SCARG(uap, name), bits + ni * x, ni);	\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
			   (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * Rescan instead of sleeping if an event arrived during the scan:
	 * selwakeup() either clears L_SELECT or bumps nselcoll.
	 */
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy each result set back out to the user. */
#define	putbits(name, x)						\
		if (SCARG(uap, name)) {					\
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error)					\
				goto out;				\
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
798 
/*
 * Scan the three descriptor sets for select(2).  ibitp points at the
 * three input sets laid out back to back; obitp at the three result
 * sets.  Sets *retval to the number of ready descriptor/condition
 * pairs found, or returns EBADF if a set bit names a closed file.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc	*fdp;
	int		msk, i, j, fd, n;
	fd_mask		ibits, obits;
	struct file	*fp;
	/* Poll events queried for the read/write/except sets, in order. */
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Visit each set bit in this fd_mask word. */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
834 
/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */ *uap = v;
	struct proc	*p;
	caddr_t		bits;
	/* On-stack array for the common case of few descriptors. */
	char		smallbits[32 * sizeof(struct pollfd)];
	struct timeval	atv;
	int		s, ncoll, error, timo;
	size_t		ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (SCARG(uap, timeout) != INFTIM) {
		/* Convert the millisecond timeout to an absolute deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * Rescan instead of sleeping if an event arrived during the scan:
	 * selwakeup() either clears L_SELECT or bumps nselcoll.
	 */
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Hand the updated revents back to the user. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
923 
924 int
925 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
926 {
927 	struct filedesc	*fdp;
928 	int		i, n;
929 	struct file	*fp;
930 
931 	fdp = p->p_fd;
932 	n = 0;
933 	for (i = 0; i < nfd; i++, fds++) {
934 		if (fds->fd >= fdp->fd_nfiles) {
935 			fds->revents = POLLNVAL;
936 			n++;
937 		} else if (fds->fd < 0) {
938 			fds->revents = 0;
939 		} else {
940 			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
941 				fds->revents = POLLNVAL;
942 				n++;
943 			} else {
944 				FILE_USE(fp);
945 				fds->revents = (*fp->f_ops->fo_poll)(fp,
946 				    fds->events | POLLERR | POLLHUP, p);
947 				if (fds->revents != 0)
948 					n++;
949 				FILE_UNUSE(fp, p);
950 			}
951 		}
952 	}
953 	*retval = n;
954 	return (0);
955 }
956 
957 /*ARGSUSED*/
958 int
959 seltrue(dev_t dev, int events, struct proc *p)
960 {
961 
962 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
963 }
964 
965 /*
966  * Record a select request.
967  */
968 void
969 selrecord(struct proc *selector, struct selinfo *sip)
970 {
971 	struct lwp	*l;
972 	struct proc	*p;
973 	pid_t		mypid;
974 
975 	mypid = selector->p_pid;
976 	if (sip->sel_pid == mypid)
977 		return;
978 	if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
979 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
980 			if (l->l_wchan == (caddr_t)&selwait) {
981 				sip->sel_collision = 1;
982 				return;
983 			}
984 		}
985 	}
986 
987 	sip->sel_pid = mypid;
988 }
989 
990 /*
991  * Do a wakeup when a selectable event occurs.
992  */
993 void
994 selwakeup(sip)
995 	struct selinfo *sip;
996 {
997 	struct lwp *l;
998 	struct proc *p;
999 	int s;
1000 
1001 	if (sip->sel_pid == 0)
1002 		return;
1003 	if (sip->sel_collision) {
1004 		sip->sel_pid = 0;
1005 		nselcoll++;
1006 		sip->sel_collision = 0;
1007 		wakeup((caddr_t)&selwait);
1008 		return;
1009 	}
1010 	p = pfind(sip->sel_pid);
1011 	sip->sel_pid = 0;
1012 	if (p != NULL) {
1013 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1014 			SCHED_LOCK(s);
1015 			if (l->l_wchan == (caddr_t)&selwait) {
1016 				if (l->l_stat == LSSLEEP)
1017 					setrunnable(l);
1018 				else
1019 					unsleep(l);
1020 			} else if (l->l_flag & L_SELECT)
1021 				l->l_flag &= ~L_SELECT;
1022 			SCHED_UNLOCK(s);
1023 		}
1024 	}
1025 }
1026