/*	$OpenBSD: sys_generic.c,v 1.147 2022/02/08 08:56:41 visa Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/eventvar.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/sched.h>
#include <sys/pledge.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/*
 * Debug values:
 *  1 - print implementation errors, things that should not happen.
 *  2 - print ppoll(2) information, somewhat verbose
 *  3 - print pselect(2) and ppoll(2) information, very verbose
 */
int kqpoll_debug = 0;
#define DPRINTFN(v, x...) if (kqpoll_debug > v) {			\
	printf("%s(%d): ", curproc->p_p->ps_comm, curproc->p_tid);	\
	printf(x);							\
}

int pselregister(struct proc *, fd_set *[], fd_set *[], int, int *, int *);
int pselcollect(struct proc *, struct kevent *, fd_set *[], int *);
void ppollregister(struct proc *, struct pollfd *, int, int *, int *);
int ppollcollect(struct proc *, struct kevent *, struct pollfd *, u_int);

int pollout(struct pollfd *, struct pollfd *, u_int);
int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *,
    struct timespec *, const sigset_t *, register_t *);
int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *,
    const sigset_t *, register_t *);
void doselwakeup(struct selinfo *);

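/*
 * Copy an iovec array in from user space and validate its total
 * length.  Arrays of up to UIO_SMALLIOV entries use the caller's
 * `aiov' storage; larger ones are allocated and must be released
 * with iovec_free().  The accumulated length is returned in `residp'.
 */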
int
iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov,
    unsigned int iovcnt, size_t *residp)
{
#ifdef KTRACE
	struct proc *p = curproc;
#endif
	struct iovec *iov;
	int error, i;
	size_t resid = 0;

	if (iovcnt > UIO_SMALLIOV) {
		if (iovcnt > IOV_MAX)
			return (EINVAL);
		iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK);
	} else if (iovcnt > 0) {
		iov = aiov;
	} else {
		return (EINVAL);
	}
	*iovp = iov;

	if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov))))
		return (error);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktriovec(p, iov, iovcnt);
#endif

	for (i = 0; i < iovcnt; i++) {
		resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.  Note that the addition is
		 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX.
		 */
		if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX)
			return (EINVAL);
		iov++;
	}

	if (residp != NULL)
		*residp = resid;

	return (0);
}

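/*
 * Release an iovec array obtained from iovec_copyin().
 */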
void
iovec_free(struct iovec *iov, unsigned int iovcnt)
{
	if (iovcnt > UIO_SMALLIOV)
		free(iov, M_IOV, iovcnt * sizeof(*iov));
}

/*
 * Read system call.
 */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	struct uio auio;

	iov.iov_base = SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);
	if (iov.iov_len > SSIZE_MAX)
		return (EINVAL);

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = iov.iov_len;

	return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval));
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error, iovcnt = SCARG(uap, iovcnt);
	struct uio auio;
	size_t resid;

	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
	if (error)
		goto done;

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = resid;

	error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval);
 done:
	iovec_free(iov, iovcnt);
	return (error);
}

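/*
 * Common code for read(2) and readv(2).  Performs the read through
 * the file's fo_read method and accounts the transfer; FO_POSITION
 * marks a positioned read, which is only valid on seekable vnodes.
 */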
int
dofilereadv(struct proc *p, int fd, struct uio *uio, int flags,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	long cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0);
	iovlen = uio->uio_iovcnt * sizeof(struct iovec);

	if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL)
		return (EBADF);

	/* Checks for positioned read. */
	if (flags & FO_POSITION) {
		struct vnode *vp = fp->f_data;

		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
		    (vp->v_flag & VISTTY)) {
			error = ESPIPE;
			goto done;
		}

		if (uio->uio_offset < 0 && vp->v_type != VCHR) {
			error = EINVAL;
			goto done;
		}
	}

	uio->uio_rw = UIO_READ;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy(ktriov, uio->uio_iov, iovlen);
	}
#endif
	cnt = uio->uio_resid;
	error = (*fp->f_ops->fo_read)(fp, uio, flags);
	if (error) {
		if (uio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= uio->uio_resid;

	mtx_enter(&fp->f_mtx);
	fp->f_rxfer++;
	fp->f_rbytes += cnt;
	mtx_leave(&fp->f_mtx);
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
	*retval = cnt;
 done:
	FRELE(fp, p);
	return (error);
}

/*
 * Write system call.
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	struct uio auio;

	iov.iov_base = (void *)SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);
	if (iov.iov_len > SSIZE_MAX)
		return (EINVAL);

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = iov.iov_len;

	return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval));
}

/*
 * Gather write system call.
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error, iovcnt = SCARG(uap, iovcnt);
	struct uio auio;
	size_t resid;

	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
	if (error)
		goto done;

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = resid;

	error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval);
 done:
	iovec_free(iov, iovcnt);
	return (error);
}

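/*
 * Common code for write(2) and writev(2).  Performs the write through
 * the file's fo_write method and accounts the transfer; FO_POSITION
 * marks a positioned write.  A failed write to a pipe posts SIGPIPE
 * to the writing thread.
 */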
int
dofilewritev(struct proc *p, int fd, struct uio *uio, int flags,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	long cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0);
	iovlen = uio->uio_iovcnt * sizeof(struct iovec);

	if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL)
		return (EBADF);

	/* Checks for positioned write. */
	if (flags & FO_POSITION) {
		struct vnode *vp = fp->f_data;

		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
		    (vp->v_flag & VISTTY)) {
			error = ESPIPE;
			goto done;
		}

		if (uio->uio_offset < 0 && vp->v_type != VCHR) {
			error = EINVAL;
			goto done;
		}
	}

	uio->uio_rw = UIO_WRITE;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy(ktriov, uio->uio_iov, iovlen);
	}
#endif
	cnt = uio->uio_resid;
	error = (*fp->f_ops->fo_write)(fp, uio, flags);
	if (error) {
		if (uio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			KERNEL_LOCK();
			ptsignal(p, SIGPIPE, STHREAD);
			KERNEL_UNLOCK();
		}
	}
	cnt -= uio->uio_resid;

	mtx_enter(&fp->f_mtx);
	fp->f_wxfer++;
	fp->f_wbytes += cnt;
	mtx_leave(&fp->f_mtx);
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
	*retval = cnt;
 done:
	FRELE(fp, p);
	return (error);
}

/*
 * Ioctl system call.
 */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	u_long com = SCARG(uap, com);
	int error = 0;
	u_int size = 0;
	caddr_t data, memp = NULL;
	int tmp;
#define STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL)
		return (EBADF);

	if (fp->f_type == DTYPE_SOCKET) {
		struct socket *so = fp->f_data;

		if (so->so_state & SS_DNS) {
			error = EINVAL;
			goto out;
		}
	}

	error = pledge_ioctl(p, com, fp);
	if (error)
		goto out;

	switch (com) {
	case FIONCLEX:
	case FIOCLEX:
		fdplock(fdp);
		if (com == FIONCLEX)
			fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		else
			fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		fdpunlock(fdp);
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	if (size > sizeof (stkbuf)) {
		memp = malloc(size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			atomic_setbits_int(&fp->f_flag, FNONBLOCK);
		else
			atomic_clearbits_int(&fp->f_flag, FNONBLOCK);
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			atomic_setbits_int(&fp->f_flag, FASYNC);
		else
			atomic_clearbits_int(&fp->f_flag, FASYNC);
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		break;
	}
	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (com&IOC_OUT) && size)
		error = copyout(data, SCARG(uap, data), size);
out:
	FRELE(fp, p);
	free(memp, M_IOCTLOPS, size);
	return (error);
}

int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	int error;

	if (SCARG(uap, tv) != NULL) {
		struct timeval tv;
		if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0)
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimeval(p, &tv);
#endif
		if (tv.tv_sec < 0 || !timerisvalid(&tv))
			return (EINVAL);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);
		tsp = &ts;
	}

	return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou),
	    SCARG(uap, ex), tsp, NULL, retval));
}

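/*
 * Select system call with a timeout of nanosecond precision and a
 * temporary signal mask installed for the duration of the call.
 */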
int
sys_pselect(struct proc *p, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(const struct timespec *) ts;
		syscallarg(const sigset_t *) mask;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	sigset_t ss, *ssp = NULL;
	int error;

	if (SCARG(uap, ts) != NULL) {
		if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0)
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
			return (EINVAL);
		tsp = &ts;
	}
	if (SCARG(uap, mask) != NULL) {
		if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0)
			return (error);
		ssp = &ss;
	}

	return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou),
	    SCARG(uap, ex), tsp, ssp, retval));
}

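/*
 * Common code for select(2) and pselect(2): register the fd_sets as
 * kqueue events on the per-thread queue, wait for events or timeout,
 * and convert what was collected back into fd_set bits.
 */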
int
dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex,
    struct timespec *timeout, const sigset_t *sigmask, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct timespec zerots = {};
	fd_mask bits[6];
	fd_set *pibits[3], *pobits[3];
	int error, ncollected = 0, nevents = 0;
	u_int ni;

	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni > sizeof(bits[0])) {
		caddr_t mbits;

		mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		memset(bits, 0, sizeof(bits));
		pibits[0] = (fd_set *)&bits[0];
		pibits[1] = (fd_set *)&bits[1];
		pibits[2] = (fd_set *)&bits[2];
		pobits[0] = (fd_set *)&bits[3];
		pobits[1] = (fd_set *)&bits[4];
		pobits[2] = (fd_set *)&bits[5];
	}

	kqpoll_init(nd);

#define	getbits(name, x) \
	if (name && (error = copyin(name, pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
#ifdef KTRACE
	if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
		if (in) ktrfdset(p, pibits[0], ni);
		if (ou) ktrfdset(p, pibits[1], ni);
		if (ex) ktrfdset(p, pibits[2], ni);
	}
#endif

	if (sigmask)
		dosigsuspend(p, *sigmask &~ sigcantmask);

	/* Register kqueue events */
	error = pselregister(p, pibits, pobits, nd, &nevents, &ncollected);
	if (error != 0)
		goto done;

	/*
	 * The poll/select family of syscalls has been designed to
	 * block when file descriptors are not available, even if
	 * there's nothing to wait for.
	 */
	if (nevents == 0 && ncollected == 0) {
		uint64_t nsecs = INFSLP;

		if (timeout != NULL) {
			if (!timespecisset(timeout))
				goto done;
			nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP));
		}
		error = tsleep_nsec(&nowake, PSOCK | PCATCH, "kqsel", nsecs);
		/* select is not restarted after signals... */
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* Do not block if registering found pending events. */
	if (ncollected > 0)
		timeout = &zerots;

	/* Collect at most `nevents' possibly waiting in kqueue_scan() */
	kqueue_scan_setup(&scan, p->p_kq);
	while (nevents > 0) {
		struct kevent kev[KQ_NEVENTS];
		int i, ready, count;

		/* Maximum number of events per iteration */
		count = MIN(nitems(kev), nevents);
		ready = kqueue_scan(&scan, count, kev, timeout, p, &error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		/* Convert back events that are ready. */
		for (i = 0; i < ready && error == 0; i++)
			error = pselcollect(p, &kev[i], pobits, &ncollected);
		/*
		 * Stop if there was an error or if we had enough
		 * space to collect all events that were ready.
		 */
		if (error || ready < count)
			break;

		nevents -= ready;
	}
	kqueue_scan_finish(&scan);
	*retval = ncollected;
done:
#define	putbits(name, x) \
	if (name && (error2 = copyout(pobits[x], name, ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
#ifdef KTRACE
		if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
			if (in) ktrfdset(p, pobits[0], ni);
			if (ou) ktrfdset(p, pobits[1], ni);
			if (ex) ktrfdset(p, pobits[2], ni);
		}
#endif
	}

	if (pibits[0] != (fd_set *)&bits[0])
		free(pibits[0], M_TEMP, 6 * ni);

	kqpoll_done(nd);

	return (error);
}

/*
 * Convert fd_set into kqueue events and register them on the
 * per-thread queue.
 */
int
pselregister(struct proc *p, fd_set *pibits[3], fd_set *pobits[3], int nfd,
    int *nregistered, int *ncollected)
{
	static const int evf[] = { EVFILT_READ, EVFILT_WRITE, EVFILT_EXCEPT };
	static const int evff[] = { 0, 0, NOTE_OOB };
	int msk, i, j, fd, nevents = 0, error = 0;
	struct kevent kev;
	fd_mask bits;

	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits[msk]->fds_bits[i / NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);

				DPRINTFN(2, "select fd %d mask %d serial %lu\n",
				    fd, msk, p->p_kq_serial);
				EV_SET(&kev, fd, evf[msk],
				    EV_ADD|EV_ENABLE|__EV_SELECT,
				    evff[msk], 0, (void *)(p->p_kq_serial));
#ifdef KTRACE
				if (KTRPOINT(p, KTR_STRUCT))
					ktrevent(p, &kev, 1);
#endif
				error = kqueue_register(p->p_kq, &kev, 0, p);
				switch (error) {
				case 0:
					nevents++;
				/* FALLTHROUGH */
				case EOPNOTSUPP:/* No underlying kqfilter */
				case EINVAL:	/* Unimplemented filter */
				case EPERM:	/* Specific to FIFO and
						 * __EV_SELECT */
					error = 0;
					break;
				case EPIPE:	/* Specific to pipes */
					KASSERT(kev.filter == EVFILT_WRITE);
					FD_SET(kev.ident, pobits[1]);
					(*ncollected)++;
					error = 0;
					break;
				case ENXIO:	/* Device has been detached */
				default:
					goto bad;
				}
			}
		}
	}

	*nregistered = nevents;
	return (0);
bad:
	DPRINTFN(0, "select fd %u filt %d error %d\n", (int)kev.ident,
	    kev.filter, error);
	return (error);
}

/*
 * Convert given kqueue event into corresponding select(2) bit.
 */
int
pselcollect(struct proc *p, struct kevent *kevp, fd_set *pobits[3],
    int *ncollected)
{
	if ((unsigned long)kevp->udata != p->p_kq_serial) {
		panic("%s: spurious kevp %p fd %d udata 0x%lx serial 0x%lx",
		    __func__, kevp, (int)kevp->ident,
		    (unsigned long)kevp->udata, p->p_kq_serial);
	}

	if (kevp->flags & EV_ERROR) {
		DPRINTFN(2, "select fd %d filt %d error %d\n",
		    (int)kevp->ident, kevp->filter, (int)kevp->data);
		return (kevp->data);
	}

	switch (kevp->filter) {
	case EVFILT_READ:
		FD_SET(kevp->ident, pobits[0]);
		break;
	case EVFILT_WRITE:
		FD_SET(kevp->ident, pobits[1]);
		break;
	case EVFILT_EXCEPT:
		FD_SET(kevp->ident, pobits[2]);
		break;
	default:
		KASSERT(0);
	}
	(*ncollected)++;

	DPRINTFN(2, "select fd %d filt %d\n", (int)kevp->ident, kevp->filter);
	return (0);
}

int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
selfalse(dev_t dev, int events, struct proc *p)
{

	return (0);
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mytid;

	KERNEL_ASSERT_LOCKED();

	mytid = selector->p_tid;
	if (sip->si_seltid == mytid)
		return;
	if (sip->si_seltid && (p = tfind(sip->si_seltid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_seltid = mytid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	KERNEL_LOCK();
	KNOTE(&sip->si_note, NOTE_SUBMIT);
	doselwakeup(sip);
	KERNEL_UNLOCK();
}

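/*
 * Wake up the thread recorded by selrecord(), handling the case
 * where several threads collided on the same selinfo (SI_COLL).
 */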
void
doselwakeup(struct selinfo *sip)
{
	struct proc *p;

	KERNEL_ASSERT_LOCKED();

	if (sip->si_seltid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}
	p = tfind(sip->si_seltid);
	sip->si_seltid = 0;
	if (p != NULL) {
		if (wakeup_proc(p, &selwait)) {
			/* nothing else to do */
		} else if (p->p_flag & P_SELECT)
			atomic_clearbits_int(&p->p_flag, P_SELECT);
	}
}

/*
 * Only copyout the revents field.
 */
int
pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds)
{
	int error = 0;
	u_int i = 0;

	while (!error && i++ < nfds) {
		error = copyout(&pl->revents, &upl->revents,
		    sizeof(upl->revents));
		pl++;
		upl++;
	}

	return (error);
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	int msec = SCARG(uap, timeout);

	if (msec != INFTIM) {
		if (msec < 0)
			return (EINVAL);
		ts.tv_sec = msec / 1000;
		ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000;
		tsp = &ts;
	}

	return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL,
	    retval));
}

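/*
 * Poll system call with a timeout of nanosecond precision and a
 * temporary signal mask installed for the duration of the call.
 */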
int
sys_ppoll(struct proc *p, void *v, register_t *retval)
{
	struct sys_ppoll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(const struct timespec *) ts;
		syscallarg(const sigset_t *) mask;
	} */ *uap = v;

	int error;
	struct timespec ts, *tsp = NULL;
	sigset_t ss, *ssp = NULL;

	if (SCARG(uap, ts) != NULL) {
		if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0)
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
			return (EINVAL);
		tsp = &ts;
	}

	if (SCARG(uap, mask) != NULL) {
		if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0)
			return (error);
		ssp = &ss;
	}

	return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp,
	    retval));
}

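/*
 * Common code for poll(2) and ppoll(2): register the pollfd array as
 * kqueue events on the per-thread queue, wait for events or timeout,
 * and convert what was collected back into revents bits.
 */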
int
doppoll(struct proc *p, struct pollfd *fds, u_int nfds,
    struct timespec *timeout, const sigset_t *sigmask, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct timespec zerots = {};
	struct pollfd pfds[4], *pl = pfds;
	int error, ncollected = 0, nevents = 0;
	size_t sz;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles))
		return (EINVAL);

	/* optimize for the default case, of a small nfds value */
	if (nfds > nitems(pfds)) {
		pl = mallocarray(nfds, sizeof(*pl), M_TEMP,
		    M_WAITOK | M_CANFAIL);
		if (pl == NULL)
			return (EINVAL);
	}

	kqpoll_init(nfds);

	sz = nfds * sizeof(*pl);

	if ((error = copyin(fds, pl, sz)) != 0)
		goto bad;

	if (sigmask)
		dosigsuspend(p, *sigmask &~ sigcantmask);

	/* Register kqueue events */
	ppollregister(p, pl, nfds, &nevents, &ncollected);

	/*
	 * The poll/select family of syscalls has been designed to
	 * block when file descriptors are not available, even if
	 * there's nothing to wait for.
	 */
	if (nevents == 0 && ncollected == 0) {
		uint64_t nsecs = INFSLP;

		if (timeout != NULL) {
			if (!timespecisset(timeout))
				goto done;
			nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP));
		}

		error = tsleep_nsec(&nowake, PSOCK | PCATCH, "kqpoll", nsecs);
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* Do not block if registering found pending events. */
	if (ncollected > 0)
		timeout = &zerots;

	/* Collect at most `nevents' possibly waiting in kqueue_scan() */
	kqueue_scan_setup(&scan, p->p_kq);
	while (nevents > 0) {
		struct kevent kev[KQ_NEVENTS];
		int i, ready, count;

		/* Maximum number of events per iteration */
		count = MIN(nitems(kev), nevents);
		ready = kqueue_scan(&scan, count, kev, timeout, p, &error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		/* Convert back events that are ready. */
		for (i = 0; i < ready; i++)
			ncollected += ppollcollect(p, &kev[i], pl, nfds);

		/*
		 * Stop if there was an error or if we had enough
		 * space to collect all events that were ready.
		 */
		if (error || ready < count)
			break;

		nevents -= ready;
	}
	kqueue_scan_finish(&scan);
	*retval = ncollected;
done:
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 *       ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case EINTR:
		error = pollout(pl, fds, nfds);
		if (error == 0)
			error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = pollout(pl, fds, nfds);
		break;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrpollfd(p, pl, nfds);
#endif /* KTRACE */
bad:
	if (pl != pfds)
		free(pl, M_TEMP, sz);

	kqpoll_done(nfds);

	return (error);
}

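/*
 * Register the given kqueue events for a single pollfd entry and
 * map registration errors onto the entry's revents bits.  Returns
 * the number of events successfully registered.
 */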
int
ppollregister_evts(struct proc *p, struct kevent *kevp, int nkev,
    struct pollfd *pl, unsigned int pollid)
{
	int i, error, nevents = 0;

	KASSERT(pl->revents == 0);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrevent(p, kevp, nkev);
#endif
	for (i = 0; i < nkev; i++, kevp++) {
again:
		error = kqueue_register(p->p_kq, kevp, pollid, p);
		switch (error) {
		case 0:
			nevents++;
			break;
		case EOPNOTSUPP:/* No underlying kqfilter */
		case EINVAL:	/* Unimplemented filter */
			break;
		case EBADF:	/* Bad file descriptor */
			pl->revents |= POLLNVAL;
			break;
		case EPERM:	/* Specific to FIFO */
			KASSERT(kevp->filter == EVFILT_WRITE);
			if (nkev == 1) {
				/*
				 * If this is the only filter make sure
				 * POLLHUP is passed to userland.
				 */
				kevp->filter = EVFILT_EXCEPT;
				goto again;
			}
			break;
		case EPIPE:	/* Specific to pipes */
			KASSERT(kevp->filter == EVFILT_WRITE);
			pl->revents |= POLLHUP;
			break;
		default:
			DPRINTFN(0, "poll err %lu fd %d revents %02x serial"
			    " %lu filt %d ERROR=%d\n",
			    ((unsigned long)kevp->udata - p->p_kq_serial),
			    pl->fd, pl->revents, p->p_kq_serial, kevp->filter,
			    error);
			/* FALLTHROUGH */
		case ENXIO:	/* Device has been detached */
			pl->revents |= POLLERR;
			break;
		}
	}

	return (nevents);
}

/*
 * Convert pollfd into kqueue events and register them on the
 * per-thread queue.
 *
 * At most 3 events can correspond to a single pollfd.
 */
void
ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered,
    int *ncollected)
{
	int i, nkev, nevt, forcehup;
	struct kevent kev[3], *kevp;

	for (i = 0; i < nfds; i++) {
		pl[i].events &= ~POLL_NOHUP;
		pl[i].revents = 0;

		if (pl[i].fd < 0)
			continue;

		/*
		 * POLLHUP checking is implicit in the event filters.
		 * However, the checking must happen even if no events
		 * are requested.
		 */
		forcehup = ((pl[i].events & ~POLLHUP) == 0);

		DPRINTFN(1, "poll set %d/%d fd %d events %02x serial %lu\n",
		    i+1, nfds, pl[i].fd, pl[i].events, p->p_kq_serial);

		nevt = 0;
		nkev = 0;
		kevp = kev;
		if (pl[i].events & (POLLIN | POLLRDNORM)) {
			EV_SET(kevp, pl[i].fd, EVFILT_READ,
			    EV_ADD|EV_ENABLE|__EV_POLL, 0, 0,
			    (void *)(p->p_kq_serial + i));
			nkev++;
			kevp++;
		}
		if (pl[i].events & (POLLOUT | POLLWRNORM)) {
			EV_SET(kevp, pl[i].fd, EVFILT_WRITE,
			    EV_ADD|EV_ENABLE|__EV_POLL, 0, 0,
			    (void *)(p->p_kq_serial + i));
			nkev++;
			kevp++;
		}
		if ((pl[i].events & (POLLPRI | POLLRDBAND)) || forcehup) {
			int evff = forcehup ? 0 : NOTE_OOB;

			EV_SET(kevp, pl[i].fd, EVFILT_EXCEPT,
			    EV_ADD|EV_ENABLE|__EV_POLL, evff, 0,
			    (void *)(p->p_kq_serial + i));
			nkev++;
			kevp++;
		}

		if (nkev == 0)
			continue;

		*nregistered += ppollregister_evts(p, kev, nkev, &pl[i], i);

		if (pl[i].revents != 0)
			(*ncollected)++;
	}

	DPRINTFN(1, "poll registered = %d, collected = %d\n", *nregistered,
	    *ncollected);
}

/*
 * Convert given kqueue event into corresponding poll(2) revents bit.
 */
int
ppollcollect(struct proc *p, struct kevent *kevp, struct pollfd *pl, u_int nfds)
{
	static struct timeval poll_errintvl = { 5, 0 };
	static struct timeval poll_lasterr;
	int already_seen;
	unsigned long i;

	/* Extract poll array index */
	i = (unsigned long)kevp->udata - p->p_kq_serial;

	if (i >= nfds) {
		panic("%s: spurious kevp %p nfds %u udata 0x%lx serial 0x%lx",
		    __func__, kevp, nfds,
		    (unsigned long)kevp->udata, p->p_kq_serial);
	}
	if ((int)kevp->ident != pl[i].fd) {
		panic("%s: kevp %p %lu/%d mismatch fd %d!=%d serial 0x%lx",
		    __func__, kevp, i + 1, nfds, (int)kevp->ident, pl[i].fd,
		    p->p_kq_serial);
	}

	/*
	 * A given descriptor may already have generated an error
	 * against another filter during kqueue_register().
	 *
	 * Make sure to set the appropriate flags but do not
	 * increment `*retval' more than once.
	 */
	already_seen = (pl[i].revents != 0);

	/* POLLNVAL preempts other events. */
	if ((kevp->flags & EV_ERROR) && kevp->data == EBADF) {
		pl[i].revents = POLLNVAL;
		goto done;
	} else if (pl[i].revents & POLLNVAL) {
		goto done;
	}

	switch (kevp->filter) {
	case EVFILT_READ:
		if (kevp->flags & __EV_HUP)
			pl[i].revents |= POLLHUP;
		if (pl[i].events & (POLLIN | POLLRDNORM))
			pl[i].revents |= pl[i].events & (POLLIN | POLLRDNORM);
		break;
	case EVFILT_WRITE:
		/* POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */
		if (kevp->flags & __EV_HUP) {
			pl[i].revents |= POLLHUP;
		} else if (pl[i].events & (POLLOUT | POLLWRNORM)) {
			pl[i].revents |= pl[i].events & (POLLOUT | POLLWRNORM);
		}
		break;
	case EVFILT_EXCEPT:
		if (kevp->flags & __EV_HUP) {
			if (pl[i].events != 0 && pl[i].events != POLLOUT)
				DPRINTFN(0, "weird events %x\n", pl[i].events);
			pl[i].revents |= POLLHUP;
			break;
		}
		if (pl[i].events & (POLLPRI | POLLRDBAND))
			pl[i].revents |= pl[i].events & (POLLPRI | POLLRDBAND);
		break;
	default:
		KASSERT(0);
	}

done:
	DPRINTFN(1, "poll get %lu/%d fd %d revents %02x serial %lu filt %d\n",
	    i+1, nfds, pl[i].fd, pl[i].revents, (unsigned long)kevp->udata,
	    kevp->filter);

	/*
	 * Make noise about unclaimed events as they might indicate a bug
	 * and can result in spurious-looking wakeups of poll(2).
	 *
	 * Live-locking within the system call should not happen because
	 * the scan loop in doppoll() has an upper limit for the number
	 * of events to process.
	 */
	if (pl[i].revents == 0 && ratecheck(&poll_lasterr, &poll_errintvl)) {
		printf("%s[%d]: poll index %lu fd %d events 0x%x "
		    "filter %d/0x%x unclaimed\n",
		    p->p_p->ps_comm, p->p_tid, i, pl[i].fd,
		    pl[i].events, kevp->filter, kevp->flags);
	}

	if (!already_seen && (pl[i].revents != 0))
		return (1);

	return (0);
}

/*
 * utrace system call
 */
int
sys_utrace(struct proc *curp, void *v, register_t *retval)
{
#ifdef KTRACE
	struct sys_utrace_args /* {
		syscallarg(const char *) label;
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;

	return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr),
	    SCARG(uap, len)));
#else
	return (0);
#endif
}
1357