/*	$OpenBSD: kern_event.c,v 1.72 2016/05/13 19:05:07 tedu Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/timeout.h>

int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct proc *p, int *retval);

int	kqueue_read(struct file *fp, off_t *poff, struct uio *uio,
		    struct ucred *cred);
int	kqueue_write(struct file *fp, off_t *poff, struct uio *uio,
		    struct ucred *cred);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_poll,
	kqueue_kqfilter,
	kqueue_stat,
	kqueue_close
};

void	knote_attach(struct knote *kn, struct filedesc *fdp);
void	knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
#define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
#define knote_free(kn) pool_put(&knote_pool, (kn))

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timer(struct knote *kn, long hint);
void	filt_seltruedetach(struct knote *kn);

struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

struct	pool knote_pool;
struct	pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

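/*
 * Mark a knote active and, unless it is already queued or disabled,
 * put it on its kqueue's pending queue.
 */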
#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ ((val) >> 8)) & (mask))

extern struct filterops sig_filtops;
#ifdef notyet
extern struct filterops aio_filtops;
#endif

/*
 * Table for all system-defined filters.
 */
struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

void KQREF(struct kqueue *);
void KQRELE(struct kqueue *);

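/*
 * Acquire and release references on a kqueue; the kqueue is returned
 * to its pool when the last reference is dropped.
 */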
void
KQREF(struct kqueue *kq)
{
	++kq->kq_refs;
}

void
KQRELE(struct kqueue *kq)
{
	if (--kq->kq_refs == 0) {
		pool_put(&kqueue_pool, kq);
	}
}

void kqueue_init(void);

void
kqueue_init(void)
{

	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, 0, PR_WAITOK,
	    "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, 0, PR_WAITOK,
	    "knotepl", NULL);
}

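/*
 * Attach a knote whose identifier is a file descriptor by delegating
 * to the backing file's own fo_kqfilter method.
 */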
int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

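/*
 * A kqueue can itself be monitored with EVFILT_READ; hang the knote
 * off the kqueue's own klist.
 */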
int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

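/*
 * EVFILT_READ on a kqueue fires when events are pending; kn_data
 * reports how many.
 */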
int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

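/*
 * Attach an EVFILT_PROC knote to the process named by kn_id.
 * A pledged process needs PLEDGE_PROC to do this.
 */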
int
filt_procattach(struct knote *kn)
{
	struct process *pr;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&pr->ps_klist, kn, kn_selnext);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct process *pr = kn->kn_ptr.p_process;

	if (kn->kn_status & KN_DETACHED)
		return;

	/* XXX locking?  this might modify another process. */
	SLIST_REMOVE(&pr->ps_klist, kn, knote, kn_selnext);
}

int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;

		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = pr->ps_mainproc->p_xstat;
		SLIST_REMOVE(&pr->ps_klist, kn, knote, kn_selnext);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

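/*
 * (Re)arm a timer knote: convert the period in kn_sdata from
 * milliseconds to ticks and schedule the timeout at least one tick
 * in the future.
 */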
static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	timeout_add(kn->kn_hook, tticks ? tticks : 1);
}

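/*
 * Timeout handler: count the expiration, activate the knote and,
 * unless the event is one-shot, rearm the timeout.
 */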
void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}

/*
 * data contains the timer interval, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timer(struct knote *kn, long hint)
{
	return (kn->kn_data != 0);
}

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll can do.
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, with
 * the same effect as a filter using filt_seltrue() as its event method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops =
	{ 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

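/*
 * kqueue(2): allocate a file descriptor and a zeroed kqueue, wire
 * them together, and record in the filedesc that this process now
 * uses kqueues.
 */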
int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	fdpunlock(fdp);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = pool_get(&kqueue_pool, PR_WAITOK|PR_ZERO);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	KQREF(kq);
	*retval = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	kq->kq_fdp = fdp;
	FILE_SET_MATURE(fp, p);
	return (0);
}

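/*
 * kevent(2): apply the changelist in KQ_NEVENTS-sized batches,
 * reporting per-change errors through the eventlist when there is
 * room, then scan for pending events.
 */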
int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL ||
	    (fp->f_type != DTYPE_KQUEUE))
		return (EBADF);

	FREF(fp);

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		SCARG(uap, timeout) = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	while (SCARG(uap, nchanges) > 0) {
		n = SCARG(uap, nchanges) > KQ_NEVENTS ?
		    KQ_NEVENTS : SCARG(uap, nchanges);
		error = copyin(SCARG(uap, changelist), kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kq->kq_kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	KQREF(kq);
	FRELE(fp, p);
	error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist),
	    SCARG(uap, timeout), p, &n);
	KQRELE(kq);
	*retval = n;
	return (error);

 done:
	FRELE(fp, p);
	return (error);
}

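/*
 * Apply one change from a kevent(2) changelist: look up an existing
 * knote matching the (identifier, filter, kqueue) triple, allocate
 * and attach a new one for EV_ADD, and handle EV_DELETE, EV_DISABLE
 * and EV_ENABLE.
 */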
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
			return (EBADF);
		FREF(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) {
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p, fdp);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p, p->p_fd);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		FRELE(fp, p);
	return (error);
}

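/*
 * Collect up to maxevents pending events and copy them out to
 * ulistp.  A marker knote inserted at the tail bounds the scan, so
 * the queue can be released around copyout; if the queue is empty,
 * sleep until the timeout expires.
 */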
int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct proc *p, int *retval)
{
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
			/* No timeout, just poll */
			timeout = -1;
			goto start;
		}
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}

		timeout = atv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&atv);

		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	if (kq->kq_state & KQ_DYING) {
		error = EBADF;
		goto done;
	}

	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p, p->p_fd);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_STRUCT))
				ktrevent(p, kq->kq_kev, nkev);
#endif
			error = copyout(kq->kq_kev, ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kq->kq_kev, nkev);
#endif
		error = copyout(kq->kq_kev, ulistp,
		    sizeof(struct kevent) * nkev);
	}
	*retval = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;
	int s = splhigh();

	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

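/*
 * Close a kqueue descriptor: walk both the fd-indexed knlist and the
 * identifier hash, detaching and freeing every knote registered on
 * this kqueue, then mark the kqueue dying and drop its reference.
 */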
int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp = p->p_fd;
	struct knote **knp, *kn, *kn0;
	int i;

	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				FRELE(kn->kn_fp, p);
				knote_free(kn);
				*knp = kn0;
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
		/* XXX non-fd release of kn->kn_ptr */
					knote_free(kn);
					*knp = kn0;
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	fp->f_data = NULL;

	kq->kq_state |= KQ_DYING;
	kqueue_wakeup(kq);
	KQRELE(kq);

	return (0);
}

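/*
 * Wake up anyone sleeping in kqueue_scan(), notify select/poll
 * waiters, and activate knotes attached to the kqueue itself.
 */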
void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	} else
		KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	KNOTE_ACTIVATE(kn);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;

	SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn0)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct proc *p, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p, p->p_fd);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = &fdp->fd_knlist[fd];

	knote_remove(p, list);
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct proc *p)
{
	struct process *pr = p->p_p;

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	knote_remove(p, &pr->ps_klist);
}

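/*
 * Link a knote into the filedesc's lookup structures: fd-backed
 * knotes go into the fd-indexed knlist, grown in KQEXTENT steps as
 * needed; all others go into the identifier hash.
 */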
void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list;
	int size;

	if (!kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
			    M_WAITOK, &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(struct klist), M_TEMP,
		    M_WAITOK);
		memcpy(list, fdp->fd_knlist,
		    fdp->fd_knlistsize * sizeof(struct klist));
		memset(&list[fdp->fd_knlistsize], 0,
		    (size - fdp->fd_knlistsize) * sizeof(struct klist));
		free(fdp->fd_knlist, M_TEMP,
		    fdp->fd_knlistsize * sizeof(struct klist));
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
	list = &fdp->fd_knlist[kn->kn_id];
done:
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and knote_free.
 */
void
knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp)
{
	struct klist *list;

	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		FRELE(kn->kn_fp, p);
	knote_free(kn);
}

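/*
 * Put a knote on its kqueue's pending queue and wake up any thread
 * waiting in kqueue_scan().
 */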
void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED);

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

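/*
 * Mark every knote on the list detached and EOF/one-shot, so each is
 * reported once and then deleted without touching the object that
 * has gone away.
 */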
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= EV_EOF | EV_ONESHOT;
	}
}
1107