xref: /netbsd-src/lib/libc/sys/kqueue.2 (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1.\"	$NetBSD: kqueue.2,v 1.36 2016/12/19 07:48:35 abhinav Exp $
2.\"
3.\" Copyright (c) 2000 Jonathan Lemon
4.\" All rights reserved.
5.\"
6.\" Copyright (c) 2001, 2002, 2003 The NetBSD Foundation, Inc.
7.\" All rights reserved.
8.\"
9.\" Portions of this documentation is derived from text contributed by
10.\" Luke Mewburn.
11.\"
12.\" Redistribution and use in source and binary forms, with or without
13.\" modification, are permitted provided that the following conditions
14.\" are met:
15.\" 1. Redistributions of source code must retain the above copyright
16.\"    notice, this list of conditions and the following disclaimer.
17.\" 2. Redistributions in binary form must reproduce the above copyright
18.\"    notice, this list of conditions and the following disclaimer in the
19.\"    documentation and/or other materials provided with the distribution.
20.\"
21.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND
22.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE.
32.\"
33.\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $
34.\"
35.Dd December 8, 2015
36.Dt KQUEUE 2
37.Os
38.Sh NAME
39.Nm kqueue ,
40.Nm kqueue1 ,
41.Nm kevent
42.Nd kernel event notification mechanism
43.Sh LIBRARY
44.Lb libc
45.Sh SYNOPSIS
46.In sys/event.h
47.In sys/time.h
48.Ft int
49.Fn kqueue "void"
50.Ft int
51.Fn kqueue1 "int flags"
52.Ft int
53.Fn kevent "int kq" "const struct kevent *changelist" "size_t nchanges" "struct kevent *eventlist" "size_t nevents" "const struct timespec *timeout"
54.Fn EV_SET "\*[Am]kev" ident filter flags fflags data udata
55.Sh DESCRIPTION
56.Fn kqueue
57provides a generic method of notifying the user when an event
58happens or a condition holds, based on the results of small
59pieces of kernel code termed filters.
60A kevent is identified by its (ident, filter) pair; there may be only
61one kevent for any given pair per kqueue.
62.Pp
63The filter is executed upon the initial registration of a kevent
64in order to detect whether a preexisting condition is present, and is also
65executed whenever an event is passed to the filter for evaluation.
66If the filter determines that the condition should be reported,
67then the kevent is placed on the kqueue for the user to retrieve.
68.Pp
69The filter is also run when the user attempts to retrieve the kevent
70from the kqueue.
71If the filter indicates that the condition that triggered
72the event no longer holds, the kevent is removed from the kqueue and
73is not returned.
74.Pp
75Multiple events which trigger the filter do not result in multiple
76kevents being placed on the kqueue; instead, the filter will aggregate
77the events into a single struct kevent.
78Calling
79.Fn close
80on a file descriptor will remove any kevents that reference the descriptor.
81.Pp
82.Fn kqueue
83creates a new kernel event queue and returns a descriptor.
84.Pp
85The
86.Fn kqueue1
87function also allows the following
88.Fa flags
89to be set on the returned file descriptor:
90.Bl -column O_NONBLOCK -offset indent
91.It Dv O_CLOEXEC
92Set the close on exec property.
93.It Dv O_NONBLOCK
94Set non-blocking I/O.
95.It Dv O_NOSIGPIPE
96Return
97.Er EPIPE
98instead of raising
99.Dv SIGPIPE .
100.El
101The queue is not inherited by a child created with
102.Xr fork 2 .
103.\" However, if
104.\" .Xr rfork 2
105.\" is called without the
106.\" .Dv RFFDG
107.\" flag, then the descriptor table is shared,
108.\" which will allow sharing of the kqueue between two processes.
109.Pp
110.Fn kevent
111is used to register events with the queue, and return any pending
112events to the user.
113.Fa changelist
114is a pointer to an array of
115.Va kevent
116structures, as defined in
117.In sys/event.h .
118All changes contained in the
119.Fa changelist
120are applied before any pending events are read from the queue.
121.Fa nchanges
122gives the size of
123.Fa changelist .
124.Fa eventlist
125is a pointer to an array of kevent structures.
126.Fa nevents
127determines the size of
128.Fa eventlist .
129If
130.Fa timeout
131is a
132.No non- Ns Dv NULL
133pointer, it specifies a maximum interval to wait
134for an event, which will be interpreted as a struct timespec.
135If
136.Fa timeout
137is a
138.Dv NULL
139pointer,
140.Fn kevent
141waits indefinitely.
142To effect a poll, the
143.Fa timeout
144argument should be
145.No non- Ns Dv NULL ,
146pointing to a zero-valued
147.Va timespec
148structure.
149The same array may be used for the
150.Fa changelist
151and
152.Fa eventlist .
153.Pp
154.Fn EV_SET
155is a macro which is provided for ease of initializing a
156kevent structure.
157.Pp
158The
159.Va kevent
160structure is defined as:
161.Bd -literal
162struct kevent {
163	uintptr_t ident;	/* identifier for this event */
164	uint32_t  filter;	/* filter for event */
165	uint32_t  flags;	/* action flags for kqueue */
166	uint32_t  fflags;	/* filter flag value */
167	int64_t   data;		/* filter data value */
168	intptr_t  udata;	/* opaque user data identifier */
169};
170.Ed
171.Pp
172The fields of
173.Fa struct kevent
174are:
175.Bl -tag -width XXXfilter -offset indent
176.It ident
177Value used to identify this event.
178The exact interpretation is determined by the attached filter,
179but often is a file descriptor.
180.It filter
181Identifies the kernel filter used to process this event.
182There are pre-defined system filters (which are described below), and
183other filters may be added by kernel subsystems as necessary.
184.It flags
185Actions to perform on the event.
186.It fflags
187Filter-specific flags.
188.It data
189Filter-specific data value.
190.It udata
191Opaque user-defined value passed through the kernel unchanged.
192.El
193.Pp
194The
195.Va flags
196field can contain the following values:
197.Bl -tag -width XXXEV_ONESHOT -offset indent
198.It EV_ADD
199Adds the event to the kqueue.
200Re-adding an existing event will modify the parameters of the original
201event, and not result in a duplicate entry.
202Adding an event automatically enables it,
203unless overridden by the EV_DISABLE flag.
204.It EV_ENABLE
205Permit
206.Fn kevent
207to return the event if it is triggered.
208.It EV_DISABLE
209Disable the event so
210.Fn kevent
211will not return it.
212The filter itself is not disabled.
213.It EV_DISPATCH
214Disable the event source immediately after delivery of an event.
215See
216.Dv EV_DISABLE
217above.
218.It EV_DELETE
219Removes the event from the kqueue.
220Events which are attached to file descriptors are automatically deleted
221on the last close of the descriptor.
222.It EV_RECEIPT
223This flag is useful for making bulk changes to a kqueue without draining
224any pending events.
225When passed as input, it forces
226.Dv EV_ERROR
227to always be returned.
228When a filter is successfully added, the
229.Va data
230field will be zero.
231.It EV_ONESHOT
232Causes the event to return only the first occurrence of the filter
233being triggered.
234After the user retrieves the event from the kqueue, it is deleted.
235.It EV_CLEAR
236After the event is retrieved by the user, its state is reset.
237This is useful for filters which report state transitions
238instead of the current state.
239Note that some filters may automatically set this flag internally.
240.It EV_EOF
241Filters may set this flag to indicate filter-specific EOF condition.
242.It EV_ERROR
243See
244.Sx RETURN VALUES
245below.
246.El
247.Ss Filters
248Filters are identified by a number.
249There are two types of filters: pre-defined filters which
250are described below, and third-party filters that may be added with
251.Xr kfilter_register 9
252by kernel sub-systems, third-party device drivers, or loadable
253kernel modules.
254.Pp
255As a third-party filter is referenced by a well-known name instead
256of a statically assigned number, two
257.Xr ioctl 2 Ns s
258are supported on the file descriptor returned by
259.Fn kqueue
260to map a filter name to a filter number, and vice-versa (passing
261arguments in a structure described below):
262.Bl -tag -width KFILTER_BYFILTER -offset indent
263.It KFILTER_BYFILTER
264Map
265.Va filter
266to
267.Va name ,
268which is of size
269.Va len .
270.It KFILTER_BYNAME
271Map
272.Va name
273to
274.Va filter .
275.Va len
276is ignored.
277.El
278.Pp
279The following structure is used to pass arguments in and out of the
280.Xr ioctl 2 :
281.Bd -literal -offset indent
282struct kfilter_mapping {
283	char	 *name;		/* name to lookup or return */
284	size_t	 len;		/* length of name */
285	uint32_t filter;	/* filter to lookup or return */
286};
287.Ed
288.Pp
289Arguments may be passed to and from the filter via the
290.Va fflags
291and
292.Va data
293fields in the kevent structure.
294.Pp
295The predefined system filters are:
296.Bl -tag -width EVFILT_SIGNAL
297.It EVFILT_READ
298Takes a descriptor as the identifier, and returns whenever
299there is data available to read.
300The behavior of the filter is slightly different depending
301on the descriptor type.
302.Bl -tag -width 2n
303.It Sockets
304Sockets which have previously been passed to
305.Fn listen
306return when there is an incoming connection pending.
307.Va data
308contains the size of the listen backlog (i.e., the number of
309connections ready to be accepted with
310.Xr accept 2 . )
311.Pp
312Other socket descriptors return when there is data to be read,
313subject to the
314.Dv SO_RCVLOWAT
315value of the socket buffer.
316This may be overridden with a per-filter low water mark at the
317time the filter is added by setting the
318NOTE_LOWAT
319flag in
320.Va fflags ,
321and specifying the new low water mark in
322.Va data .
323On return,
324.Va data
325contains the number of bytes in the socket buffer.
326.Pp
327If the read direction of the socket has been shut down, then the filter
328also sets EV_EOF in
329.Va flags ,
330and returns the socket error (if any) in
331.Va fflags .
332It is possible for EOF to be returned (indicating the connection is gone)
333while there is still data pending in the socket buffer.
334.It Vnodes
335Returns when the file pointer is not at the end of file.
336.Va data
337contains the offset from current position to end of file,
338and may be negative.
339.It "Fifos, Pipes"
340Returns when there is data to read;
341.Va data
342contains the number of bytes available.
343.Pp
344When the last writer disconnects, the filter will set EV_EOF in
345.Va flags .
346This may be cleared by passing in EV_CLEAR, at which point the
347filter will resume waiting for data to become available before
348returning.
349.El
350.It EVFILT_WRITE
351Takes a descriptor as the identifier, and returns whenever
352it is possible to write to the descriptor.
353For sockets, pipes, fifos, and ttys,
354.Va data
355will contain the amount of space remaining in the write buffer.
356The filter will set EV_EOF when the reader disconnects, and for
357the fifo case, this may be cleared by use of EV_CLEAR.
358Note that this filter is not supported for vnodes.
359.Pp
360For sockets, the low water mark and socket error handling are
361identical to the EVFILT_READ case.
362.It EVFILT_AIO
363This is not implemented in
364.Nx .
365.ig
366The sigevent portion of the AIO request is filled in, with
367.Va sigev_notify_kqueue
368containing the descriptor of the kqueue that the event should
369be attached to,
370.Va sigev_value
371containing the udata value, and
372.Va sigev_notify
373set to SIGEV_EVENT.
374When the aio_* function is called, the event will be registered
375with the specified kqueue, and the
376.Va ident
377argument set to the
378.Fa struct aiocb
379returned by the aio_* function.
380The filter returns under the same conditions as aio_error.
381.Pp
382Alternatively, a kevent structure may be initialized, with
383.Va ident
384containing the descriptor of the kqueue, and the
385address of the kevent structure placed in the
386.Va aio_lio_opcode
387field of the AIO request.
388However, this approach will not work on
389architectures with 64-bit pointers, and should be considered deprecated.
390..
391.It EVFILT_VNODE
392Takes a file descriptor as the identifier and the events to watch for in
393.Va fflags ,
394and returns when one or more of the requested events occurs on the descriptor.
395The events to monitor are:
396.Bl -tag -width XXNOTE_RENAME
397.It NOTE_DELETE
398.Fn unlink
399was called on the file referenced by the descriptor.
400.It NOTE_WRITE
401A write occurred on the file referenced by the descriptor.
402.It NOTE_EXTEND
403The file referenced by the descriptor was extended.
404.It NOTE_ATTRIB
405The file referenced by the descriptor had its attributes changed.
406.It NOTE_LINK
407The link count on the file changed.
408.It NOTE_RENAME
409The file referenced by the descriptor was renamed.
410.It NOTE_REVOKE
411Access to the file was revoked via
412.Xr revoke 2
413or the underlying file system was unmounted.
414.El
415.Pp
416On return,
417.Va fflags
418contains the events which triggered the filter.
419.It EVFILT_PROC
420Takes the process ID to monitor as the identifier and the events to watch for
421in
422.Va fflags ,
423and returns when the process performs one or more of the requested events.
424If a process can normally see another process, it can attach an event to it.
425The events to monitor are:
426.Bl -tag -width XXNOTE_TRACKERR
427.It NOTE_EXIT
428The process has exited.
429The exit code of the process is stored in
430.Va data .
431.It NOTE_FORK
432The process has called
433.Fn fork .
434.It NOTE_EXEC
435The process has executed a new process via
436.Xr execve 2
437or similar call.
438.It NOTE_TRACK
439Follow a process across
440.Fn fork
441calls.
442The parent process will return with NOTE_TRACK set in the
443.Va fflags
444field, while the child process will return with NOTE_CHILD set in
445.Va fflags
446and the parent PID in
447.Va data .
448.It NOTE_TRACKERR
449This flag is returned if the system was unable to attach an event to
450the child process, usually due to resource limitations.
451.El
452.Pp
453On return,
454.Va fflags
455contains the events which triggered the filter.
456.It EVFILT_SIGNAL
457Takes the signal number to monitor as the identifier and returns
458when the given signal is delivered to the current process.
459This coexists with the
460.Fn signal
461and
462.Fn sigaction
463facilities, and has a lower precedence.
464The filter will record
465all attempts to deliver a signal to a process, even if the signal has
466been marked as SIG_IGN.
467Event notification happens after normal signal delivery processing.
468.Va data
469returns the number of times the signal has occurred since the last call to
470.Fn kevent .
471This filter automatically sets the EV_CLEAR flag internally.
472.It EVFILT_TIMER
473Establishes an arbitrary timer identified by
474.Va ident .
475When adding a timer,
476.Va data
477specifies the timeout period in milliseconds.
478The timer will be periodic unless EV_ONESHOT is specified.
479On return,
480.Va data
481contains the number of times the timeout has expired since the last call to
482.Fn kevent .
483This filter automatically sets the EV_CLEAR flag internally.
484.El
485.Sh RETURN VALUES
486.Fn kqueue
487creates a new kernel event queue and returns a file descriptor.
488If there was an error creating the kernel event queue, a value of \-1 is
489returned and
490.Dv errno
491is set.
492.Pp
493.Fn kevent
494returns the number of events placed in the
495.Fa eventlist ,
496up to the value given by
497.Fa nevents .
498If an error occurs while processing an element of the
499.Fa changelist
500and there is enough room in the
501.Fa eventlist ,
502then the event will be placed in the
503.Fa eventlist
504with
505.Dv EV_ERROR
506set in
507.Va flags
508and the system error in
509.Va data .
510Otherwise,
511.Dv \-1
512will be returned, and
513.Dv errno
514will be set to indicate the error condition.
515If the time limit expires, then
516.Fn kevent
517returns 0.
518.Sh EXAMPLES
519The following example program monitors a file (provided to it as the first
520argument) and prints information about some common events it receives
521notifications for:
522.Bd -literal -offset indent
523#include \*[Lt]sys/types.h\*[Gt]
524#include \*[Lt]sys/event.h\*[Gt]
525#include \*[Lt]sys/time.h\*[Gt]
526#include \*[Lt]stdio.h\*[Gt]
527#include \*[Lt]unistd.h\*[Gt]
528#include \*[Lt]stdlib.h\*[Gt]
529#include \*[Lt]fcntl.h\*[Gt]
530#include \*[Lt]err.h\*[Gt]
531
532int
533main(int argc, char *argv[])
534{
535        int fd, kq, nev;
536        struct kevent ev;
537        static const struct timespec tout = { 1, 0 };
538
539        if ((fd = open(argv[1], O_RDONLY)) == -1)
540                err(1, "Cannot open `%s'", argv[1]);
541
542        if ((kq = kqueue()) == -1)
543                err(1, "Cannot create kqueue");
544
545        EV_SET(\*[Am]ev, fd, EVFILT_VNODE, EV_ADD | EV_ENABLE | EV_CLEAR,
546            NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_ATTRIB|NOTE_LINK|
547            NOTE_RENAME|NOTE_REVOKE, 0, 0);
548        if (kevent(kq, \*[Am]ev, 1, NULL, 0, \*[Am]tout) == -1)
549                err(1, "kevent");
550        for (;;) {
551                nev = kevent(kq, NULL, 0, \*[Am]ev, 1, \*[Am]tout);
552                if (nev == -1)
553                        err(1, "kevent");
554                if (nev == 0)
555                        continue;
556                if (ev.fflags \*[Am] NOTE_DELETE) {
557                        printf("deleted ");
558                        ev.fflags \*[Am]= ~NOTE_DELETE;
559                }
560                if (ev.fflags \*[Am] NOTE_WRITE) {
561                        printf("written ");
562                        ev.fflags \*[Am]= ~NOTE_WRITE;
563                }
564                if (ev.fflags \*[Am] NOTE_EXTEND) {
565                        printf("extended ");
566                        ev.fflags \*[Am]= ~NOTE_EXTEND;
567                }
568                if (ev.fflags \*[Am] NOTE_ATTRIB) {
569                        printf("chmod/chown/utimes ");
570                        ev.fflags \*[Am]= ~NOTE_ATTRIB;
571                }
572                if (ev.fflags \*[Am] NOTE_LINK) {
573                        printf("hardlinked ");
574                        ev.fflags \*[Am]= ~NOTE_LINK;
575                }
576                if (ev.fflags \*[Am] NOTE_RENAME) {
577                        printf("renamed ");
578                        ev.fflags \*[Am]= ~NOTE_RENAME;
579                }
580                if (ev.fflags \*[Am] NOTE_REVOKE) {
581                        printf("revoked ");
582                        ev.fflags \*[Am]= ~NOTE_REVOKE;
583                }
584                printf("\\n");
585                if (ev.fflags)
586                        warnx("unknown event 0x%x\\n", ev.fflags);
587        }
588}
589.Ed
590.Sh ERRORS
591The
592.Fn kqueue
593function fails if:
594.Bl -tag -width Er
595.It Bq Er EMFILE
596The per-process descriptor table is full.
597.It Bq Er ENFILE
598The system file table is full.
599.It Bq Er ENOMEM
600The kernel failed to allocate enough memory for the kernel queue.
601.El
602.Pp
603The
604.Fn kevent
605function fails if:
606.Bl -tag -width Er
607.It Bq Er EACCES
608The process does not have permission to register a filter.
609.It Bq Er EBADF
610The specified descriptor is invalid.
611.It Bq Er EFAULT
612There was an error reading or writing the
613.Va kevent
614structure.
615.It Bq Er EINTR
616A signal was delivered before the timeout expired and before any
617events were placed on the kqueue for return.
618.It Bq Er EINVAL
619The specified time limit or filter is invalid.
620.It Bq Er ENOENT
621The event could not be found to be modified or deleted.
622.It Bq Er ENOMEM
623No memory was available to register the event.
624.It Bq Er EOPNOTSUPP
625This type of file descriptor is not supported for
626.Fn kevent
627operations.
628.It Bq Er ESRCH
629The specified process to attach to does not exist.
630.El
631.Sh SEE ALSO
632.\" .Xr aio_error 2 ,
633.\" .Xr aio_read 2 ,
634.\" .Xr aio_return 2 ,
635.Xr ioctl 2 ,
636.Xr poll 2 ,
637.Xr read 2 ,
638.Xr select 2 ,
639.Xr sigaction 2 ,
640.Xr write 2 ,
641.Xr signal 3 ,
642.Xr kfilter_register 9 ,
643.Xr knote 9
644.Rs
645.%A Jonathan Lemon
646.%T "Kqueue: A Generic and Scalable Event Notification Facility"
647.%I USENIX Association
648.%B Proceedings of the FREENIX Track: 2001 USENIX Annual Technical Conference
649.%D June 25-30, 2001
650.%U http://www.usenix.org/event/usenix01/freenix01/full_papers/lemon/lemon.pdf
651.Re
652.Sh HISTORY
653The
654.Fn kqueue
655and
656.Fn kevent
657functions first appeared in
658.Fx 4.1 ,
659and then in
660.Nx 2.0 .
661The
662.Fn kqueue1
663function first appeared in
664.Nx 6.0 .
665