xref: /netbsd-src/lib/libc/sys/kqueue.2 (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1.\"	$NetBSD: kqueue.2,v 1.35 2015/12/08 14:52:06 christos Exp $
2.\"
3.\" Copyright (c) 2000 Jonathan Lemon
4.\" All rights reserved.
5.\"
6.\" Copyright (c) 2001, 2002, 2003 The NetBSD Foundation, Inc.
7.\" All rights reserved.
8.\"
9.\" Portions of this documentation is derived from text contributed by
10.\" Luke Mewburn.
11.\"
12.\" Redistribution and use in source and binary forms, with or without
13.\" modification, are permitted provided that the following conditions
14.\" are met:
15.\" 1. Redistributions of source code must retain the above copyright
16.\"    notice, this list of conditions and the following disclaimer.
17.\" 2. Redistributions in binary form must reproduce the above copyright
18.\"    notice, this list of conditions and the following disclaimer in the
19.\"    documentation and/or other materials provided with the distribution.
20.\"
21.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND
22.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE.
32.\"
33.\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $
34.\"
35.Dd December 8, 2015
36.Dt KQUEUE 2
37.Os
38.Sh NAME
39.Nm kqueue ,
40.Nm kqueue1 ,
41.Nm kevent
42.Nd kernel event notification mechanism
43.Sh LIBRARY
44.Lb libc
45.Sh SYNOPSIS
46.In sys/event.h
47.In sys/time.h
48.Ft int
49.Fn kqueue "void"
50.Ft int
51.Fn kqueue1 "int flags"
52.Ft int
53.Fn kevent "int kq" "const struct kevent *changelist" "size_t nchanges" "struct kevent *eventlist" "size_t nevents" "const struct timespec *timeout"
54.Fn EV_SET "\*[Am]kev" ident filter flags fflags data udata
55.Sh DESCRIPTION
56.Fn kqueue
57provides a generic method of notifying the user when an event
58happens or a condition holds, based on the results of small
59pieces of kernel code termed filters.
60A kevent is identified by the (ident, filter) pair; there may only
61be one unique kevent per kqueue.
62.Pp
63The filter is executed upon the initial registration of a kevent
64in order to detect whether a preexisting condition is present, and is also
65executed whenever an event is passed to the filter for evaluation.
66If the filter determines that the condition should be reported,
67then the kevent is placed on the kqueue for the user to retrieve.
68.Pp
69The filter is also run when the user attempts to retrieve the kevent
70from the kqueue.
71If the filter indicates that the condition that triggered
72the event no longer holds, the kevent is removed from the kqueue and
73is not returned.
74.Pp
75Multiple events which trigger the filter do not result in multiple
76kevents being placed on the kqueue; instead, the filter will aggregate
77the events into a single struct kevent.
78Calling
79.Fn close
80on a file descriptor will remove any kevents that reference the descriptor.
81.Pp
82.Fn kqueue
83creates a new kernel event queue and returns a descriptor.
84.Pp
85The
86.Fn kqueue1
87function also allows the following
88.Fa flags
89to be set on the returned file descriptor:
90.Bl -column O_NONBLOCK -offset indent
91.It Dv O_CLOEXEC
92Set the close on exec property.
93.It Dv O_NONBLOCK
94Sets non-blocking I/O.
95.It Dv O_NOSIGPIPE
96Return
97.Er EPIPE
98instead of raising
99.Dv SIGPIPE .
100.El
101The queue is not inherited by a child created with
102.Xr fork 2 .
103.\" However, if
104.\" .Xr rfork 2
105.\" is called without the
106.\" .Dv RFFDG
107.\" flag, then the descriptor table is shared,
108.\" which will allow sharing of the kqueue between two processes.
109.Pp
110.Fn kevent
111is used to register events with the queue, and return any pending
112events to the user.
113.Fa changelist
114is a pointer to an array of
115.Va kevent
116structures, as defined in
117.In sys/event.h .
118All changes contained in the
119.Fa changelist
120are applied before any pending events are read from the queue.
121.Fa nchanges
122gives the size of
123.Fa changelist .
124.Fa eventlist
125is a pointer to an array of kevent structures.
126.Fa nevents
127determines the size of
128.Fa eventlist .
129If
130.Fa timeout
131is a
132.No non- Ns Dv NULL
133pointer, it specifies a maximum interval to wait
134for an event, which will be interpreted as a struct timespec.
135If
136.Fa timeout
137is a
138.Dv NULL
139pointer,
140.Fn kevent
141waits indefinitely.
142To effect a poll, the
143.Fa timeout
144argument should be
145.No non- Ns Dv NULL ,
146pointing to a zero-valued
147.Va timespec
148structure.
149The same array may be used for the
150.Fa changelist
151and
152.Fa eventlist .
153.Pp
154.Fn EV_SET
155is a macro which is provided for ease of initializing a
156kevent structure.
157.Pp
158The
159.Va kevent
160structure is defined as:
161.Bd -literal
162struct kevent {
163	uintptr_t ident;	/* identifier for this event */
164	uint32_t  filter;	/* filter for event */
165	uint32_t  flags;	/* action flags for kqueue */
166	uint32_t  fflags;	/* filter flag value */
167	int64_t   data;		/* filter data value */
168	intptr_t  udata;	/* opaque user data identifier */
169};
170.Ed
171.Pp
172The fields of
173.Fa struct kevent
174are:
175.Bl -tag -width XXXfilter -offset indent
176.It ident
177Value used to identify this event.
178The exact interpretation is determined by the attached filter,
179but often is a file descriptor.
180.It filter
181Identifies the kernel filter used to process this event.
182There are pre-defined system filters (which are described below), and
183other filters may be added by kernel subsystems as necessary.
184.It flags
185Actions to perform on the event.
186.It fflags
187Filter-specific flags.
188.It data
189Filter-specific data value.
190.It udata
191Opaque user-defined value passed through the kernel unchanged.
192.El
193.Pp
194The
195.Va flags
196field can contain the following values:
197.Bl -tag -width XXXEV_ONESHOT -offset indent
198.It EV_ADD
199Adds the event to the kqueue.
200Re-adding an existing event will modify the parameters of the original
201event, and not result in a duplicate entry.
202Adding an event automatically enables it,
203unless overridden by the EV_DISABLE flag.
204.It EV_ENABLE
205Permit
206.Fn kevent
207to return the event if it is triggered.
208.It EV_DISABLE
209Disable the event so
210.Fn kevent
211will not return it.
212The filter itself is not disabled.
213.It EV_DISPATCH
214Disable the event source immediately after delivery of an event.
215See
216.Dv EV_DISABLE
217above.
218.It EV_DELETE
219Removes the event from the kqueue.
220Events which are attached to file descriptors are automatically deleted
221on the last close of the descriptor.
222.It EV_RECEIPT
223This flag is useful for making bulk changes to a kqueue without draining
224any pending events.
225When passed as input, it forces
226.Dv EV_ERROR
227to always be returned.
228When a filter is successfully added the
229.Va data
230field will be zero.
231.It EV_ONESHOT
232Causes the event to return only the first occurrence of the filter
233being triggered.
234After the user retrieves the event from the kqueue, it is deleted.
235.It EV_CLEAR
236After the event is retrieved by the user, its state is reset.
237This is useful for filters which report state transitions
238instead of the current state.
239Note that some filters may automatically set this flag internally.
240.It EV_EOF
241Filters may set this flag to indicate a filter-specific EOF condition.
242.It EV_ERROR
243See
244.Sx RETURN VALUES
245below.
246.El
247.Ss Filters
248Filters are identified by a number.
249There are two types of filters: pre-defined filters which
250are described below, and third-party filters that may be added with
251.Xr kfilter_register 9
252by kernel sub-systems, third-party device drivers, or loadable
253kernel modules.
254.Pp
255As a third-party filter is referenced by a well-known name instead
256of a statically assigned number, two
257.Xr ioctl 2 Ns s
258are supported on the file descriptor returned by
259.Fn kqueue
260to map a filter name to a filter number, and vice-versa (passing
261arguments in a structure described below):
262.Bl -tag -width KFILTER_BYFILTER -offset indent
263.It KFILTER_BYFILTER
264Map
265.Va filter
266to
267.Va name ,
268which is of size
269.Va len .
270.It KFILTER_BYNAME
271Map
272.Va name
273to
274.Va filter .
275.Va len
276is ignored.
277.El
278.Pp
279The following structure is used to pass arguments in and out of the
280.Xr ioctl 2 :
281.Bd -literal -offset indent
282struct kfilter_mapping {
283	char	 *name;		/* name to lookup or return */
284	size_t	 len;		/* length of name */
285	uint32_t filter;	/* filter to lookup or return */
286};
287.Ed
288.Pp
289Arguments may be passed to and from the filter via the
290.Va fflags
291and
292.Va data
293fields in the kevent structure.
294.Pp
295The predefined system filters are:
296.Bl -tag -width EVFILT_SIGNAL
297.It EVFILT_READ
298Takes a descriptor as the identifier, and returns whenever
299there is data available to read.
300The behavior of the filter is slightly different depending
301on the descriptor type.
302.Pp
303.Bl -tag -width 2n
304.It Sockets
305Sockets which have previously been passed to
306.Fn listen
307return when there is an incoming connection pending.
308.Va data
309contains the size of the listen backlog (i.e., the number of
310connections ready to be accepted with
311.Xr accept 2 . )
312.Pp
313Other socket descriptors return when there is data to be read,
314subject to the
315.Dv SO_RCVLOWAT
316value of the socket buffer.
317This may be overridden with a per-filter low water mark at the
318time the filter is added by setting the
319NOTE_LOWAT
320flag in
321.Va fflags ,
322and specifying the new low water mark in
323.Va data .
324On return,
325.Va data
326contains the number of bytes in the socket buffer.
327.Pp
328If the read direction of the socket has been shut down, then the filter
329also sets EV_EOF in
330.Va flags ,
331and returns the socket error (if any) in
332.Va fflags .
333It is possible for EOF to be returned (indicating the connection is gone)
334while there is still data pending in the socket buffer.
335.It Vnodes
336Returns when the file pointer is not at the end of file.
337.Va data
338contains the offset from current position to end of file,
339and may be negative.
340.It "Fifos, Pipes"
341Returns when there is data to read;
342.Va data
343contains the number of bytes available.
344.Pp
345When the last writer disconnects, the filter will set EV_EOF in
346.Va flags .
347This may be cleared by passing in EV_CLEAR, at which point the
348filter will resume waiting for data to become available before
349returning.
350.El
351.It EVFILT_WRITE
352Takes a descriptor as the identifier, and returns whenever
353it is possible to write to the descriptor.
354For sockets, pipes, fifos, and ttys,
355.Va data
356will contain the amount of space remaining in the write buffer.
357The filter will set EV_EOF when the reader disconnects, and for
358the fifo case, this may be cleared by use of EV_CLEAR.
359Note that this filter is not supported for vnodes.
360.Pp
361For sockets, the low water mark and socket error handling are
362identical to the EVFILT_READ case.
363.It EVFILT_AIO
364This is not implemented in
365.Nx .
366.ig
367The sigevent portion of the AIO request is filled in, with
368.Va sigev_notify_kqueue
369containing the descriptor of the kqueue that the event should
370be attached to,
371.Va sigev_value
372containing the udata value, and
373.Va sigev_notify
374set to SIGEV_EVENT.
375When the aio_* function is called, the event will be registered
376with the specified kqueue, and the
377.Va ident
378argument set to the
379.Fa struct aiocb
380returned by the aio_* function.
381The filter returns under the same conditions as aio_error.
382.Pp
383Alternatively, a kevent structure may be initialized, with
384.Va ident
385containing the descriptor of the kqueue, and the
386address of the kevent structure placed in the
387.Va aio_lio_opcode
388field of the AIO request.
389However, this approach will not work on
390architectures with 64-bit pointers, and should be considered deprecated.
391..
392.It EVFILT_VNODE
393Takes a file descriptor as the identifier and the events to watch for in
394.Va fflags ,
395and returns when one or more of the requested events occurs on the descriptor.
396The events to monitor are:
397.Bl -tag -width XXNOTE_RENAME
398.It NOTE_DELETE
399.Fn unlink
400was called on the file referenced by the descriptor.
401.It NOTE_WRITE
402A write occurred on the file referenced by the descriptor.
403.It NOTE_EXTEND
404The file referenced by the descriptor was extended.
405.It NOTE_ATTRIB
406The file referenced by the descriptor had its attributes changed.
407.It NOTE_LINK
408The link count on the file changed.
409.It NOTE_RENAME
410The file referenced by the descriptor was renamed.
411.It NOTE_REVOKE
412Access to the file was revoked via
413.Xr revoke 2
414or the underlying file system was unmounted.
415.El
416.Pp
417On return,
418.Va fflags
419contains the events which triggered the filter.
420.It EVFILT_PROC
421Takes the process ID to monitor as the identifier and the events to watch for
422in
423.Va fflags ,
424and returns when the process performs one or more of the requested events.
425If a process can normally see another process, it can attach an event to it.
426The events to monitor are:
427.Bl -tag -width XXNOTE_TRACKERR
428.It NOTE_EXIT
429The process has exited.
430The exit code of the process is stored in
431.Va data .
432.It NOTE_FORK
433The process has called
434.Fn fork .
435.It NOTE_EXEC
436The process has executed a new process via
437.Xr execve 2
438or similar call.
439.It NOTE_TRACK
440Follow a process across
441.Fn fork
442calls.
443The parent process will return with NOTE_TRACK set in the
444.Va fflags
445field, while the child process will return with NOTE_CHILD set in
446.Va fflags
447and the parent PID in
448.Va data .
449.It NOTE_TRACKERR
450This flag is returned if the system was unable to attach an event to
451the child process, usually due to resource limitations.
452.El
453.Pp
454On return,
455.Va fflags
456contains the events which triggered the filter.
457.It EVFILT_SIGNAL
458Takes the signal number to monitor as the identifier and returns
459when the given signal is delivered to the current process.
460This coexists with the
461.Fn signal
462and
463.Fn sigaction
464facilities, and has a lower precedence.
465The filter will record
466all attempts to deliver a signal to a process, even if the signal has
467been marked as SIG_IGN.
468Event notification happens after normal signal delivery processing.
469.Va data
470returns the number of times the signal has occurred since the last call to
471.Fn kevent .
472This filter automatically sets the EV_CLEAR flag internally.
473.It EVFILT_TIMER
474Establishes an arbitrary timer identified by
475.Va ident .
476When adding a timer,
477.Va data
478specifies the timeout period in milliseconds.
479The timer will be periodic unless EV_ONESHOT is specified.
480On return,
481.Va data
482contains the number of times the timeout has expired since the last call to
483.Fn kevent .
484This filter automatically sets the EV_CLEAR flag internally.
485.El
486.Sh RETURN VALUES
487.Fn kqueue
488creates a new kernel event queue and returns a file descriptor.
489If there was an error creating the kernel event queue, a value of \-1 is
490returned and errno is set.
491.Pp
492.Fn kevent
493returns the number of events placed in the
494.Fa eventlist ,
495up to the value given by
496.Fa nevents .
497If an error occurs while processing an element of the
498.Fa changelist
499and there is enough room in the
500.Fa eventlist ,
501then the event will be placed in the
502.Fa eventlist
503with
504.Dv EV_ERROR
505set in
506.Va flags
507and the system error in
508.Va data .
509Otherwise,
510.Dv \-1
511will be returned, and
512.Dv errno
513will be set to indicate the error condition.
514If the time limit expires, then
515.Fn kevent
516returns 0.
517.Sh EXAMPLES
518The following example program monitors a file (provided to it as the first
519argument) and prints information about some common events it receives
520notifications for:
521.Bd -literal -offset indent
522#include \*[Lt]sys/types.h\*[Gt]
523#include \*[Lt]sys/event.h\*[Gt]
524#include \*[Lt]sys/time.h\*[Gt]
525#include \*[Lt]stdio.h\*[Gt]
526#include \*[Lt]unistd.h\*[Gt]
527#include \*[Lt]stdlib.h\*[Gt]
528#include \*[Lt]fcntl.h\*[Gt]
529#include \*[Lt]err.h\*[Gt]
530
531int
532main(int argc, char *argv[])
533{
534        int fd, kq, nev;
535        struct kevent ev;
536        static const struct timespec tout = { 1, 0 };
537
538        if ((fd = open(argv[1], O_RDONLY)) == -1)
539                err(1, "Cannot open `%s'", argv[1]);
540
541        if ((kq = kqueue()) == -1)
542                err(1, "Cannot create kqueue");
543
544        EV_SET(\*[Am]ev, fd, EVFILT_VNODE, EV_ADD | EV_ENABLE | EV_CLEAR,
545            NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_ATTRIB|NOTE_LINK|
546            NOTE_RENAME|NOTE_REVOKE, 0, 0);
547        if (kevent(kq, \*[Am]ev, 1, NULL, 0, \*[Am]tout) == -1)
548                err(1, "kevent");
549        for (;;) {
550                nev = kevent(kq, NULL, 0, \*[Am]ev, 1, \*[Am]tout);
551                if (nev == -1)
552                        err(1, "kevent");
553                if (nev == 0)
554                        continue;
555                if (ev.fflags \*[Am] NOTE_DELETE) {
556                        printf("deleted ");
557                        ev.fflags \*[Am]= ~NOTE_DELETE;
558                }
559                if (ev.fflags \*[Am] NOTE_WRITE) {
560                        printf("written ");
561                        ev.fflags \*[Am]= ~NOTE_WRITE;
562                }
563                if (ev.fflags \*[Am] NOTE_EXTEND) {
564                        printf("extended ");
565                        ev.fflags \*[Am]= ~NOTE_EXTEND;
566                }
567                if (ev.fflags \*[Am] NOTE_ATTRIB) {
568                        printf("chmod/chown/utimes ");
569                        ev.fflags \*[Am]= ~NOTE_ATTRIB;
570                }
571                if (ev.fflags \*[Am] NOTE_LINK) {
572                        printf("hardlinked ");
573                        ev.fflags \*[Am]= ~NOTE_LINK;
574                }
575                if (ev.fflags \*[Am] NOTE_RENAME) {
576                        printf("renamed ");
577                        ev.fflags \*[Am]= ~NOTE_RENAME;
578                }
579                if (ev.fflags \*[Am] NOTE_REVOKE) {
580                        printf("revoked ");
581                        ev.fflags \*[Am]= ~NOTE_REVOKE;
582                }
583                printf("\\n");
584                if (ev.fflags)
585                        warnx("unknown event 0x%x\\n", ev.fflags);
586        }
587}
588.Ed
589.Sh ERRORS
590The
591.Fn kqueue
592function fails if:
593.Bl -tag -width Er
594.It Bq Er EMFILE
595The per-process descriptor table is full.
596.It Bq Er ENFILE
597The system file table is full.
598.It Bq Er ENOMEM
599The kernel failed to allocate enough memory for the kernel queue.
600.El
601.Pp
602The
603.Fn kevent
604function fails if:
605.Bl -tag -width Er
606.It Bq Er EACCES
607The process does not have permission to register a filter.
608.It Bq Er EBADF
609The specified descriptor is invalid.
610.It Bq Er EFAULT
611There was an error reading or writing the
612.Va kevent
613structure.
614.It Bq Er EINTR
615A signal was delivered before the timeout expired and before any
616events were placed on the kqueue for return.
617.It Bq Er EINVAL
618The specified time limit or filter is invalid.
619.It Bq Er ENOENT
620The event could not be found to be modified or deleted.
621.It Bq Er ENOMEM
622No memory was available to register the event.
623.It Bq Er EOPNOTSUPP
624This type of file descriptor is not supported for
625.Fn kevent
626operations.
627.It Bq Er ESRCH
628The specified process to attach to does not exist.
629.El
630.Sh SEE ALSO
631.\" .Xr aio_error 2 ,
632.\" .Xr aio_read 2 ,
633.\" .Xr aio_return 2 ,
634.Xr ioctl 2 ,
635.Xr poll 2 ,
636.Xr read 2 ,
637.Xr select 2 ,
638.Xr sigaction 2 ,
639.Xr write 2 ,
640.Xr signal 3 ,
641.Xr kfilter_register 9 ,
642.Xr knote 9
643.Rs
644.%A Jonathan Lemon
645.%T "Kqueue: A Generic and Scalable Event Notification Facility"
646.%I USENIX Association
647.%B Proceedings of the FREENIX Track: 2001 USENIX Annual Technical Conference
648.%D June 25-30, 2001
649.%U http://www.usenix.org/event/usenix01/freenix01/full_papers/lemon/lemon.pdf
650.Re
651.Sh HISTORY
652The
653.Fn kqueue
654and
655.Fn kevent
656functions first appeared in
657.Fx 4.1 ,
658and then in
659.Nx 2.0 .
660The
661.Fn kqueue1
662function first appeared in
663.Nx 6.0 .
664