1 /* $NetBSD: kqueue.c,v 1.3 2021/04/07 03:36:48 christos Exp $ */ 2 /* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */ 3 4 /* 5 * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu> 6 * Copyright 2007-2012 Niels Provos and Nick Mathewson 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include "event2/event-config.h" 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: kqueue.c,v 1.3 2021/04/07 03:36:48 christos Exp $"); 33 #include "evconfig-private.h" 34 35 #ifdef EVENT__HAVE_KQUEUE 36 37 #include <sys/types.h> 38 #ifdef EVENT__HAVE_SYS_TIME_H 39 #include <sys/time.h> 40 #endif 41 #include <sys/queue.h> 42 #include <sys/event.h> 43 #include <limits.h> 44 #include <signal.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <errno.h> 50 #ifdef EVENT__HAVE_INTTYPES_H 51 #include <inttypes.h> 52 #endif 53 54 /* Some platforms apparently define the udata field of struct kevent as 55 * intptr_t, whereas others define it as void*. There doesn't seem to be an 56 * easy way to tell them apart via autoconf, so we need to use OS macros. */ 57 #if defined(__NetBSD__) 58 #define PTR_TO_UDATA(x) ((typeof(((struct kevent *)0)->udata))(x)) 59 #define INT_TO_UDATA(x) ((typeof(((struct kevent *)0)->udata))(intptr_t)(x)) 60 #elif defined(EVENT__HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) && !defined(__CloudABI__) 61 #define PTR_TO_UDATA(x) ((intptr_t)(x)) 62 #define INT_TO_UDATA(x) ((intptr_t)(x)) 63 #else 64 #define PTR_TO_UDATA(x) (x) 65 #define INT_TO_UDATA(x) ((void*)(x)) 66 #endif 67 68 #include "event-internal.h" 69 #include "log-internal.h" 70 #include "evmap-internal.h" 71 #include "event2/thread.h" 72 #include "event2/util.h" 73 #include "evthread-internal.h" 74 #include "changelist-internal.h" 75 76 #include "kqueue-internal.h" 77 78 #define NEVENT 64 79 80 struct kqop { 81 struct kevent *changes; 82 int changes_size; 83 84 struct kevent *events; 85 int events_size; 86 int kq; 87 int notify_event_added; 88 pid_t pid; 89 }; 90 91 static void kqop_free(struct kqop *kqop); 92 93 static void *kq_init(struct event_base *); 94 static int kq_sig_add(struct event_base *, int, short, short, void *); 95 static int kq_sig_del(struct event_base *, int, short, short, void *); 96 static int kq_dispatch(struct event_base *, struct timeval *); 97 static void kq_dealloc(struct event_base *); 98 99 const struct eventop kqops = { 100 "kqueue", 101 kq_init, 102 event_changelist_add_, 103 event_changelist_del_, 104 kq_dispatch, 105 kq_dealloc, 106 1 /* need reinit */, 107 EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS, 108 EVENT_CHANGELIST_FDINFO_SIZE 109 }; 110 111 static const struct eventop kqsigops = { 112 "kqueue_signal", 113 NULL, 114 kq_sig_add, 115 kq_sig_del, 116 NULL, 117 NULL, 118 1 /* need reinit */, 119 0, 120 0 121 }; 122 123 static void * 124 kq_init(struct event_base *base) 125 { 126 int kq = -1; 127 struct kqop *kqueueop = NULL; 128 129 if (!(kqueueop = mm_calloc(1, sizeof(struct kqop)))) 130 return (NULL); 131 132 /* Initialize the kernel queue */ 133 134 if ((kq = kqueue()) == -1) { 135 event_warn("kqueue"); 136 goto err; 137 } 138 139 kqueueop->kq = kq; 140 141 kqueueop->pid = getpid(); 142 143 /* Initialize fields */ 144 kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent)); 145 if (kqueueop->changes == NULL) 146 goto err; 147 kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent)); 148 if (kqueueop->events == NULL) 149 goto err; 150 kqueueop->events_size = kqueueop->changes_size = NEVENT; 151 152 /* Check for Mac OS X kqueue bug. */ 153 memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]); 154 kqueueop->changes[0].ident = -1; 155 kqueueop->changes[0].filter = EVFILT_READ; 156 kqueueop->changes[0].flags = EV_ADD; 157 /* 158 * If kqueue works, then kevent will succeed, and it will 159 * stick an error in events[0]. If kqueue is broken, then 160 * kevent will fail. 161 */ 162 if (kevent(kq, 163 kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 164 (int)kqueueop->events[0].ident != -1 || 165 !(kqueueop->events[0].flags & EV_ERROR)) { 166 event_warn("%s: detected broken kqueue; not using.", __func__); 167 goto err; 168 } 169 170 base->evsigsel = &kqsigops; 171 172 return (kqueueop); 173 err: 174 if (kqueueop) 175 kqop_free(kqueueop); 176 177 return (NULL); 178 } 179 180 #define ADD_UDATA 0x30303 181 182 static void 183 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change) 184 { 185 memset(out, 0, sizeof(struct kevent)); 186 out->ident = fd; 187 out->filter = filter; 188 189 if (change & EV_CHANGE_ADD) { 190 out->flags = EV_ADD; 191 /* We set a magic number here so that we can tell 'add' 192 * errors from 'del' errors. */ 193 out->udata = INT_TO_UDATA(ADD_UDATA); 194 if (change & EV_ET) 195 out->flags |= EV_CLEAR; 196 #ifdef NOTE_EOF 197 /* Make it behave like select() and poll() */ 198 if (filter == EVFILT_READ) 199 out->fflags = NOTE_EOF; 200 #endif 201 } else { 202 EVUTIL_ASSERT(change & EV_CHANGE_DEL); 203 out->flags = EV_DELETE; 204 } 205 } 206 207 static int 208 kq_build_changes_list(const struct event_changelist *changelist, 209 struct kqop *kqop) 210 { 211 int i; 212 int n_changes = 0; 213 214 for (i = 0; i < changelist->n_changes; ++i) { 215 struct event_change *in_ch = &changelist->changes[i]; 216 struct kevent *out_ch; 217 if (n_changes >= kqop->changes_size - 1) { 218 int newsize; 219 struct kevent *newchanges; 220 221 if (kqop->changes_size > INT_MAX / 2 || 222 (size_t)kqop->changes_size * 2 > EV_SIZE_MAX / 223 sizeof(struct kevent)) { 224 event_warnx("%s: int overflow", __func__); 225 return (-1); 226 } 227 228 newsize = kqop->changes_size * 2; 229 newchanges = mm_realloc(kqop->changes, 230 newsize * sizeof(struct kevent)); 231 if (newchanges == NULL) { 232 event_warn("%s: realloc", __func__); 233 return (-1); 234 } 235 kqop->changes = newchanges; 236 kqop->changes_size = newsize; 237 } 238 if (in_ch->read_change) { 239 out_ch = &kqop->changes[n_changes++]; 240 kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ, 241 in_ch->read_change); 242 } 243 if (in_ch->write_change) { 244 out_ch = &kqop->changes[n_changes++]; 245 kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE, 246 in_ch->write_change); 247 } 248 } 249 return n_changes; 250 } 251 252 static int 253 kq_grow_events(struct kqop *kqop, size_t new_size) 254 { 255 struct kevent *newresult; 256 257 newresult = mm_realloc(kqop->events, 258 new_size * sizeof(struct kevent)); 259 260 if (newresult) { 261 kqop->events = newresult; 262 kqop->events_size = new_size; 263 return 0; 264 } else { 265 return -1; 266 } 267 } 268 269 static int 270 kq_dispatch(struct event_base *base, struct timeval *tv) 271 { 272 struct kqop *kqop = base->evbase; 273 struct kevent *events = kqop->events; 274 struct kevent *changes; 275 struct timespec ts, *ts_p = NULL; 276 int i, n_changes, res; 277 278 if (tv != NULL) { 279 ts.tv_sec = tv->tv_sec; 280 ts.tv_nsec = tv->tv_usec * 1000; 281 ts_p = &ts; 282 } 283 284 /* Build "changes" from "base->changes" */ 285 EVUTIL_ASSERT(kqop->changes); 286 n_changes = kq_build_changes_list(&base->changelist, kqop); 287 if (n_changes < 0) 288 return -1; 289 290 event_changelist_remove_all_(&base->changelist, base); 291 292 /* steal the changes array in case some broken code tries to call 293 * dispatch twice at once. */ 294 changes = kqop->changes; 295 kqop->changes = NULL; 296 297 /* Make sure that 'events' is at least as long as the list of changes: 298 * otherwise errors in the changes can get reported as a -1 return 299 * value from kevent() rather than as EV_ERROR events in the events 300 * array. 301 * 302 * (We could instead handle -1 return values from kevent() by 303 * retrying with a smaller changes array or a larger events array, 304 * but this approach seems less risky for now.) 305 */ 306 if (kqop->events_size < n_changes) { 307 int new_size = kqop->events_size; 308 do { 309 new_size *= 2; 310 } while (new_size < n_changes); 311 312 kq_grow_events(kqop, new_size); 313 events = kqop->events; 314 } 315 316 EVBASE_RELEASE_LOCK(base, th_base_lock); 317 318 res = kevent(kqop->kq, changes, n_changes, 319 events, kqop->events_size, ts_p); 320 321 EVBASE_ACQUIRE_LOCK(base, th_base_lock); 322 323 EVUTIL_ASSERT(kqop->changes == NULL); 324 kqop->changes = changes; 325 326 if (res == -1) { 327 if (errno != EINTR) { 328 event_warn("kevent"); 329 return (-1); 330 } 331 332 return (0); 333 } 334 335 event_debug(("%s: kevent reports %d", __func__, res)); 336 337 for (i = 0; i < res; i++) { 338 int which = 0; 339 340 if (events[i].flags & EV_ERROR) { 341 switch (events[i].data) { 342 343 /* Can occur on delete if we are not currently 344 * watching any events on this fd. That can 345 * happen when the fd was closed and another 346 * file was opened with that fd. */ 347 case ENOENT: 348 /* Can occur for reasons not fully understood 349 * on FreeBSD. */ 350 case EINVAL: 351 continue; 352 #if defined(__FreeBSD__) 353 /* 354 * This currently occurs if an FD is closed 355 * before the EV_DELETE makes it out via kevent(). 356 * The FreeBSD capabilities code sees the blank 357 * capability set and rejects the request to 358 * modify an event. 359 * 360 * To be strictly correct - when an FD is closed, 361 * all the registered events are also removed. 362 * Queuing EV_DELETE to a closed FD is wrong. 363 * The event(s) should just be deleted from 364 * the pending changelist. 365 */ 366 case ENOTCAPABLE: 367 continue; 368 #endif 369 370 /* Can occur on a delete if the fd is closed. */ 371 case EBADF: 372 /* XXXX On NetBSD, we can also get EBADF if we 373 * try to add the write side of a pipe, but 374 * the read side has already been closed. 375 * Other BSDs call this situation 'EPIPE'. It 376 * would be good if we had a way to report 377 * this situation. */ 378 continue; 379 /* These two can occur on an add if the fd was one side 380 * of a pipe, and the other side was closed. */ 381 case EPERM: 382 case EPIPE: 383 /* Report read events, if we're listening for 384 * them, so that the user can learn about any 385 * add errors. (If the operation was a 386 * delete, then udata should be cleared.) */ 387 if (events[i].udata) { 388 /* The operation was an add: 389 * report the error as a read. */ 390 which |= EV_READ; 391 break; 392 } else { 393 /* The operation was a del: 394 * report nothing. */ 395 continue; 396 } 397 398 /* Other errors shouldn't occur. */ 399 default: 400 errno = events[i].data; 401 return (-1); 402 } 403 } else if (events[i].filter == EVFILT_READ) { 404 which |= EV_READ; 405 } else if (events[i].filter == EVFILT_WRITE) { 406 which |= EV_WRITE; 407 } else if (events[i].filter == EVFILT_SIGNAL) { 408 which |= EV_SIGNAL; 409 #ifdef EVFILT_USER 410 } else if (events[i].filter == EVFILT_USER) { 411 base->is_notify_pending = 0; 412 #endif 413 } 414 415 if (!which) 416 continue; 417 418 if (events[i].filter == EVFILT_SIGNAL) { 419 evmap_signal_active_(base, events[i].ident, 1); 420 } else { 421 evmap_io_active_(base, events[i].ident, which | EV_ET); 422 } 423 } 424 425 if (res == kqop->events_size) { 426 /* We used all the events space that we have. Maybe we should 427 make it bigger. */ 428 kq_grow_events(kqop, kqop->events_size * 2); 429 } 430 431 return (0); 432 } 433 434 static void 435 kqop_free(struct kqop *kqop) 436 { 437 if (kqop->changes) 438 mm_free(kqop->changes); 439 if (kqop->events) 440 mm_free(kqop->events); 441 if (kqop->kq >= 0 && kqop->pid == getpid()) 442 close(kqop->kq); 443 memset(kqop, 0, sizeof(struct kqop)); 444 mm_free(kqop); 445 } 446 447 static void 448 kq_dealloc(struct event_base *base) 449 { 450 struct kqop *kqop = base->evbase; 451 evsig_dealloc_(base); 452 kqop_free(kqop); 453 } 454 455 /* signal handling */ 456 static int 457 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p) 458 { 459 struct kqop *kqop = base->evbase; 460 struct kevent kev; 461 struct timespec timeout = { 0, 0 }; 462 (void)p; 463 464 EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG); 465 466 memset(&kev, 0, sizeof(kev)); 467 kev.ident = nsignal; 468 kev.filter = EVFILT_SIGNAL; 469 kev.flags = EV_ADD; 470 471 /* Be ready for the signal if it is sent any 472 * time between now and the next call to 473 * kq_dispatch. */ 474 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 475 return (-1); 476 477 /* We can set the handler for most signals to SIG_IGN and 478 * still have them reported to us in the queue. However, 479 * if the handler for SIGCHLD is SIG_IGN, the system reaps 480 * zombie processes for us, and we don't get any notification. 481 * This appears to be the only signal with this quirk. */ 482 if (evsig_set_handler_(base, nsignal, 483 nsignal == SIGCHLD ? SIG_DFL : SIG_IGN) == -1) 484 return (-1); 485 486 return (0); 487 } 488 489 static int 490 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p) 491 { 492 struct kqop *kqop = base->evbase; 493 struct kevent kev; 494 495 struct timespec timeout = { 0, 0 }; 496 (void)p; 497 498 EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG); 499 500 memset(&kev, 0, sizeof(kev)); 501 kev.ident = nsignal; 502 kev.filter = EVFILT_SIGNAL; 503 kev.flags = EV_DELETE; 504 505 /* Because we insert signal events 506 * immediately, we need to delete them 507 * immediately, too */ 508 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 509 return (-1); 510 511 if (evsig_restore_handler_(base, nsignal) == -1) 512 return (-1); 513 514 return (0); 515 } 516 517 518 /* OSX 10.6 and FreeBSD 8.1 add support for EVFILT_USER, which we can use 519 * to wake up the event loop from another thread. */ 520 521 /* Magic number we use for our filter ID. */ 522 #define NOTIFY_IDENT 42 523 524 int 525 event_kq_add_notify_event_(struct event_base *base) 526 { 527 struct kqop *kqop = base->evbase; 528 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER) 529 struct kevent kev; 530 struct timespec timeout = { 0, 0 }; 531 #endif 532 533 if (kqop->notify_event_added) 534 return 0; 535 536 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER) 537 memset(&kev, 0, sizeof(kev)); 538 kev.ident = NOTIFY_IDENT; 539 kev.filter = EVFILT_USER; 540 kev.flags = EV_ADD | EV_CLEAR; 541 542 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) { 543 event_warn("kevent: adding EVFILT_USER event"); 544 return -1; 545 } 546 547 kqop->notify_event_added = 1; 548 549 return 0; 550 #else 551 return -1; 552 #endif 553 } 554 555 int 556 event_kq_notify_base_(struct event_base *base) 557 { 558 struct kqop *kqop = base->evbase; 559 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER) 560 struct kevent kev; 561 struct timespec timeout = { 0, 0 }; 562 #endif 563 if (! kqop->notify_event_added) 564 return -1; 565 566 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER) 567 memset(&kev, 0, sizeof(kev)); 568 kev.ident = NOTIFY_IDENT; 569 kev.filter = EVFILT_USER; 570 kev.fflags = NOTE_TRIGGER; 571 572 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) { 573 event_warn("kevent: triggering EVFILT_USER event"); 574 return -1; 575 } 576 577 return 0; 578 #else 579 return -1; 580 #endif 581 } 582 583 #endif /* EVENT__HAVE_KQUEUE */ 584