/* $NetBSD: udp.c,v 1.15 2025/01/26 16:25:44 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <errno.h>
#include <unistd.h>

#include <isc/async.h>
#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stdtime.h>
#include <isc/thread.h>
#include <isc/util.h>
#include <isc/uv.h>

#include "../loop_p.h"
#include "netmgr-int.h"

#ifdef HAVE_NET_ROUTE_H
#include <net/route.h>
#if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR)
#define USE_ROUTE_SOCKET      1
#define ROUTE_SOCKET_PF	      PF_ROUTE
#define ROUTE_SOCKET_PROTOCOL 0
#define MSGHDR		      rt_msghdr
#define MSGTYPE		      rtm_type
#endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \
	* defined(RTM_DELADDR) */
#endif /* ifdef HAVE_NET_ROUTE_H */

#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H)
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#if defined(RTM_NEWADDR) && defined(RTM_DELADDR)
#define USE_ROUTE_SOCKET      1
#define USE_NETLINK	      1
#define ROUTE_SOCKET_PF	      PF_NETLINK
#define ROUTE_SOCKET_PROTOCOL NETLINK_ROUTE
#define MSGHDR		      nlmsghdr
#define MSGTYPE		      nlmsg_type
#endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */
#endif /* if
defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ 65 */ 66 67 static void 68 udp_send_cb(uv_udp_send_t *req, int status); 69 70 static void 71 udp_close_cb(uv_handle_t *handle); 72 73 static uv_os_sock_t 74 isc__nm_udp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { 75 isc_result_t result; 76 uv_os_sock_t sock = -1; 77 78 result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &sock); 79 RUNTIME_CHECK(result == ISC_R_SUCCESS); 80 81 (void)isc__nm_socket_disable_pmtud(sock, sa_family); 82 (void)isc__nm_socket_v6only(sock, sa_family); 83 84 result = isc__nm_socket_reuse(sock, 1); 85 RUNTIME_CHECK(result == ISC_R_SUCCESS); 86 87 if (mgr->load_balance_sockets) { 88 result = isc__nm_socket_reuse_lb(sock); 89 RUNTIME_CHECK(result == ISC_R_SUCCESS); 90 } 91 92 return sock; 93 } 94 95 /* 96 * Asynchronous 'udplisten' call handler: start listening on a UDP socket. 97 */ 98 static void 99 start_udp_child_job(void *arg) { 100 isc_nmsocket_t *sock = arg; 101 102 REQUIRE(VALID_NMSOCK(sock)); 103 REQUIRE(VALID_NMSOCK(sock->parent)); 104 REQUIRE(sock->type == isc_nm_udpsocket); 105 REQUIRE(sock->tid == isc_tid()); 106 107 int r, uv_bind_flags = 0; 108 int uv_init_flags = 0; 109 sa_family_t sa_family = sock->iface.type.sa.sa_family; 110 isc_result_t result = ISC_R_UNSET; 111 isc_nm_t *mgr = sock->worker->netmgr; 112 isc_loop_t *loop = sock->worker->loop; 113 114 (void)isc__nm_socket_min_mtu(sock->fd, sa_family); 115 116 #if HAVE_DECL_UV_UDP_RECVMMSG 117 uv_init_flags |= UV_UDP_RECVMMSG; 118 #endif 119 r = uv_udp_init_ex(&loop->loop, &sock->uv_handle.udp, uv_init_flags); 120 UV_RUNTIME_CHECK(uv_udp_init_ex, r); 121 uv_handle_set_data(&sock->uv_handle.handle, sock); 122 /* This keeps the socket alive after everything else is gone */ 123 isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL }); 124 125 r = uv_timer_init(&loop->loop, &sock->read_timer); 126 UV_RUNTIME_CHECK(uv_timer_init, r); 127 uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); 128 129 r = 
uv_udp_open(&sock->uv_handle.udp, sock->fd); 130 if (r < 0) { 131 isc__nm_closesocket(sock->fd); 132 isc__nm_incstats(sock, STATID_OPENFAIL); 133 goto done; 134 } 135 isc__nm_incstats(sock, STATID_OPEN); 136 137 if (sa_family == AF_INET6) { 138 uv_bind_flags |= UV_UDP_IPV6ONLY; 139 } 140 141 if (mgr->load_balance_sockets) { 142 r = isc__nm_udp_freebind(&sock->uv_handle.udp, 143 &sock->parent->iface.type.sa, 144 uv_bind_flags); 145 if (r < 0) { 146 isc__nm_incstats(sock, STATID_BINDFAIL); 147 goto done; 148 } 149 } else if (sock->tid == 0) { 150 /* This thread is first, bind the socket */ 151 r = isc__nm_udp_freebind(&sock->uv_handle.udp, 152 &sock->parent->iface.type.sa, 153 uv_bind_flags); 154 if (r < 0) { 155 isc__nm_incstats(sock, STATID_BINDFAIL); 156 goto done; 157 } 158 sock->parent->uv_handle.udp.flags = sock->uv_handle.udp.flags; 159 } else { 160 /* The socket is already bound, just copy the flags */ 161 sock->uv_handle.udp.flags = sock->parent->uv_handle.udp.flags; 162 } 163 164 isc__nm_set_network_buffers(mgr, &sock->uv_handle.handle); 165 166 r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb, 167 isc__nm_udp_read_cb); 168 if (r != 0) { 169 isc__nm_incstats(sock, STATID_BINDFAIL); 170 goto done; 171 } 172 173 done: 174 result = isc_uverr2result(r); 175 176 sock->result = result; 177 178 REQUIRE(!loop->paused); 179 180 if (sock->tid != 0) { 181 isc_barrier_wait(&sock->parent->listen_barrier); 182 } 183 } 184 185 static void 186 start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, 187 uv_os_sock_t fd, int tid) { 188 isc__networker_t *worker = &mgr->workers[tid]; 189 isc_nmsocket_t *csock = &sock->children[tid]; 190 191 isc__nmsocket_init(csock, worker, isc_nm_udpsocket, iface, sock); 192 csock->recv_cb = sock->recv_cb; 193 csock->recv_cbarg = sock->recv_cbarg; 194 csock->inactive_handles_max = ISC_NM_NMHANDLES_MAX; 195 196 if (mgr->load_balance_sockets) { 197 csock->fd = isc__nm_udp_lb_socket(mgr, 198 
iface->type.sa.sa_family); 199 } else { 200 csock->fd = dup(fd); 201 } 202 INSIST(csock->fd >= 0); 203 204 if (tid == 0) { 205 start_udp_child_job(csock); 206 } else { 207 isc_async_run(worker->loop, start_udp_child_job, csock); 208 } 209 } 210 211 isc_result_t 212 isc_nm_listenudp(isc_nm_t *mgr, uint32_t workers, isc_sockaddr_t *iface, 213 isc_nm_recv_cb_t cb, void *cbarg, isc_nmsocket_t **sockp) { 214 isc_result_t result = ISC_R_UNSET; 215 isc_nmsocket_t *sock = NULL; 216 uv_os_sock_t fd = -1; 217 isc__networker_t *worker = NULL; 218 219 REQUIRE(VALID_NM(mgr)); 220 REQUIRE(isc_tid() == 0); 221 222 worker = &mgr->workers[0]; 223 224 if (isc__nm_closing(worker)) { 225 return ISC_R_SHUTTINGDOWN; 226 } 227 228 if (workers == 0) { 229 workers = mgr->nloops; 230 } 231 REQUIRE(workers <= mgr->nloops); 232 233 sock = isc_mempool_get(worker->nmsocket_pool); 234 isc__nmsocket_init(sock, worker, isc_nm_udplistener, iface, NULL); 235 236 sock->nchildren = (workers == ISC_NM_LISTEN_ALL) ? (uint32_t)mgr->nloops 237 : workers; 238 sock->children = isc_mem_cget(worker->mctx, sock->nchildren, 239 sizeof(sock->children[0])); 240 241 isc__nmsocket_barrier_init(sock); 242 243 sock->recv_cb = cb; 244 sock->recv_cbarg = cbarg; 245 246 if (!mgr->load_balance_sockets) { 247 fd = isc__nm_udp_lb_socket(mgr, iface->type.sa.sa_family); 248 } 249 250 start_udp_child(mgr, iface, sock, fd, 0); 251 result = sock->children[0].result; 252 INSIST(result != ISC_R_UNSET); 253 254 for (size_t i = 1; i < sock->nchildren; i++) { 255 start_udp_child(mgr, iface, sock, fd, i); 256 } 257 258 isc_barrier_wait(&sock->listen_barrier); 259 260 if (!mgr->load_balance_sockets) { 261 isc__nm_closesocket(fd); 262 } 263 264 /* 265 * If any of the child sockets have failed then isc_nm_listenudp 266 * fails. 
267 */ 268 for (size_t i = 1; i < sock->nchildren; i++) { 269 if (result == ISC_R_SUCCESS && 270 sock->children[i].result != ISC_R_SUCCESS) 271 { 272 result = sock->children[i].result; 273 } 274 } 275 276 if (result != ISC_R_SUCCESS) { 277 sock->active = false; 278 isc__nm_udp_stoplistening(sock); 279 isc_nmsocket_close(&sock); 280 281 return result; 282 } 283 284 sock->active = true; 285 286 *sockp = sock; 287 return ISC_R_SUCCESS; 288 } 289 290 #ifdef USE_ROUTE_SOCKET 291 static isc_result_t 292 route_socket(uv_os_sock_t *fdp) { 293 isc_result_t result; 294 uv_os_sock_t fd = -1; 295 #ifdef USE_NETLINK 296 struct sockaddr_nl sa; 297 int r; 298 #endif 299 300 result = isc__nm_socket(ROUTE_SOCKET_PF, SOCK_RAW, 301 ROUTE_SOCKET_PROTOCOL, &fd); 302 if (result != ISC_R_SUCCESS) { 303 return result; 304 } 305 306 #ifdef USE_NETLINK 307 sa.nl_family = PF_NETLINK; 308 sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; 309 r = bind(fd, (struct sockaddr *)&sa, sizeof(sa)); 310 if (r < 0) { 311 isc__nm_closesocket(fd); 312 return isc_errno_toresult(r); 313 } 314 #endif 315 316 *fdp = fd; 317 return ISC_R_SUCCESS; 318 } 319 320 static isc_result_t 321 route_connect_direct(isc_nmsocket_t *sock) { 322 isc__networker_t *worker = NULL; 323 int r; 324 325 REQUIRE(sock->tid == isc_tid()); 326 327 worker = sock->worker; 328 329 sock->connecting = true; 330 331 r = uv_udp_init(&worker->loop->loop, &sock->uv_handle.udp); 332 UV_RUNTIME_CHECK(uv_udp_init, r); 333 uv_handle_set_data(&sock->uv_handle.handle, sock); 334 335 r = uv_timer_init(&worker->loop->loop, &sock->read_timer); 336 UV_RUNTIME_CHECK(uv_timer_init, r); 337 uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); 338 339 if (isc__nm_closing(worker)) { 340 return ISC_R_SHUTTINGDOWN; 341 } 342 343 r = uv_udp_open(&sock->uv_handle.udp, sock->fd); 344 if (r != 0) { 345 return isc_uverr2result(r); 346 } 347 348 isc__nm_set_network_buffers(sock->worker->netmgr, 349 &sock->uv_handle.handle); 350 351 
sock->connecting = false; 352 sock->connected = true; 353 354 return ISC_R_SUCCESS; 355 } 356 357 #endif /* USE_ROUTE_SOCKET */ 358 359 isc_result_t 360 isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg) { 361 #ifdef USE_ROUTE_SOCKET 362 isc_result_t result = ISC_R_SUCCESS; 363 isc_nmsocket_t *sock = NULL; 364 isc__nm_uvreq_t *req = NULL; 365 isc__networker_t *worker = NULL; 366 uv_os_sock_t fd = -1; 367 368 REQUIRE(VALID_NM(mgr)); 369 REQUIRE(isc_tid() == 0); 370 371 worker = &mgr->workers[isc_tid()]; 372 373 if (isc__nm_closing(worker)) { 374 return ISC_R_SHUTTINGDOWN; 375 } 376 377 result = route_socket(&fd); 378 if (result != ISC_R_SUCCESS) { 379 return result; 380 } 381 382 sock = isc_mempool_get(worker->nmsocket_pool); 383 isc__nmsocket_init(sock, worker, isc_nm_udpsocket, NULL, NULL); 384 385 sock->connect_cb = cb; 386 sock->connect_cbarg = cbarg; 387 sock->client = true; 388 sock->route_sock = true; 389 sock->fd = fd; 390 391 req = isc__nm_uvreq_get(sock); 392 req->cb.connect = cb; 393 req->cbarg = cbarg; 394 req->handle = isc__nmhandle_get(sock, NULL, NULL); 395 396 sock->active = true; 397 398 result = route_connect_direct(sock); 399 if (result != ISC_R_SUCCESS) { 400 sock->active = false; 401 isc__nm_udp_close(sock); 402 } 403 404 isc__nm_connectcb(sock, req, result, true); 405 406 isc__nmsocket_detach(&sock); 407 408 return ISC_R_SUCCESS; 409 #else /* USE_ROUTE_SOCKET */ 410 UNUSED(mgr); 411 UNUSED(cb); 412 UNUSED(cbarg); 413 UNUSED(extrahandlesize); 414 return ISC_R_NOTIMPLEMENTED; 415 #endif /* USE_ROUTE_SOCKET */ 416 } 417 418 /* 419 * Asynchronous 'udpstop' call handler: stop listening on a UDP socket. 
420 */ 421 static void 422 stop_udp_child_job(void *arg) { 423 isc_nmsocket_t *sock = arg; 424 REQUIRE(VALID_NMSOCK(sock)); 425 REQUIRE(sock->tid == isc_tid()); 426 REQUIRE(sock->parent != NULL); 427 428 sock->active = false; 429 430 isc__nm_udp_close(sock); 431 432 REQUIRE(!sock->worker->loop->paused); 433 isc_barrier_wait(&sock->parent->stop_barrier); 434 } 435 436 static void 437 stop_udp_child(isc_nmsocket_t *sock) { 438 REQUIRE(VALID_NMSOCK(sock)); 439 440 if (sock->tid == 0) { 441 stop_udp_child_job(sock); 442 } else { 443 isc_async_run(sock->worker->loop, stop_udp_child_job, sock); 444 } 445 } 446 447 void 448 isc__nm_udp_stoplistening(isc_nmsocket_t *sock) { 449 REQUIRE(VALID_NMSOCK(sock)); 450 REQUIRE(sock->type == isc_nm_udplistener); 451 REQUIRE(sock->tid == isc_tid()); 452 REQUIRE(sock->tid == 0); 453 REQUIRE(!sock->closing); 454 455 sock->closing = true; 456 457 /* Mark the parent socket inactive */ 458 sock->active = false; 459 460 /* Stop all the other threads' children */ 461 for (size_t i = 1; i < sock->nchildren; i++) { 462 stop_udp_child(&sock->children[i]); 463 } 464 465 /* Stop the child for the main thread */ 466 stop_udp_child(&sock->children[0]); 467 468 /* Stop the parent */ 469 sock->closed = true; 470 isc__nmsocket_prep_destroy(sock); 471 } 472 473 /* 474 * udp_recv_cb handles incoming UDP packet from uv. The buffer here is 475 * reused for a series of packets, so we need to allocate a new one. 476 * This new one can be reused to send the response then. 
477 */ 478 void 479 isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, 480 const struct sockaddr *addr, unsigned int flags) { 481 isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); 482 isc__nm_uvreq_t *req = NULL; 483 uint32_t maxudp; 484 isc_result_t result; 485 isc_sockaddr_t sockaddr, *sa = NULL; 486 487 REQUIRE(VALID_NMSOCK(sock)); 488 REQUIRE(sock->tid == isc_tid()); 489 490 /* 491 * When using recvmmsg(2), if no errors occur, there will be a final 492 * callback with nrecv set to 0, addr set to NULL and the buffer 493 * pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK 494 * flag cleared and the UV_UDP_MMSG_FREE flag set. 495 */ 496 #if HAVE_DECL_UV_UDP_MMSG_FREE 497 if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) { 498 INSIST(nrecv == 0); 499 INSIST(addr == NULL); 500 goto free; 501 } 502 #else 503 UNUSED(flags); 504 #endif 505 /* 506 * Possible reasons to return now without processing: 507 * 508 * - If we're simulating a firewall blocking UDP packets 509 * bigger than 'maxudp' bytes for testing purposes. 510 */ 511 maxudp = atomic_load_relaxed(&sock->worker->netmgr->maxudp); 512 if (maxudp != 0 && (uint32_t)nrecv > maxudp) { 513 /* 514 * We need to keep the read_cb intact in case, so the 515 * readtimeout_cb can trigger and not crash because of 516 * missing read_req. 517 */ 518 goto free; 519 } 520 521 /* 522 * - If there was a networking error. 523 */ 524 if (nrecv < 0) { 525 isc__nm_failed_read_cb(sock, isc_uverr2result(nrecv), false); 526 goto free; 527 } 528 529 /* 530 * - If the network manager is shutting down 531 */ 532 if (isc__nm_closing(sock->worker)) { 533 isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false); 534 goto free; 535 } 536 537 /* 538 * - If the socket is no longer active. 
539 */ 540 if (!isc__nmsocket_active(sock)) { 541 isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false); 542 goto free; 543 } 544 545 /* 546 * End of the current (iteration) datagram stream, just free the buffer. 547 * The callback with nrecv == 0 and addr == NULL is called for both 548 * normal UDP sockets and recvmmsg sockets at the end of every event 549 * loop iteration. 550 */ 551 if (nrecv == 0 && addr == NULL) { 552 INSIST(flags == 0); 553 goto free; 554 } 555 556 /* 557 * We could receive an empty datagram in which case: 558 * nrecv == 0 and addr != NULL 559 */ 560 INSIST(addr != NULL); 561 562 if (!sock->route_sock) { 563 result = isc_sockaddr_fromsockaddr(&sockaddr, addr); 564 RUNTIME_CHECK(result == ISC_R_SUCCESS); 565 sa = &sockaddr; 566 } 567 568 req = isc__nm_get_read_req(sock, sa); 569 570 /* 571 * The callback will be called synchronously, because result is 572 * ISC_R_SUCCESS, so we are ok of passing the buf directly. 573 */ 574 req->uvbuf.base = buf->base; 575 req->uvbuf.len = nrecv; 576 577 sock->reading = false; 578 579 /* 580 * The client isc_nm_read() expects just a single message, so we need to 581 * stop reading now. The reading could be restarted in the read 582 * callback with another isc_nm_read() call. 583 */ 584 if (sock->client) { 585 isc__nmsocket_timer_stop(sock); 586 isc__nm_stop_reading(sock); 587 isc__nmsocket_clearcb(sock); 588 } 589 590 REQUIRE(!sock->processing); 591 sock->processing = true; 592 isc__nm_readcb(sock, req, ISC_R_SUCCESS, false); 593 sock->processing = false; 594 595 free: 596 #if HAVE_DECL_UV_UDP_MMSG_CHUNK 597 /* 598 * When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag 599 * set, those must not be freed. 600 */ 601 if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) { 602 return; 603 } 604 #endif 605 606 /* 607 * When using recvmmsg(2), if a UDP socket error occurs, nrecv will be < 608 * 0. In either scenario, the callee can now safely free the provided 609 * buffer. 
610 */ 611 if (nrecv < 0) { 612 /* 613 * The buffer may be a null buffer on error. 614 */ 615 if (buf->base == NULL && buf->len == 0) { 616 return; 617 } 618 } 619 620 isc__nm_free_uvbuf(sock, buf); 621 } 622 623 static void 624 udp_send_cb(uv_udp_send_t *req, int status) { 625 isc_result_t result = ISC_R_SUCCESS; 626 isc__nm_uvreq_t *uvreq = uv_handle_get_data((uv_handle_t *)req); 627 isc_nmsocket_t *sock = NULL; 628 629 REQUIRE(VALID_UVREQ(uvreq)); 630 REQUIRE(VALID_NMHANDLE(uvreq->handle)); 631 632 sock = uvreq->sock; 633 634 REQUIRE(VALID_NMSOCK(sock)); 635 REQUIRE(sock->tid == isc_tid()); 636 637 if (status < 0) { 638 isc__nm_incstats(sock, STATID_SENDFAIL); 639 isc__nm_failed_send_cb(sock, uvreq, isc_uverr2result(status), 640 false); 641 return; 642 } 643 644 isc__nm_sendcb(sock, uvreq, result, false); 645 } 646 647 static _Atomic(isc_stdtime_t) last_udpsends_log = 0; 648 649 static bool 650 can_log_udp_sends(void) { 651 isc_stdtime_t now = isc_stdtime_now(); 652 isc_stdtime_t last = atomic_exchange_relaxed(&last_udpsends_log, now); 653 if (now != last) { 654 return true; 655 } 656 657 return false; 658 } 659 660 /* 661 * Send the data in 'region' to a peer via a UDP socket. We try to find 662 * a proper sibling/child socket so that we won't have to jump to 663 * another thread. 664 */ 665 void 666 isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region, 667 isc_nm_cb_t cb, void *cbarg) { 668 isc_nmsocket_t *sock = handle->sock; 669 const isc_sockaddr_t *peer = &handle->peer; 670 const struct sockaddr *sa = NULL; 671 isc__nm_uvreq_t *uvreq = NULL; 672 isc__networker_t *worker = NULL; 673 uint32_t maxudp; 674 int r; 675 isc_result_t result; 676 677 REQUIRE(VALID_NMSOCK(sock)); 678 REQUIRE(sock->type == isc_nm_udpsocket); 679 REQUIRE(sock->tid == isc_tid()); 680 681 worker = sock->worker; 682 maxudp = atomic_load(&worker->netmgr->maxudp); 683 sa = sock->connected ? 
NULL : &peer->type.sa; 684 685 /* 686 * We're simulating a firewall blocking UDP packets bigger than 687 * 'maxudp' bytes, for testing purposes. 688 * 689 * The client would ordinarily have unreferenced the handle 690 * in the callback, but that won't happen in this case, so 691 * we need to do so here. 692 */ 693 if (maxudp != 0 && region->length > maxudp) { 694 isc_nmhandle_detach(&handle); 695 return; 696 } 697 698 uvreq = isc__nm_uvreq_get(sock); 699 uvreq->uvbuf.base = (char *)region->base; 700 uvreq->uvbuf.len = region->length; 701 702 isc_nmhandle_attach(handle, &uvreq->handle); 703 704 uvreq->cb.send = cb; 705 uvreq->cbarg = cbarg; 706 707 if (isc__nm_closing(worker)) { 708 result = ISC_R_SHUTTINGDOWN; 709 goto fail; 710 } 711 712 if (isc__nmsocket_closing(sock)) { 713 result = ISC_R_CANCELED; 714 goto fail; 715 } 716 717 if (uv_udp_get_send_queue_size(&sock->uv_handle.udp) > 718 ISC_NETMGR_UDP_SENDBUF_SIZE) 719 { 720 /* 721 * The kernel UDP send queue is full, try sending the UDP 722 * response synchronously instead of just failing. 
723 */ 724 r = uv_udp_try_send(&sock->uv_handle.udp, &uvreq->uvbuf, 1, sa); 725 if (r < 0) { 726 if (can_log_udp_sends()) { 727 isc__netmgr_log( 728 worker->netmgr, ISC_LOG_ERROR, 729 "Sending UDP messages failed: %s", 730 isc_result_totext(isc_uverr2result(r))); 731 } 732 733 isc__nm_incstats(sock, STATID_SENDFAIL); 734 result = isc_uverr2result(r); 735 goto fail; 736 } 737 738 RUNTIME_CHECK(r == (int)region->length); 739 isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, true); 740 741 } else { 742 /* Send the message asynchronously */ 743 r = uv_udp_send(&uvreq->uv_req.udp_send, &sock->uv_handle.udp, 744 &uvreq->uvbuf, 1, sa, udp_send_cb); 745 if (r < 0) { 746 isc__nm_incstats(sock, STATID_SENDFAIL); 747 result = isc_uverr2result(r); 748 goto fail; 749 } 750 } 751 return; 752 fail: 753 isc__nm_failed_send_cb(sock, uvreq, result, true); 754 } 755 756 static isc_result_t 757 udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { 758 int uv_bind_flags = 0; 759 int r; 760 isc__networker_t *worker = sock->worker; 761 isc_result_t result; 762 763 r = uv_udp_init(&worker->loop->loop, &sock->uv_handle.udp); 764 UV_RUNTIME_CHECK(uv_udp_init, r); 765 uv_handle_set_data(&sock->uv_handle.handle, sock); 766 767 r = uv_timer_init(&worker->loop->loop, &sock->read_timer); 768 UV_RUNTIME_CHECK(uv_timer_init, r); 769 uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); 770 771 r = uv_udp_open(&sock->uv_handle.udp, sock->fd); 772 if (r != 0) { 773 isc__nm_incstats(sock, STATID_OPENFAIL); 774 return isc_uverr2result(r); 775 } 776 isc__nm_incstats(sock, STATID_OPEN); 777 778 /* 779 * uv_udp_open() enables REUSE_ADDR, we need to disable it again. 
780 */ 781 result = isc__nm_socket_reuse(sock->fd, 0); 782 RUNTIME_CHECK(result == ISC_R_SUCCESS); 783 784 if (sock->iface.type.sa.sa_family == AF_INET6) { 785 uv_bind_flags |= UV_UDP_IPV6ONLY; 786 } 787 788 #if HAVE_DECL_UV_UDP_LINUX_RECVERR 789 uv_bind_flags |= UV_UDP_LINUX_RECVERR; 790 #endif 791 792 r = uv_udp_bind(&sock->uv_handle.udp, &sock->iface.type.sa, 793 uv_bind_flags); 794 if (r != 0) { 795 isc__nm_incstats(sock, STATID_BINDFAIL); 796 return isc_uverr2result(r); 797 } 798 799 isc__nm_set_network_buffers(sock->worker->netmgr, 800 &sock->uv_handle.handle); 801 802 /* 803 * On FreeBSD the UDP connect() call sometimes results in a 804 * spurious transient EADDRINUSE. Try a few more times before 805 * giving up. 806 */ 807 do { 808 r = uv_udp_connect(&sock->uv_handle.udp, &req->peer.type.sa); 809 } while (r == UV_EADDRINUSE && --req->connect_tries > 0); 810 if (r != 0) { 811 isc__nm_incstats(sock, STATID_CONNECTFAIL); 812 return isc_uverr2result(r); 813 } 814 isc__nm_incstats(sock, STATID_CONNECT); 815 816 return ISC_R_SUCCESS; 817 } 818 819 void 820 isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, 821 isc_nm_cb_t cb, void *cbarg, unsigned int timeout) { 822 isc_result_t result = ISC_R_SUCCESS; 823 isc_nmsocket_t *sock = NULL; 824 isc__nm_uvreq_t *req = NULL; 825 sa_family_t sa_family; 826 isc__networker_t *worker = NULL; 827 uv_os_sock_t fd = -1; 828 829 REQUIRE(VALID_NM(mgr)); 830 REQUIRE(local != NULL); 831 REQUIRE(peer != NULL); 832 833 worker = &mgr->workers[isc_tid()]; 834 835 if (isc__nm_closing(worker)) { 836 cb(NULL, ISC_R_SHUTTINGDOWN, cbarg); 837 return; 838 } 839 840 sa_family = peer->type.sa.sa_family; 841 842 result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &fd); 843 if (result != ISC_R_SUCCESS) { 844 cb(NULL, result, cbarg); 845 return; 846 } 847 848 /* Initialize the new socket */ 849 sock = isc_mempool_get(worker->nmsocket_pool); 850 isc__nmsocket_init(sock, worker, isc_nm_udpsocket, local, NULL); 851 852 
sock->connect_cb = cb; 853 sock->connect_cbarg = cbarg; 854 sock->read_timeout = timeout; 855 sock->peer = *peer; 856 sock->client = true; 857 858 sock->fd = fd; 859 860 (void)isc__nm_socket_disable_pmtud(sock->fd, sa_family); 861 862 (void)isc__nm_socket_min_mtu(sock->fd, sa_family); 863 864 /* Initialize the request */ 865 req = isc__nm_uvreq_get(sock); 866 req->cb.connect = cb; 867 req->cbarg = cbarg; 868 req->peer = *peer; 869 req->local = *local; 870 req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface); 871 872 sock->active = true; 873 sock->connecting = true; 874 875 result = udp_connect_direct(sock, req); 876 if (result != ISC_R_SUCCESS) { 877 sock->active = false; 878 isc__nm_failed_connect_cb(sock, req, result, true); 879 isc__nmsocket_detach(&sock); 880 return; 881 } 882 883 sock->connecting = false; 884 sock->connected = true; 885 886 isc__nm_connectcb(sock, req, ISC_R_SUCCESS, true); 887 isc__nmsocket_detach(&sock); 888 } 889 890 void 891 isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, 892 bool async) { 893 REQUIRE(VALID_NMSOCK(sock)); 894 REQUIRE(result != ISC_R_SUCCESS); 895 REQUIRE(sock->tid == isc_tid()); 896 897 /* 898 * For UDP server socket, we don't have child socket via 899 * "accept", so we: 900 * - we continue to read 901 * - we don't clear the callbacks 902 * - we don't destroy it (only stoplistening could do that) 903 */ 904 905 if (sock->client) { 906 isc__nmsocket_timer_stop(sock); 907 isc__nm_stop_reading(sock); 908 } 909 910 /* Nobody expects the callback if isc_nm_read() wasn't called */ 911 if (sock->reading) { 912 sock->reading = false; 913 914 if (sock->recv_cb != NULL) { 915 isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL); 916 isc__nm_readcb(sock, req, result, async); 917 } 918 } 919 920 if (sock->client) { 921 isc__nmsocket_clearcb(sock); 922 isc__nmsocket_prep_destroy(sock); 923 return; 924 } 925 } 926 927 void 928 isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) 
{ 929 isc_nmsocket_t *sock = NULL; 930 isc_result_t result; 931 932 REQUIRE(VALID_NMHANDLE(handle)); 933 934 sock = handle->sock; 935 936 REQUIRE(VALID_NMSOCK(sock)); 937 REQUIRE(sock->type == isc_nm_udpsocket); 938 REQUIRE(sock->statichandle == handle); 939 REQUIRE(sock->tid == isc_tid()); 940 941 /* 942 * We need to initialize the callback before checking for shutdown 943 * conditions, so the callback is always called even on error condition. 944 */ 945 sock->recv_cb = cb; 946 sock->recv_cbarg = cbarg; 947 sock->reading = true; 948 949 if (isc__nm_closing(sock->worker)) { 950 result = ISC_R_SHUTTINGDOWN; 951 goto fail; 952 } 953 954 if (isc__nmsocket_closing(sock)) { 955 result = ISC_R_CANCELED; 956 goto fail; 957 } 958 959 result = isc__nm_start_reading(sock); 960 if (result != ISC_R_SUCCESS) { 961 goto fail; 962 } 963 964 isc__nmsocket_timer_restart(sock); 965 return; 966 967 fail: 968 sock->reading = true; /* required by the next call */ 969 isc__nm_failed_read_cb(sock, result, true); 970 } 971 972 static void 973 udp_close_cb(uv_handle_t *handle) { 974 isc_nmsocket_t *sock = uv_handle_get_data(handle); 975 uv_handle_set_data(handle, NULL); 976 977 REQUIRE(VALID_NMSOCK(sock)); 978 REQUIRE(sock->tid == isc_tid()); 979 REQUIRE(sock->closing); 980 REQUIRE(!sock->closed); 981 982 sock->closed = true; 983 984 isc__nm_incstats(sock, STATID_CLOSE); 985 986 if (sock->parent != NULL) { 987 /* listening socket (listen) */ 988 isc__nmsocket_detach(&sock); 989 } else { 990 /* client and server sockets */ 991 sock->connected = false; 992 isc__nmsocket_prep_destroy(sock); 993 } 994 } 995 996 void 997 isc__nm_udp_close(isc_nmsocket_t *sock) { 998 REQUIRE(VALID_NMSOCK(sock)); 999 REQUIRE(sock->type == isc_nm_udpsocket); 1000 REQUIRE(sock->tid == isc_tid()); 1001 REQUIRE(!sock->closing); 1002 1003 sock->closing = true; 1004 1005 isc__nmsocket_clearcb(sock); 1006 isc__nmsocket_timer_stop(sock); 1007 isc__nm_stop_reading(sock); 1008 1009 /* 1010 * The order of the close 
operation is important here, the uv_close() 1011 * gets scheduled in the reverse order, so we need to close the timer 1012 * last, so its gone by the time we destroy the socket 1013 */ 1014 1015 /* 2. close the listening socket */ 1016 isc__nmsocket_clearcb(sock); 1017 isc__nm_stop_reading(sock); 1018 uv_close(&sock->uv_handle.handle, udp_close_cb); 1019 1020 /* 1. close the read timer */ 1021 isc__nmsocket_timer_stop(sock); 1022 uv_close((uv_handle_t *)&sock->read_timer, NULL); 1023 } 1024 1025 void 1026 isc__nm_udp_shutdown(isc_nmsocket_t *sock) { 1027 REQUIRE(VALID_NMSOCK(sock)); 1028 REQUIRE(sock->tid == isc_tid()); 1029 REQUIRE(sock->type == isc_nm_udpsocket); 1030 1031 /* 1032 * If the socket is active, mark it inactive and 1033 * continue. If it isn't active, stop now. 1034 */ 1035 if (!sock->active) { 1036 return; 1037 } 1038 sock->active = false; 1039 1040 /* uv_udp_connect is synchronous, we can't be in connected state */ 1041 REQUIRE(!sock->connecting); 1042 1043 /* 1044 * When the client detaches the last handle, the 1045 * sock->statichandle would be NULL, in that case, nobody is 1046 * interested in the callback. 1047 */ 1048 if (sock->statichandle != NULL) { 1049 isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false); 1050 return; 1051 } 1052 1053 /* Destroy the non-listening socket */ 1054 if (sock->parent == NULL) { 1055 isc__nmsocket_prep_destroy(sock); 1056 return; 1057 } 1058 1059 /* Destroy the listening socket if on the same loop */ 1060 if (sock->tid == sock->parent->tid) { 1061 isc__nmsocket_prep_destroy(sock->parent); 1062 } 1063 } 1064