/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/*! \file */

#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>

#include <netinet/tcp.h>

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <isc/buffer.h>
#include <isc/bufferlist.h>

#include <isc/list.h>
#include <isc/log.h>
#include <isc/net.h>
#include <isc/region.h>
#include <isc/socket.h>
#include <isc/task.h>
#include <isc/util.h>

#include "errno2result.h"

#include "socket_p.h"
#include "../task_p.h"

/*
 * State handed to an external select() loop: the read/write fd_sets to
 * wait on plus the fd counts/bounds that go with them.
 */
struct isc_socketwait {
	fd_set *readset;
	fd_set *writeset;
	int nfds;
	int maxfd;
};

/*
 * Set by the -T dscp option on the command line. If set to a value
 * other than -1, we check to make sure DSCP values match it, and
 * assert if not.
 */
int isc_dscp_check_value = -1;

/*%
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t. This is here so it can be easily changed if needed.
 */

/*%
 * Define what the possible "soft" errors can be.
 * These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0.  This is broken, but we have to
 * work around it here.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)

#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*!<
 * DLVL(90) --  Function entry/exit and other tracing.
 * DLVL(60) --  Socket data send/receive
 * DLVL(50) --  Event tracing, including receiving/sending completion events.
 * DLVL(20) --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

/* Internal readable/writable wakeup events are plain isc_event_t's. */
typedef isc_event_t intev_t;

#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(s)		ISC_MAGIC_VALID(s, SOCKET_MAGIC)

/*!
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */

/*%
 * NetBSD and FreeBSD can timestamp packets.  XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */

/*%
 * Instead of calculating the cmsgbuf lengths every time we take
 * a rule of thumb approach - sizes are taken from x86_64 linux,
 * multiplied by 2, everything should fit. Those sizes are not
 * large enough to cause any concern.
 */
#define CMSG_SP_IN6PKT 40

#define CMSG_SP_TIMESTAMP 32

#define CMSG_SP_TCTOS 24

#define CMSG_SP_INT 24

/* Ancillary-data buffer sizes used with recvmsg()/sendmsg() below. */
#define RECVCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS)+1)
#define SENDCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS)+1)

/*%
 * The number of times a send operation is repeated if the result is EINTR.
 */
#define NRETRIES 10

typedef struct isc__socket isc__socket_t;
typedef struct isc__socketmgr isc__socketmgr_t;

struct isc__socket {
	/* Not locked. */
	isc_socket_t		common;
	isc__socketmgr_t	*manager;
	isc_sockettype_t	type;

	/* Locked by socket lock. */
	ISC_LINK(isc__socket_t)	link;
	unsigned int		references;
	int			fd;
	int			pf;	/* protocol family (AF_INET/AF_INET6) */

	ISC_LIST(isc_socketevent_t)	send_list;
	ISC_LIST(isc_socketevent_t)	recv_list;
	isc_socket_connev_t	       *connect_ev;

	/*
	 * Internal events.  Posted when a descriptor is readable or
	 * writable.  These are statically allocated and never freed.
	 * They will be set to non-purgable before use.
	 */
	intev_t			readable_ev;
	intev_t			writable_ev;

	isc_sockaddr_t		peer_address;	/* remote address */

	unsigned int		pending_recv : 1,
				pending_send : 1,
				connected : 1,
				connecting : 1,	/* connect pending */
				bound : 1,	/* bound to local addr */
				active : 1,	/* currently active */
				pktdscp : 1;	/* per packet dscp */
	unsigned int		dscp;
};

#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

struct isc__socketmgr {
	/* Not locked. */
	isc_socketmgr_t		common;
	int			fd_bufsize;
	unsigned int		maxsocks;

	/* fds[] and fdstate[] are indexed by file descriptor. */
	isc__socket_t	       **fds;
	int			*fdstate;

	/* Locked by manager lock.
 */
	ISC_LIST(isc__socket_t)	socklist;
	fd_set			*read_fds;
	fd_set			*read_fds_copy;
	fd_set			*write_fds;
	fd_set			*write_fds_copy;
	int			maxfd;
	unsigned int		refs;
};

/* The single, process-wide socket manager. */
static isc__socketmgr_t *socketmgr = NULL;

/* Per-fd lifecycle states stored in manager->fdstate[]. */
#define CLOSED			0	/* this one must be zero */
#define MANAGED			1
#define CLOSE_PENDING		2

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)

static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
				  isc_sockettype_t type,
				  isc_socket_t **socketp);
static void send_recvdone_event(isc__socket_t *, isc_socketevent_t **);
static void send_senddone_event(isc__socket_t *, isc_socketevent_t **);
static void free_socket(isc__socket_t **);
static isc_result_t allocate_socket(isc__socketmgr_t *, isc_sockettype_t,
				    isc__socket_t **);
static void destroy(isc__socket_t **);
static void internal_connect(isc_task_t *, isc_event_t *);
static void internal_recv(isc_task_t *, isc_event_t *);
static void internal_send(isc_task_t *, isc_event_t *);
static void process_cmsg(isc__socket_t *, struct msghdr *, isc_socketevent_t *);
static void build_msghdr_send(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);

/*%
 * The following are intended for internal use (indicated by "isc__"
 * prefix) but are not declared as static, allowing direct access from
 * unit tests etc.
232 */ 233 234 isc_result_t 235 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 236 isc_socket_t **socketp); 237 void 238 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp); 239 void 240 isc__socket_detach(isc_socket_t **socketp); 241 isc_result_t 242 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, 243 unsigned int minimum, isc_task_t *task, 244 isc_taskaction_t action, void *arg); 245 isc_result_t 246 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, 247 isc_task_t *task, isc_taskaction_t action, void *arg); 248 isc_result_t 249 isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist, 250 isc_task_t *task, isc_taskaction_t action, void *arg, 251 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 252 unsigned int flags); 253 isc_result_t 254 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, 255 unsigned int options); 256 isc_result_t 257 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, 258 isc_task_t *task, isc_taskaction_t action, 259 void *arg); 260 void 261 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how); 262 263 isc_result_t 264 isc__socketmgr_create(isc_socketmgr_t **managerp); 265 isc_result_t 266 isc__socketmgr_create2(isc_socketmgr_t **managerp, 267 unsigned int maxsocks); 268 isc_result_t 269 isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager0, unsigned int *nsockp); 270 void 271 isc__socketmgr_destroy(isc_socketmgr_t **managerp); 272 273 static struct { 274 isc_socketmethods_t methods; 275 276 /*% 277 * The following are defined just for avoiding unused static functions. 
278 */ 279 void *recvv, *sendv; 280 } socketmethods = { 281 { 282 isc__socket_attach, 283 isc__socket_detach, 284 isc__socket_bind, 285 isc__socket_connect, 286 isc__socket_cancel, 287 }, 288 (void *)isc__socket_recvv, 289 (void *)isc__socket_sendv, 290 }; 291 292 static isc_socketmgrmethods_t socketmgrmethods = { 293 isc__socketmgr_destroy, 294 isc__socket_create 295 }; 296 297 #define SELECT_POKE_SHUTDOWN (-1) 298 #define SELECT_POKE_READ (-3) 299 #define SELECT_POKE_WRITE (-4) 300 #define SELECT_POKE_CONNECT (-4) /*%< Same as _WRITE */ 301 #define SELECT_POKE_CLOSE (-5) 302 303 #define SOCK_DEAD(s) ((s)->references == 0) 304 305 /*% 306 * Shortcut index arrays to get access to statistics counters. 307 */ 308 enum { 309 STATID_OPEN = 0, 310 STATID_OPENFAIL = 1, 311 STATID_CLOSE = 2, 312 STATID_BINDFAIL = 3, 313 STATID_CONNECTFAIL = 4, 314 STATID_CONNECT = 5, 315 STATID_ACCEPTFAIL = 6, 316 STATID_ACCEPT = 7, 317 STATID_SENDFAIL = 8, 318 STATID_RECVFAIL = 9, 319 STATID_ACTIVE = 10 320 }; 321 322 323 static void 324 socket_log(isc__socket_t *sock, isc_sockaddr_t *address, 325 isc_logcategory_t *category, isc_logmodule_t *module, int level, 326 const char *fmt, ...) __attribute__((__format__(__printf__, 6, 7))); 327 static void 328 socket_log(isc__socket_t *sock, isc_sockaddr_t *address, 329 isc_logcategory_t *category, isc_logmodule_t *module, int level, 330 const char *fmt, ...) 331 { 332 char msgbuf[2048]; 333 char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 334 va_list ap; 335 336 if (! 
isc_log_wouldlog(isc_lctx, level)) 337 return; 338 339 va_start(ap, fmt); 340 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 341 va_end(ap); 342 343 if (address == NULL) { 344 isc_log_write(isc_lctx, category, module, level, 345 "socket %p: %s", sock, msgbuf); 346 } else { 347 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 348 isc_log_write(isc_lctx, category, module, level, 349 "socket %p %s: %s", sock, peerbuf, msgbuf); 350 } 351 } 352 353 static inline isc_result_t 354 watch_fd(isc__socketmgr_t *manager, int fd, int msg) { 355 isc_result_t result = ISC_R_SUCCESS; 356 357 if (msg == SELECT_POKE_READ) 358 FD_SET(fd, manager->read_fds); 359 if (msg == SELECT_POKE_WRITE) 360 FD_SET(fd, manager->write_fds); 361 362 return (result); 363 } 364 365 static inline isc_result_t 366 unwatch_fd(isc__socketmgr_t *manager, int fd, int msg) { 367 isc_result_t result = ISC_R_SUCCESS; 368 369 if (msg == SELECT_POKE_READ) 370 FD_CLR(fd, manager->read_fds); 371 else if (msg == SELECT_POKE_WRITE) 372 FD_CLR(fd, manager->write_fds); 373 374 return (result); 375 } 376 377 static void 378 wakeup_socket(isc__socketmgr_t *manager, int fd, int msg) { 379 isc_result_t result; 380 381 /* 382 * This is a wakeup on a socket. If the socket is not in the 383 * process of being closed, start watching it for either reads 384 * or writes. 385 */ 386 387 INSIST(fd >= 0 && fd < (int)manager->maxsocks); 388 389 if (msg == SELECT_POKE_CLOSE) { 390 /* No one should be updating fdstate, so no need to lock it */ 391 INSIST(manager->fdstate[fd] == CLOSE_PENDING); 392 manager->fdstate[fd] = CLOSED; 393 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 394 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 395 (void)close(fd); 396 return; 397 } 398 399 if (manager->fdstate[fd] == CLOSE_PENDING) { 400 401 /* 402 * We accept (and ignore) any error from unwatch_fd() as we are 403 * closing the socket, hoping it doesn't leave dangling state in 404 * the kernel. 
405 */ 406 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 407 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 408 return; 409 } 410 if (manager->fdstate[fd] != MANAGED) { 411 return; 412 } 413 414 /* 415 * Set requested bit. 416 */ 417 result = watch_fd(manager, fd, msg); 418 if (result != ISC_R_SUCCESS) { 419 /* 420 * XXXJT: what should we do? Ignoring the failure of watching 421 * a socket will make the application dysfunctional, but there 422 * seems to be no reasonable recovery process. 423 */ 424 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 425 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 426 "failed to start watching FD (%d): %s", 427 fd, isc_result_totext(result)); 428 } 429 } 430 431 /* 432 * Update the state of the socketmgr when something changes. 433 */ 434 static void 435 select_poke(isc__socketmgr_t *manager, int fd, int msg) { 436 if (msg == SELECT_POKE_SHUTDOWN) 437 return; 438 else if (fd >= 0) 439 wakeup_socket(manager, fd, msg); 440 return; 441 } 442 443 /* 444 * Make a fd non-blocking. 445 */ 446 static isc_result_t 447 make_nonblock(int fd) { 448 int ret; 449 int flags; 450 451 flags = fcntl(fd, F_GETFL, 0); 452 flags |= O_NONBLOCK; 453 ret = fcntl(fd, F_SETFL, flags); 454 455 if (ret == -1) { 456 UNEXPECTED_ERROR(__FILE__, __LINE__, 457 "fcntl(%d, F_SETFL, %d): %s", fd, flags, 458 strerror(errno)); 459 return (ISC_R_UNEXPECTED); 460 } 461 462 return (ISC_R_SUCCESS); 463 } 464 465 /* 466 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. 467 * In order to ensure as much portability as possible, we provide wrapper 468 * functions of these macros. 469 * Note that cmsg_space() could run slow on OSes that do not have 470 * CMSG_SPACE. 471 */ 472 static inline socklen_t 473 cmsg_len(socklen_t len) { 474 return (CMSG_LEN(len)); 475 } 476 477 static inline socklen_t 478 cmsg_space(socklen_t len) { 479 return (CMSG_SPACE(len)); 480 } 481 482 /* 483 * Process control messages received on a socket. 
484 */ 485 static void 486 process_cmsg(isc__socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { 487 struct cmsghdr *cmsgp; 488 struct in6_pktinfo *pktinfop; 489 void *timevalp; 490 491 /* 492 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. 493 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. 494 * They are all here, outside of the CPP tests, because it is 495 * more consistent with the usual ISC coding style. 496 */ 497 UNUSED(sock); 498 UNUSED(msg); 499 UNUSED(dev); 500 501 if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC) 502 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; 503 504 if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC) 505 dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; 506 507 if (msg->msg_controllen == 0U || msg->msg_control == NULL) 508 return; 509 510 timevalp = NULL; 511 pktinfop = NULL; 512 513 cmsgp = CMSG_FIRSTHDR(msg); 514 while (cmsgp != NULL) { 515 socket_log(sock, NULL, TRACE, 516 "processing cmsg %p", cmsgp); 517 518 if (cmsgp->cmsg_level == IPPROTO_IPV6 519 && cmsgp->cmsg_type == IPV6_PKTINFO) { 520 521 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 522 memmove(&dev->pktinfo, pktinfop, 523 sizeof(struct in6_pktinfo)); 524 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 525 socket_log(sock, NULL, TRACE, 526 "interface received on ifindex %u", 527 dev->pktinfo.ipi6_ifindex); 528 if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) 529 dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; 530 goto next; 531 } 532 533 if (cmsgp->cmsg_level == SOL_SOCKET 534 && cmsgp->cmsg_type == SCM_TIMESTAMP) { 535 struct timeval tv; 536 timevalp = CMSG_DATA(cmsgp); 537 memmove(&tv, timevalp, sizeof(tv)); 538 dev->timestamp.seconds = tv.tv_sec; 539 dev->timestamp.nanoseconds = tv.tv_usec * 1000; 540 dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; 541 goto next; 542 } 543 544 if (cmsgp->cmsg_level == IPPROTO_IPV6 545 && cmsgp->cmsg_type == IPV6_TCLASS) { 546 dev->dscp = *(int *)CMSG_DATA(cmsgp); 547 dev->dscp >>= 2; 548 
dev->attributes |= ISC_SOCKEVENTATTR_DSCP; 549 goto next; 550 } 551 552 if (cmsgp->cmsg_level == IPPROTO_IP 553 && (cmsgp->cmsg_type == IP_TOS)) { 554 dev->dscp = (int) *(unsigned char *)CMSG_DATA(cmsgp); 555 dev->dscp >>= 2; 556 dev->attributes |= ISC_SOCKEVENTATTR_DSCP; 557 goto next; 558 } 559 next: 560 cmsgp = CMSG_NXTHDR(msg, cmsgp); 561 } 562 563 } 564 565 /* 566 * Construct an iov array and attach it to the msghdr passed in. This is 567 * the SEND constructor, which will use the used region of the buffer 568 * (if using a buffer list) or will use the internal region (if a single 569 * buffer I/O is requested). 570 * 571 * Nothing can be NULL, and the done event must list at least one buffer 572 * on the buffer linked list for this function to be meaningful. 573 * 574 * If write_countp != NULL, *write_countp will hold the number of bytes 575 * this transaction can send. 576 */ 577 static void 578 build_msghdr_send(isc__socket_t *sock, char* cmsgbuf, isc_socketevent_t *dev, 579 struct msghdr *msg, struct iovec *iov, size_t *write_countp) 580 { 581 unsigned int iovcount; 582 isc_buffer_t *buffer; 583 isc_region_t used; 584 size_t write_count; 585 size_t skip_count; 586 struct cmsghdr *cmsgp; 587 588 memset(msg, 0, sizeof(*msg)); 589 590 if (!sock->connected) { 591 msg->msg_name = (void *)&dev->address.type.sa; 592 msg->msg_namelen = dev->address.length; 593 } else { 594 msg->msg_name = NULL; 595 msg->msg_namelen = 0; 596 } 597 598 buffer = ISC_LIST_HEAD(dev->bufferlist); 599 write_count = 0; 600 iovcount = 0; 601 602 /* 603 * Single buffer I/O? Skip what we've done so far in this region. 604 */ 605 if (buffer == NULL) { 606 write_count = dev->region.length - dev->n; 607 iov[0].iov_base = (void *)(dev->region.base + dev->n); 608 iov[0].iov_len = write_count; 609 iovcount = 1; 610 611 goto config; 612 } 613 614 /* 615 * Multibuffer I/O. 616 * Skip the data in the buffer list that we have already written. 
617 */ 618 skip_count = dev->n; 619 while (buffer != NULL) { 620 REQUIRE(ISC_BUFFER_VALID(buffer)); 621 if (skip_count < isc_buffer_usedlength(buffer)) 622 break; 623 skip_count -= isc_buffer_usedlength(buffer); 624 buffer = ISC_LIST_NEXT(buffer, link); 625 } 626 627 while (buffer != NULL) { 628 INSIST(iovcount < MAXSCATTERGATHER_SEND); 629 630 isc_buffer_usedregion(buffer, &used); 631 632 if (used.length > 0) { 633 iov[iovcount].iov_base = (void *)(used.base 634 + skip_count); 635 iov[iovcount].iov_len = used.length - skip_count; 636 write_count += (used.length - skip_count); 637 skip_count = 0; 638 iovcount++; 639 } 640 buffer = ISC_LIST_NEXT(buffer, link); 641 } 642 643 INSIST(skip_count == 0U); 644 645 config: 646 msg->msg_iov = iov; 647 msg->msg_iovlen = iovcount; 648 649 msg->msg_control = NULL; 650 msg->msg_controllen = 0; 651 msg->msg_flags = 0; 652 653 if ((sock->type == isc_sockettype_udp) && 654 ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) 655 { 656 struct in6_pktinfo *pktinfop; 657 658 socket_log(sock, NULL, TRACE, 659 "sendto pktinfo data, ifindex %u", 660 dev->pktinfo.ipi6_ifindex); 661 662 msg->msg_control = (void *)cmsgbuf; 663 msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo)); 664 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 665 666 cmsgp = (struct cmsghdr *)cmsgbuf; 667 cmsgp->cmsg_level = IPPROTO_IPV6; 668 cmsgp->cmsg_type = IPV6_PKTINFO; 669 cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo)); 670 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 671 memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo)); 672 } 673 674 if ((sock->type == isc_sockettype_udp) && 675 ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0)) 676 { 677 int use_min_mtu = 1; /* -1, 0, 1 */ 678 679 cmsgp = (struct cmsghdr *)(cmsgbuf + 680 msg->msg_controllen); 681 682 msg->msg_control = (void *)cmsgbuf; 683 msg->msg_controllen += cmsg_space(sizeof(use_min_mtu)); 684 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 685 686 cmsgp->cmsg_level = 
IPPROTO_IPV6; 687 cmsgp->cmsg_type = IPV6_USE_MIN_MTU; 688 cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu)); 689 memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu)); 690 } 691 692 if (isc_dscp_check_value > -1) { 693 if (sock->type == isc_sockettype_udp) 694 INSIST((int)dev->dscp == isc_dscp_check_value); 695 else if (sock->type == isc_sockettype_tcp) 696 INSIST((int)sock->dscp == isc_dscp_check_value); 697 } 698 699 if ((sock->type == isc_sockettype_udp) && 700 ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0)) 701 { 702 int dscp = (dev->dscp << 2) & 0xff; 703 704 INSIST(dev->dscp < 0x40); 705 706 if (sock->pf == AF_INET && sock->pktdscp) { 707 cmsgp = (struct cmsghdr *)(cmsgbuf + 708 msg->msg_controllen); 709 msg->msg_control = (void *)cmsgbuf; 710 msg->msg_controllen += cmsg_space(sizeof(dscp)); 711 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 712 713 cmsgp->cmsg_level = IPPROTO_IP; 714 cmsgp->cmsg_type = IP_TOS; 715 cmsgp->cmsg_len = cmsg_len(sizeof(char)); 716 *(unsigned char*)CMSG_DATA(cmsgp) = dscp; 717 } else if (sock->pf == AF_INET && sock->dscp != dev->dscp) { 718 if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, 719 (void *)&dscp, sizeof(int)) < 0) 720 { 721 UNEXPECTED_ERROR(__FILE__, __LINE__, 722 "setsockopt(%d, IP_TOS, %.02x)" 723 " %s: %s", 724 sock->fd, dscp >> 2, 725 "failed", strerror(errno)); 726 } else 727 sock->dscp = dscp; 728 } 729 730 if (sock->pf == AF_INET6 && sock->pktdscp) { 731 cmsgp = (struct cmsghdr *)(cmsgbuf + 732 msg->msg_controllen); 733 msg->msg_control = (void *)cmsgbuf; 734 msg->msg_controllen += cmsg_space(sizeof(dscp)); 735 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 736 737 cmsgp->cmsg_level = IPPROTO_IPV6; 738 cmsgp->cmsg_type = IPV6_TCLASS; 739 cmsgp->cmsg_len = cmsg_len(sizeof(dscp)); 740 memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp)); 741 } else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) { 742 if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, 743 (void *)&dscp, sizeof(int)) < 0) { 744 
				UNEXPECTED_ERROR(__FILE__, __LINE__,
						 "setsockopt(%d, IPV6_TCLASS, "
						 "%.02x) %s: %s",
						 sock->fd, dscp >> 2,
						 "failed", strerror(errno));
			} else
				sock->dscp = dscp;
		}

		/* Zero any unused tail of the control buffer. */
		if (msg->msg_controllen != 0 &&
		    msg->msg_controllen < SENDCMSGBUFLEN)
		{
			memset(cmsgbuf + msg->msg_controllen, 0,
			       SENDCMSGBUFLEN - msg->msg_controllen);
		}
	}

	if (write_countp != NULL)
		*write_countp = write_count;
}

/*
 * Construct an iov array and attach it to the msghdr passed in.  This is
 * the RECV constructor, which will use the available region of the buffer
 * (if using a buffer list) or will use the internal region (if a single
 * buffer I/O is requested).
 *
 * Nothing can be NULL, and the done event must list at least one buffer
 * on the buffer linked list for this function to be meaningful.
 *
 * If read_countp != NULL, *read_countp will hold the number of bytes
 * this transaction can receive.
 */
static void
build_msghdr_recv(isc__socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev,
		  struct msghdr *msg, struct iovec *iov, size_t *read_countp)
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	isc_region_t available;
	size_t read_count;

	memset(msg, 0, sizeof(struct msghdr));

	if (sock->type == isc_sockettype_udp) {
		/* The kernel fills in the datagram's source address. */
		memset(&dev->address, 0, sizeof(dev->address));
		msg->msg_name = (void *)&dev->address.type.sa;
		msg->msg_namelen = sizeof(dev->address.type);
	} else { /* TCP */
		msg->msg_name = NULL;
		msg->msg_namelen = 0;
		dev->address = sock->peer_address;
	}

	buffer = ISC_LIST_HEAD(dev->bufferlist);
	read_count = 0;

	/*
	 * Single buffer I/O?  Skip what we've done so far in this region.
	 */
	if (buffer == NULL) {
		read_count = dev->region.length - dev->n;
		iov[0].iov_base = (void *)(dev->region.base + dev->n);
		iov[0].iov_len = read_count;
		iovcount = 1;

		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip empty buffers.
	 */
	while (buffer != NULL) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (isc_buffer_availablelength(buffer) != 0)
			break;
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	iovcount = 0;
	while (buffer != NULL) {
		INSIST(iovcount < MAXSCATTERGATHER_RECV);

		isc_buffer_availableregion(buffer, &available);

		if (available.length > 0) {
			iov[iovcount].iov_base = (void *)(available.base);
			iov[iovcount].iov_len = available.length;
			read_count += available.length;
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

 config:

	/*
	 * If needed, set up to receive that one extra byte.
	 */
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;

	msg->msg_control = cmsgbuf;
	msg->msg_controllen = RECVCMSGBUFLEN;
	msg->msg_flags = 0;

	if (read_countp != NULL)
		*read_countp = read_count;
}

/* Fill in dev->address from an explicit address or the socket's peer. */
static void
set_dev_address(isc_sockaddr_t *address, isc__socket_t *sock,
		isc_socketevent_t *dev)
{
	if (sock->type == isc_sockettype_udp) {
		if (address != NULL)
			dev->address = *address;
		else
			dev->address = sock->peer_address;
	} else if (sock->type == isc_sockettype_tcp) {
		INSIST(address == NULL);
		dev->address = sock->peer_address;
	}
}

/*
 * Event destructor wrapper: asserts all buffers have been detached,
 * then chains to the original destroyer saved in ev->destroy.
 */
static void
destroy_socketevent(isc_event_t *event) {
	isc_socketevent_t *ev = (isc_socketevent_t *)event;

	INSIST(ISC_LIST_EMPTY(ev->bufferlist));

	(ev->destroy)(event);
}

/* Allocate and zero-initialize a socket event; returns NULL on failure. */
static isc_socketevent_t *
allocate_socketevent(void *sender,
		     isc_eventtype_t eventtype, isc_taskaction_t action,
		     void *arg)
{
	isc_socketevent_t *ev;

	ev = (isc_socketevent_t
	      *)isc_event_allocate(sender,
				   eventtype, action, arg,
				   sizeof(*ev));

	if (ev == NULL)
		return (NULL);

	ev->result = ISC_R_UNSET;
	ISC_LINK_INIT(ev, ev_link);
	ISC_LIST_INIT(ev->bufferlist);
	ev->region.base = NULL;
	ev->n = 0;
	ev->offset = 0;
	ev->attributes = 0;
	/* Interpose our destructor; keep the original one in ev->destroy. */
	ev->destroy = ev->ev_destroy;
	ev->ev_destroy = destroy_socketevent;
	ev->dscp = 0;

	return (ev);
}

/* Internal I/O result codes returned by doio_recv()/doio_send(). */
#define DOIO_SUCCESS		0	/* i/o ok, event sent */
#define DOIO_SOFT		1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD		2	/* i/o error, event sent */
#define DOIO_EOF		3	/* EOF, no event sent */

/*
 * Perform a single recvmsg() for 'dev' on 'sock' and classify the
 * outcome as one of the DOIO_* codes above.
 */
static int
doio_recv(isc__socket_t *sock, isc_socketevent_t *dev) {
	int cc;
	struct iovec iov[MAXSCATTERGATHER_RECV];
	size_t read_count;
	size_t actual_count;
	struct msghdr msghdr;
	isc_buffer_t *buffer;
	int recv_errno;
	char cmsgbuf[RECVCMSGBUFLEN] = {0};

	build_msghdr_recv(sock, cmsgbuf, dev, &msghdr, iov, &read_count);

	cc = recvmsg(sock->fd, &msghdr, 0);
	/* Save errno immediately; later calls may overwrite it. */
	recv_errno = errno;

	if (cc < 0) {
		if (SOFT_ERROR(recv_errno))
			return (DOIO_SOFT);

		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
			socket_log(sock, NULL, IOEVENT,
				   "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
				   sock->fd, cc, recv_errno,
				   strerror(recv_errno));
		}

#define SOFT_OR_HARD(_system, _isc) \
	if (recv_errno == _system) { \
		if (sock->connected) { \
			dev->result = _isc; \
			return (DOIO_HARD); \
		} \
		return (DOIO_SOFT); \
	}
#define ALWAYS_HARD(_system, _isc) \
	if (recv_errno == _system) { \
		dev->result = _isc; \
		return (DOIO_HARD); \
	}

		SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
		SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
		SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
		SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN);
		/* HPUX 11.11 can return EADDRNOTAVAIL. */
		SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
		ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
		/* Should never get this one but it was seen. */
		SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH);
		/*
		 * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6
		 * errors.
		 */
		SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH);
		SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH);

#undef SOFT_OR_HARD
#undef ALWAYS_HARD

		dev->result = isc__errno2result(recv_errno);
		return (DOIO_HARD);
	}

	/*
	 * On TCP and UNIX sockets, zero length reads indicate EOF,
	 * while on UDP sockets, zero length reads are perfectly valid,
	 * although strange.
	 */
	switch (sock->type) {
	case isc_sockettype_tcp:
		if (cc == 0)
			return (DOIO_EOF);
		break;
	case isc_sockettype_udp:
		break;
	default:
		INSIST(0);
	}

	if (sock->type == isc_sockettype_udp) {
		dev->address.length = msghdr.msg_namelen;
		/* Datagrams with source port 0 are silently dropped. */
		if (isc_sockaddr_getport(&dev->address) == 0) {
			if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
				socket_log(sock, &dev->address, IOEVENT,
					   "dropping source port zero packet");
			}
			return (DOIO_SOFT);
		}
	}

	socket_log(sock, &dev->address, IOEVENT,
		   "packet received correctly");

	/*
	 * Overflow bit detection.  If we received MORE bytes than we should,
	 * this indicates an overflow situation.  Set the flag in the
	 * dev entry and adjust how much we read by one.
	 */
	/*
	 * If there are control messages attached, run through them and pull
	 * out the interesting bits.
	 */
	process_cmsg(sock, &msghdr, dev);

	/*
	 * update the buffers (if any) and the i/o count
	 */
	dev->n += cc;
	actual_count = cc;
	buffer = ISC_LIST_HEAD(dev->bufferlist);
	while (buffer != NULL && actual_count > 0U) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (isc_buffer_availablelength(buffer) <= actual_count) {
			actual_count -= isc_buffer_availablelength(buffer);
			isc_buffer_add(buffer,
				       isc_buffer_availablelength(buffer));
		} else {
			isc_buffer_add(buffer, actual_count);
			actual_count = 0;
			POST(actual_count);
			break;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
		if (buffer == NULL) {
			INSIST(actual_count == 0U);
		}
	}

	/*
	 * If we read less than we expected, update counters,
	 * and let the upper layer poke the descriptor.
	 */
	if (((size_t)cc != read_count) && (dev->n < dev->minimum))
		return (DOIO_SOFT);

	/*
	 * Full reads are posted, or partials if partials are ok.
	 */
	dev->result = ISC_R_SUCCESS;
	return (DOIO_SUCCESS);
}

/*
 * Returns:
 *	DOIO_SUCCESS	The operation succeeded.  dev->result contains
 *			ISC_R_SUCCESS.
 *
 *	DOIO_HARD	A hard or unexpected I/O error was encountered.
 *			dev->result contains the appropriate error.
 *
 *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
 *			event was sent.  The operation should be retried.
 *
 *	No other return values are possible.
1067 */ 1068 static int 1069 doio_send(isc__socket_t *sock, isc_socketevent_t *dev) { 1070 int cc; 1071 struct iovec iov[MAXSCATTERGATHER_SEND]; 1072 size_t write_count; 1073 struct msghdr msghdr; 1074 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1075 int attempts = 0; 1076 int send_errno; 1077 char cmsgbuf[SENDCMSGBUFLEN] = {0}; 1078 1079 build_msghdr_send(sock, cmsgbuf, dev, &msghdr, iov, &write_count); 1080 1081 resend: 1082 cc = sendmsg(sock->fd, &msghdr, 0); 1083 send_errno = errno; 1084 1085 /* 1086 * Check for error or block condition. 1087 */ 1088 if (cc < 0) { 1089 if (send_errno == EINTR && ++attempts < NRETRIES) 1090 goto resend; 1091 1092 if (SOFT_ERROR(send_errno)) { 1093 if (errno == EWOULDBLOCK || errno == EAGAIN) 1094 dev->result = ISC_R_WOULDBLOCK; 1095 return (DOIO_SOFT); 1096 } 1097 1098 #define SOFT_OR_HARD(_system, _isc) \ 1099 if (send_errno == _system) { \ 1100 if (sock->connected) { \ 1101 dev->result = _isc; \ 1102 return (DOIO_HARD); \ 1103 } \ 1104 return (DOIO_SOFT); \ 1105 } 1106 #define ALWAYS_HARD(_system, _isc) \ 1107 if (send_errno == _system) { \ 1108 dev->result = _isc; \ 1109 return (DOIO_HARD); \ 1110 } 1111 1112 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 1113 ALWAYS_HARD(EACCES, ISC_R_NOPERM); 1114 ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 1115 ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 1116 ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 1117 ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); 1118 ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); 1119 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 1120 ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); 1121 ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); 1122 ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); 1123 1124 #undef SOFT_OR_HARD 1125 #undef ALWAYS_HARD 1126 1127 /* 1128 * The other error types depend on whether or not the 1129 * socket is UDP or TCP. If it is UDP, some errors 1130 * that we expect to be fatal under TCP are merely 1131 * annoying, and are really soft errors. 
1132 * 1133 * However, these soft errors are still returned as 1134 * a status. 1135 */ 1136 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1137 UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", 1138 addrbuf, strerror(send_errno)); 1139 dev->result = isc__errno2result(send_errno); 1140 return (DOIO_HARD); 1141 } 1142 1143 if (cc == 0) { 1144 UNEXPECTED_ERROR(__FILE__, __LINE__, 1145 "doio_send: send() %s 0", "returned"); 1146 } 1147 1148 /* 1149 * If we write less than we expected, update counters, poke. 1150 */ 1151 dev->n += cc; 1152 if ((size_t)cc != write_count) 1153 return (DOIO_SOFT); 1154 1155 /* 1156 * Exactly what we wanted to write. We're done with this 1157 * entry. Post its completion event. 1158 */ 1159 dev->result = ISC_R_SUCCESS; 1160 return (DOIO_SUCCESS); 1161 } 1162 1163 /* 1164 * Kill. 1165 * 1166 * Caller must ensure that the socket is not locked and no external 1167 * references exist. 1168 */ 1169 static void 1170 socketclose(isc__socketmgr_t *manager, isc__socket_t *sock, int fd) { 1171 /* 1172 * No one has this socket open, so the watcher doesn't have to be 1173 * poked, and the socket doesn't have to be locked. 
1174 */ 1175 manager->fds[fd] = NULL; 1176 manager->fdstate[fd] = CLOSE_PENDING; 1177 select_poke(manager, fd, SELECT_POKE_CLOSE); 1178 1179 if (sock->active == 1) { 1180 sock->active = 0; 1181 } 1182 1183 /* 1184 * update manager->maxfd here (XXX: this should be implemented more 1185 * efficiently) 1186 */ 1187 if (manager->maxfd == fd) { 1188 int i; 1189 1190 manager->maxfd = 0; 1191 for (i = fd - 1; i >= 0; i--) { 1192 if (manager->fdstate[i] == MANAGED) { 1193 manager->maxfd = i; 1194 break; 1195 } 1196 } 1197 } 1198 1199 } 1200 1201 static void 1202 destroy(isc__socket_t **sockp) { 1203 int fd; 1204 isc__socket_t *sock = *sockp; 1205 isc__socketmgr_t *manager = sock->manager; 1206 1207 socket_log(sock, NULL, CREATION, "destroying"); 1208 1209 INSIST(ISC_LIST_EMPTY(sock->recv_list)); 1210 INSIST(ISC_LIST_EMPTY(sock->send_list)); 1211 INSIST(sock->connect_ev == NULL); 1212 INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks); 1213 1214 if (sock->fd >= 0) { 1215 fd = sock->fd; 1216 sock->fd = -1; 1217 socketclose(manager, sock, fd); 1218 } 1219 1220 ISC_LIST_UNLINK(manager->socklist, sock, link); 1221 1222 /* can't unlock manager as its memory context is still used */ 1223 free_socket(sockp); 1224 } 1225 1226 static isc_result_t 1227 allocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type, 1228 isc__socket_t **socketp) 1229 { 1230 isc__socket_t *sock; 1231 1232 sock = malloc(sizeof(*sock)); 1233 1234 if (sock == NULL) 1235 return (ISC_R_NOMEMORY); 1236 1237 sock->common.magic = 0; 1238 sock->common.impmagic = 0; 1239 sock->references = 0; 1240 1241 sock->manager = manager; 1242 sock->type = type; 1243 sock->fd = -1; 1244 sock->dscp = 0; /* TOS/TCLASS is zero until set. */ 1245 sock->active = 0; 1246 1247 ISC_LINK_INIT(sock, link); 1248 1249 /* 1250 * Set up list of readers and writers to be initially empty. 
1251 */ 1252 ISC_LIST_INIT(sock->recv_list); 1253 ISC_LIST_INIT(sock->send_list); 1254 sock->connect_ev = NULL; 1255 sock->pending_recv = 0; 1256 sock->pending_send = 0; 1257 sock->connected = 0; 1258 sock->connecting = 0; 1259 sock->bound = 0; 1260 sock->pktdscp = 0; 1261 1262 /* 1263 * Initialize readable and writable events. 1264 */ 1265 ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t), 1266 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR, 1267 NULL, sock, sock, NULL); 1268 ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t), 1269 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW, 1270 NULL, sock, sock, NULL); 1271 1272 sock->common.magic = ISCAPI_SOCKET_MAGIC; 1273 sock->common.impmagic = SOCKET_MAGIC; 1274 *socketp = sock; 1275 1276 return (ISC_R_SUCCESS); 1277 } 1278 1279 /* 1280 * This event requires that the various lists be empty, that the reference 1281 * count be 1, and that the magic number is valid. The other socket bits, 1282 * like the lock, must be initialized as well. The fd associated must be 1283 * marked as closed, by setting it to -1 on close, or this routine will 1284 * also close the socket. 
 */
/*
 * Free the socket structure itself.  All INSISTs below encode the
 * contract described above: no references, no pending or queued I/O,
 * not linked into any list, and the fd already closed elsewhere.
 */
static void
free_socket(isc__socket_t **socketp) {
	isc__socket_t *sock = *socketp;

	INSIST(VALID_SOCKET(sock));
	INSIST(sock->references == 0);
	INSIST(!sock->connecting);
	INSIST(!sock->pending_recv);
	INSIST(!sock->pending_send);
	INSIST(ISC_LIST_EMPTY(sock->recv_list));
	INSIST(ISC_LIST_EMPTY(sock->send_list));
	INSIST(!ISC_LINK_LINKED(sock, link));

	/* Clear the magic so stale pointers fail VALID_SOCKET(). */
	sock->common.magic = 0;
	sock->common.impmagic = 0;

	free(sock);

	*socketp = NULL;
}

/*
 * Ask the kernel to use the IPv6 minimum MTU (RFC 3542 IPV6_USE_MIN_MTU)
 * on IPv6 sockets.  Failure is deliberately ignored: this is a
 * best-effort optimization.
 */
static void
use_min_mtu(isc__socket_t *sock) {
	/* use minimum MTU */
	if (sock->pf == AF_INET6) {
		int on = 1;
		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
				 (void *)&on, sizeof(on));
	}
}

/*
 * Best-effort cap on the TCP maximum segment size; no-op for non-TCP
 * sockets and errors are ignored.
 */
static void
set_tcp_maxseg(isc__socket_t *sock, int size) {
	if (sock->type == isc_sockettype_tcp)
		(void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG,
				 (void *)&size, sizeof(size));
}

/*
 * Open the underlying OS socket for 'sock' (UDP or TCP), make it
 * non-blocking, and apply per-type socket options.  Maps interesting
 * errno values to ISC_R_* results; anything unrecognized becomes
 * ISC_R_UNEXPECTED.
 */
static isc_result_t
opensocket(isc__socket_t *sock)
{
	isc_result_t result;
	const char *err = "socket";
	int on = 1;

	/*
	 * NOTE(review): no default case here; callers only create udp/tcp
	 * sockets (socket_create INSISTs on the type), and sock->fd is -1
	 * from allocate_socket, so an unknown type would fall into the
	 * error path below with a stale errno.
	 */
	switch (sock->type) {
	case isc_sockettype_udp:
		sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP);
		break;
	case isc_sockettype_tcp:
		sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
		break;
	}

	if (sock->fd < 0) {
		switch (errno) {
		case EMFILE:
		case ENFILE:
			/* Descriptor exhaustion is worth logging loudly. */
			isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
				      ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
				      "%s: %s", err, strerror(errno));
			/* fallthrough */
		case ENOBUFS:
			return (ISC_R_NORESOURCES);

		case EPROTONOSUPPORT:
		case EPFNOSUPPORT:
		case EAFNOSUPPORT:
		/*
		 * Linux 2.2 (and maybe others) return EINVAL instead of
		 * EAFNOSUPPORT.
		 */
		case EINVAL:
			return (ISC_R_FAMILYNOSUPPORT);

		default:
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "%s() %s: %s", err, "failed",
					 strerror(errno));
			return (ISC_R_UNEXPECTED);
		}
	}

	result = make_nonblock(sock->fd);
	if (result != ISC_R_SUCCESS) {
		(void)close(sock->fd);
		return (result);
	}

	/*
	 * Use minimum mtu if possible.
	 */
	if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) {
		use_min_mtu(sock);
		set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */
	}

	if (sock->type == isc_sockettype_udp) {

		/* Request kernel receive timestamps; optional. */
		if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP,
			       (void *)&on, sizeof(on)) < 0
		    && errno != ENOPROTOOPT) {
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "setsockopt(%d, SO_TIMESTAMP) %s: %s",
					 sock->fd, "failed", strerror(errno));
			/* Press on... */
		}

		/* RFC 3542 */
		if ((sock->pf == AF_INET6)
		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
				   (void *)&on, sizeof(on)) < 0)) {
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "setsockopt(%d, IPV6_RECVPKTINFO) "
					 "%s: %s", sock->fd, "failed",
					 strerror(errno));
		}
	}

	if (sock->active == 0) {
		sock->active = 1;
	}

	return (ISC_R_SUCCESS);
}

/*
 * Create a 'type' socket managed
 * by 'manager'.  Events will be posted to 'task' and when dispatched
 * 'action' will be called with 'arg' as the arg value.  The new
 * socket is returned in 'socketp'.
 */
/*
 * Internal worker for isc__socket_create(): allocate, open and register
 * a new socket with the manager, then record it in the fd table and
 * update maxfd.  On failure the partially-built socket is freed.
 */
static isc_result_t
socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
	      isc_socket_t **socketp)
{
	isc__socket_t *sock = NULL;
	isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0;
	isc_result_t result;

	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(socketp != NULL && *socketp == NULL);

	result = allocate_socket(manager, type, &sock);
	if (result != ISC_R_SUCCESS)
		return (result);

	switch (sock->type) {
	case isc_sockettype_udp:
/* NOTE(review): macro name looks like a typo for "DSCPPKT"; kept as-is. */
#define DCSPPKT(pf) ((pf == AF_INET) ? ISC_NET_DSCPPKTV4 : ISC_NET_DSCPPKTV6)
		/* Remember whether per-packet DSCP is usable on this family. */
		sock->pktdscp = (isc_net_probedscp() & DCSPPKT(pf)) != 0;
		break;
	case isc_sockettype_tcp:
		break;
	default:
		INSIST(0);
	}

	sock->pf = pf;

	result = opensocket(sock);
	if (result != ISC_R_SUCCESS) {
		free_socket(&sock);
		return (result);
	}

	sock->common.methods = (isc_socketmethods_t *)&socketmethods;
	sock->references = 1;
	*socketp = (isc_socket_t *)sock;

	/*
	 * Note we don't have to lock the socket like we normally would because
	 * there are no external references to it yet.
	 */

	manager->fds[sock->fd] = sock;
	manager->fdstate[sock->fd] = MANAGED;

	ISC_LIST_APPEND(manager->socklist, sock, link);
	if (manager->maxfd < sock->fd)
		manager->maxfd = sock->fd;

	socket_log(sock, NULL, CREATION, "created");

	return (ISC_R_SUCCESS);
}

/*%
 * Create a new 'type' socket managed by 'manager'.  Events
 * will be posted to 'task' and when dispatched 'action' will be
 * called with 'arg' as the arg value.  The new socket is returned
 * in 'socketp'.
 */
isc_result_t
isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
		   isc_socket_t **socketp)
{
	/* Thin public wrapper around socket_create(). */
	return (socket_create(manager0, pf, type, socketp));
}

/*
 * Attach to a socket.  Caller must explicitly detach when it is done.
 */
void
isc__socket_attach(isc_socket_t *sock0, isc_socket_t **socketp) {
	isc__socket_t *sock = (isc__socket_t *)sock0;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(socketp != NULL && *socketp == NULL);

	sock->references++;

	*socketp = (isc_socket_t *)sock;
}

/*
 * Dereference a socket.  If this is the last reference to it, clean things
 * up by destroying the socket.
 */
void
isc__socket_detach(isc_socket_t **socketp) {
	isc__socket_t *sock;
	isc_boolean_t kill_socket = ISC_FALSE;

	REQUIRE(socketp != NULL);
	sock = (isc__socket_t *)*socketp;
	REQUIRE(VALID_SOCKET(sock));

	REQUIRE(sock->references > 0);
	sock->references--;
	if (sock->references == 0)
		kill_socket = ISC_TRUE;

	if (kill_socket)
		destroy(&sock);

	*socketp = NULL;
}

/*
 * I/O is possible on a given socket.  Schedule an event to this task that
 * will call an internal function to do the I/O.  This will charge the
 * task with the I/O operation and let our select loop handler get back
 * to doing something real as fast as possible.
 *
 * The socket and manager must be locked before calling this function.
 */
/*
 * Queue an internal "readable" event to the task owning the first
 * pending receive request.  A socket reference is held for the
 * duration of the internal event; internal_recv() drops it.
 */
static void
dispatch_recv(isc__socket_t *sock) {
	intev_t *iev;
	isc_socketevent_t *ev;
	isc_task_t *sender;

	INSIST(!sock->pending_recv);

	ev = ISC_LIST_HEAD(sock->recv_list);
	if (ev == NULL)
		return;
	socket_log(sock, NULL, EVENT, NULL, 0, 0,
		   "dispatch_recv:  event %p -> task %p",
		   ev, ev->ev_sender);
	sender = ev->ev_sender;

	sock->pending_recv = 1;
	iev = &sock->readable_ev;

	sock->references++;
	iev->ev_sender = sock;
	iev->ev_action = internal_recv;
	iev->ev_arg = sock;

	isc_task_send(sender, (isc_event_t **)&iev);
}

/*
 * Same as dispatch_recv() but for the send side: queue the internal
 * "writable" event to the task owning the first pending send request.
 */
static void
dispatch_send(isc__socket_t *sock) {
	intev_t *iev;
	isc_socketevent_t *ev;
	isc_task_t *sender;

	INSIST(!sock->pending_send);

	ev = ISC_LIST_HEAD(sock->send_list);
	if (ev == NULL)
		return;
	socket_log(sock, NULL, EVENT, NULL, 0, 0,
		   "dispatch_send:  event %p -> task %p",
		   ev, ev->ev_sender);
	sender = ev->ev_sender;

	sock->pending_send = 1;
	iev = &sock->writable_ev;

	sock->references++;
	iev->ev_sender = sock;
	iev->ev_action = internal_send;
	iev->ev_arg = sock;

	isc_task_send(sender, (isc_event_t **)&iev);
}

/*
 * A pending connect() became ready; queue internal_connect() to the
 * task that requested the connect.
 */
static void
dispatch_connect(isc__socket_t *sock) {
	intev_t *iev;
	isc_socket_connev_t *ev;

	iev = &sock->writable_ev;

	ev = sock->connect_ev;
	INSIST(ev != NULL); /* XXX */

	INSIST(sock->connecting);

	sock->references++;  /* keep socket around for this internal event */
	iev->ev_sender = sock;
	iev->ev_action = internal_connect;
	iev->ev_arg = sock;

	isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
}

/*
 * Dequeue an item off the given socket's read queue, set the result code
 * in the done event to the one provided, and send it to the task it was
 * destined for.
 *
 * If the event to be sent is on a list, remove it before sending.  If
 * asked to, send and detach from the socket as well.
 *
 * Caller must have the socket locked if the event is attached to the socket.
 */
static void
send_recvdone_event(isc__socket_t *sock, isc_socketevent_t **dev) {
	isc_task_t *task;

	task = (*dev)->ev_sender;

	/* The done event's sender becomes the socket itself. */
	(*dev)->ev_sender = sock;

	if (ISC_LINK_LINKED(*dev, ev_link))
		ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);

	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
	    == ISC_SOCKEVENTATTR_ATTACHED)
		isc_task_sendanddetach(&task, (isc_event_t **)dev);
	else
		isc_task_send(task, (isc_event_t **)dev);
}

/*
 * See comments for send_recvdone_event() above.
 *
 * Caller must have the socket locked if the event is attached to the socket.
 */
static void
send_senddone_event(isc__socket_t *sock, isc_socketevent_t **dev) {
	isc_task_t *task;

	INSIST(dev != NULL && *dev != NULL);

	task = (*dev)->ev_sender;
	(*dev)->ev_sender = sock;

	if (ISC_LINK_LINKED(*dev, ev_link))
		ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);

	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
	    == ISC_SOCKEVENTATTR_ATTACHED)
		isc_task_sendanddetach(&task, (isc_event_t **)dev);
	else
		isc_task_send(task, (isc_event_t **)dev);
}

/*
 * Handler for the internal "readable" event: drain the socket's receive
 * queue via doio_recv(), dispatching completion (or EOF) events, and
 * re-arm the watcher if requests remain.
 */
static void
internal_recv(isc_task_t *me, isc_event_t *ev) {
	isc_socketevent_t *dev;
	isc__socket_t *sock;

	INSIST(ev->ev_type == ISC_SOCKEVENT_INTR);

	sock = ev->ev_sender;
	INSIST(VALID_SOCKET(sock));

	socket_log(sock, NULL, IOEVENT,
		   "internal_recv: task %p got event %p", me, ev);

	INSIST(sock->pending_recv == 1);
	sock->pending_recv = 0;

	INSIST(sock->references > 0);
	sock->references--;	/* the internal event is done with this socket */
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Try to do as much I/O as possible on this socket.  There are no
	 * limits here, currently.
	 */
	dev = ISC_LIST_HEAD(sock->recv_list);
	while (dev != NULL) {
		switch (doio_recv(sock, dev)) {
		case DOIO_SOFT:
			goto poke;

		case DOIO_EOF:
			/*
			 * read of 0 means the remote end was closed.
			 * Run through the event queue and dispatch all
			 * the events with an EOF result code.
			 */
			do {
				dev->result = ISC_R_EOF;
				send_recvdone_event(sock, &dev);
				dev = ISC_LIST_HEAD(sock->recv_list);
			} while (dev != NULL);
			goto poke;

		case DOIO_SUCCESS:
		case DOIO_HARD:
			send_recvdone_event(sock, &dev);
			break;
		}

		dev = ISC_LIST_HEAD(sock->recv_list);
	}

 poke:
	if (!ISC_LIST_EMPTY(sock->recv_list))
		select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
}

/*
 * Handler for the internal "writable" event: drain the socket's send
 * queue via doio_send(), mirroring internal_recv() above.
 */
static void
internal_send(isc_task_t *me, isc_event_t *ev) {
	isc_socketevent_t *dev;
	isc__socket_t *sock;

	INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);

	/*
	 * Find out what socket this is and lock it.
	 */
	sock = (isc__socket_t *)ev->ev_sender;
	INSIST(VALID_SOCKET(sock));
	socket_log(sock, NULL, IOEVENT,
		   "internal_send: task %p got event %p", me, ev);

	INSIST(sock->pending_send == 1);
	sock->pending_send = 0;

	INSIST(sock->references > 0);
	sock->references--;	/* the internal event is done with this socket */
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Try to do as much I/O as possible on this socket.  There are no
	 * limits here, currently.
 */
	dev = ISC_LIST_HEAD(sock->send_list);
	while (dev != NULL) {
		switch (doio_send(sock, dev)) {
		case DOIO_SOFT:
			goto poke;

		case DOIO_HARD:
		case DOIO_SUCCESS:
			send_senddone_event(sock, &dev);
			break;
		}

		dev = ISC_LIST_HEAD(sock->send_list);
	}

 poke:
	/* Requests remain: ask the watcher to keep selecting for write. */
	if (!ISC_LIST_EMPTY(sock->send_list))
		select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE);
}

/*
 * Process read/writes on each fd here.  Avoid locking
 * and unlocking twice if both reads and writes are possible.
 */
static void
process_fd(isc__socketmgr_t *manager, int fd, isc_boolean_t readable,
	   isc_boolean_t writeable)
{
	isc__socket_t *sock;
	isc_boolean_t unwatch_read = ISC_FALSE, unwatch_write = ISC_FALSE;

	/*
	 * If the socket is going to be closed, don't do more I/O.
	 */
	if (manager->fdstate[fd] == CLOSE_PENDING) {
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
		return;
	}

	sock = manager->fds[fd];
	if (readable) {
		if (sock == NULL) {
			/* fd became unmanaged; just stop watching it. */
			unwatch_read = ISC_TRUE;
			goto check_write;
		}
		if (!SOCK_DEAD(sock)) {
			dispatch_recv(sock);
		}
		unwatch_read = ISC_TRUE;
	}
check_write:
	if (writeable) {
		if (sock == NULL) {
			unwatch_write = ISC_TRUE;
			goto unlock_fd;
		}
		if (!SOCK_DEAD(sock)) {
			/* Writability completes a connect or enables a send. */
			if (sock->connecting)
				dispatch_connect(sock);
			else
				dispatch_send(sock);
		}
		unwatch_write = ISC_TRUE;
	}

 unlock_fd:
	if (unwatch_read)
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
	if (unwatch_write)
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);

}

/*
 * Walk every descriptor up to 'maxfd' and dispatch any ready
 * read/write activity reported by select().
 */
static void
process_fds(isc__socketmgr_t *manager, int maxfd, fd_set *readfds,
	    fd_set *writefds)
{
	int i;

	REQUIRE(maxfd <= (int)manager->maxsocks);

	for (i = 0; i < maxfd; i++) {
		process_fd(manager, i, FD_ISSET(i, readfds),
			   FD_ISSET(i, writefds));
	}
}

/*
 * Create a new socket manager.
 */

/*
 * Allocate the four fd_set buffers used by the select() loop.  On any
 * allocation failure, everything allocated so far is released and
 * ISC_R_NOMEMORY is returned.
 */
static isc_result_t
setup_watcher(isc__socketmgr_t *manager) {
	isc_result_t result;

	UNUSED(result);

	manager->fd_bufsize = sizeof(fd_set);

	manager->read_fds = NULL;
	manager->read_fds_copy = NULL;
	manager->write_fds = NULL;
	manager->write_fds_copy = NULL;

	/* Each malloc is attempted only if the previous one succeeded. */
	manager->read_fds = malloc(manager->fd_bufsize);
	if (manager->read_fds != NULL)
		manager->read_fds_copy = malloc(manager->fd_bufsize);
	if (manager->read_fds_copy != NULL)
		manager->write_fds = malloc(manager->fd_bufsize);
	if (manager->write_fds != NULL) {
		manager->write_fds_copy = malloc(manager->fd_bufsize);
	}
	if (manager->write_fds_copy == NULL) {
		if (manager->write_fds != NULL) {
			free(manager->write_fds);
		}
		if (manager->read_fds_copy != NULL) {
			free(manager->read_fds_copy);
		}
		if (manager->read_fds != NULL) {
			free(manager->read_fds);
		}
		return (ISC_R_NOMEMORY);
	}
	memset(manager->read_fds, 0, manager->fd_bufsize);
	memset(manager->write_fds, 0, manager->fd_bufsize);

	manager->maxfd = 0;

	return (ISC_R_SUCCESS);
}

/*
 * Release the fd_set buffers allocated by setup_watcher().
 */
static void
cleanup_watcher(isc__socketmgr_t *manager) {

	if (manager->read_fds != NULL)
		free(manager->read_fds);
	if (manager->read_fds_copy != NULL)
		free(manager->read_fds_copy);
	if (manager->write_fds != NULL)
		free(manager->write_fds);
	if (manager->write_fds_copy != NULL)
		free(manager->write_fds_copy);
}

isc_result_t
isc__socketmgr_create(isc_socketmgr_t **managerp) {
	/* Default maxsocks (0) means "use FD_SETSIZE"; see create2. */
	return (isc__socketmgr_create2(managerp, 0));
}

isc_result_t
isc__socketmgr_create2(isc_socketmgr_t **managerp,
		       unsigned int maxsocks)
{
	isc__socketmgr_t
			 *manager;
	isc_result_t result;

	REQUIRE(managerp != NULL && *managerp == NULL);

	/*
	 * This implementation is a singleton: a second create just bumps
	 * the refcount on the existing manager.
	 */
	if (socketmgr != NULL) {
		/* Don't allow maxsocks to be updated */
		if (maxsocks > 0 && socketmgr->maxsocks != maxsocks)
			return (ISC_R_EXISTS);

		socketmgr->refs++;
		*managerp = (isc_socketmgr_t *)socketmgr;
		return (ISC_R_SUCCESS);
	}

	if (maxsocks == 0)
		maxsocks = FD_SETSIZE;

	manager = malloc(sizeof(*manager));
	if (manager == NULL)
		return (ISC_R_NOMEMORY);

	/* zero-clear so that necessary cleanup on failure will be easy */
	memset(manager, 0, sizeof(*manager));
	manager->maxsocks = maxsocks;
	manager->fds = malloc(manager->maxsocks * sizeof(isc__socket_t *));
	if (manager->fds == NULL) {
		result = ISC_R_NOMEMORY;
		goto free_manager;
	}
	manager->fdstate = malloc(manager->maxsocks * sizeof(int));
	if (manager->fdstate == NULL) {
		result = ISC_R_NOMEMORY;
		goto free_manager;
	}

	manager->common.methods = &socketmgrmethods;
	manager->common.magic = ISCAPI_SOCKETMGR_MAGIC;
	manager->common.impmagic = SOCKET_MANAGER_MAGIC;
	memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *));
	ISC_LIST_INIT(manager->socklist);

	manager->refs = 1;

	/*
	 * Set up initial state for the select loop
	 */
	result = setup_watcher(manager);
	if (result != ISC_R_SUCCESS)
		goto cleanup;

	memset(manager->fdstate, 0, manager->maxsocks * sizeof(int));

	socketmgr = manager;
	*managerp = (isc_socketmgr_t *)manager;

	return (ISC_R_SUCCESS);

cleanup:
	/* cleanup falls through: free_manager releases fdstate/fds too. */

free_manager:
	if (manager->fdstate != NULL) {
		free(manager->fdstate);
	}
	if (manager->fds != NULL) {
		free(manager->fds);
	}
	free(manager);

	return (result);
}

void
isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
	isc__socketmgr_t
			 *manager;
	int i;

	/*
	 * Destroy a socket manager.
	 */

	REQUIRE(managerp != NULL);
	manager = (isc__socketmgr_t *)*managerp;
	REQUIRE(VALID_MANAGER(manager));

	manager->refs--;
	if (manager->refs > 0) {
		*managerp = NULL;
		return;
	}
	socketmgr = NULL;

	/*
	 * Wait for all sockets to be destroyed.
	 */
	while (!ISC_LIST_EMPTY(manager->socklist)) {
		/* Pump the task queue so pending destroys can run. */
		isc__taskmgr_dispatch(NULL);
	}

	/*
	 * Here, poke our select/poll thread.  Do this by closing the write
	 * half of the pipe, which will send EOF to the read half.
	 * This is currently a no-op in the non-threaded case.
	 */
	select_poke(manager, 0, SELECT_POKE_SHUTDOWN);

	/*
	 * Clean up.
	 */
	cleanup_watcher(manager);

	/* Close any fds whose deferred close never ran. */
	for (i = 0; i < (int)manager->maxsocks; i++)
		if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */
			(void)close(i);

	free(manager->fds);
	free(manager->fdstate);

	manager->common.magic = 0;
	manager->common.impmagic = 0;
	free(manager);

	*managerp = NULL;

	/* NOTE(review): redundant; already cleared above. */
	socketmgr = NULL;
}

/*
 * Common receive path: try the I/O immediately, and on a soft error
 * queue the request and arm the watcher.  'flags' may carry
 * ISC_SOCKFLAG_IMMEDIATE, which changes queuing/result semantics.
 */
static isc_result_t
socket_recv(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	/* UDP can always try at once; TCP must preserve request order. */
	if (sock->type == isc_sockettype_udp) {
		io_state = doio_recv(sock, dev);
	} else {
		if (ISC_LIST_EMPTY(sock->recv_list))
			io_state = doio_recv(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't read all or part of the request right now, so
		 * queue it.
		 *
		 * Attach to socket and to task
		 */
		isc_task_attach(task, &ntask);
		dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

		/*
		 * Enqueue the request.
		 If the socket was previously not being
		 * watched, poke the watcher to start paying attention to it.
		 */
		if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv)
			select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
		ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);

		socket_log(sock, NULL, EVENT, NULL, 0, 0,
			   "socket_recv: event %p -> task %p",
			   dev, ntask);

		if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
			result = ISC_R_INPROGRESS;
		break;

	case DOIO_EOF:
		dev->result = ISC_R_EOF;
		/* fallthrough */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		/* Immediate mode lets the caller consume the result inline. */
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_recvdone_event(sock, &dev);
		break;
	}

	return (result);
}

/*
 * Public scatter receive: take ownership of the buffers in 'buflist'
 * and post a RECVDONE event to 'task' when at least 'minimum' bytes
 * (or any data, for UDP) have arrived.
 */
isc_result_t
isc__socket_recvv(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		  unsigned int minimum, isc_task_t *task,
		  isc_taskaction_t action, void *arg)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_availablecount(buflist);
	REQUIRE(iocount > 0);

	INSIST(sock->bound);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * UDP sockets are always partial read
	 */
	if (sock->type == isc_sockettype_udp)
		dev->minimum = 1;
	else {
		if (minimum == 0)
			dev->minimum = iocount;
		else
			dev->minimum = minimum;
	}

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	return (socket_recv(sock, dev, task, 0));
}

/*
 * Common send path, mirroring socket_recv(): try the I/O immediately
 * and queue on a soft error (unless ISC_SOCKFLAG_NORETRY).  'pktinfo',
 * if given, selects the IPv6 source/interface for the outgoing packet.
 */
static isc_result_t
socket_send(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	set_dev_address(address, sock, dev);
	if (pktinfo != NULL) {
		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
		dev->pktinfo = *pktinfo;

		if (!isc_sockaddr_issitelocal(&dev->address) &&
		    !isc_sockaddr_islinklocal(&dev->address)) {
			socket_log(sock, NULL, TRACE,
				   "pktinfo structure provided, ifindex %u "
				   "(set to 0)", pktinfo->ipi6_ifindex);

			/*
			 * Set the pktinfo index to 0 here, to let the
			 * kernel decide what interface it should send on.
			 */
			dev->pktinfo.ipi6_ifindex = 0;
		}
	}

	if (sock->type == isc_sockettype_udp)
		io_state = doio_send(sock, dev);
	else {
		/* TCP sends must stay ordered behind queued requests. */
		if (ISC_LIST_EMPTY(sock->send_list))
			io_state = doio_send(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't send all or part of the request right now, so
		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
		 */
		if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
			isc_task_attach(task, &ntask);
			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

			/*
			 * Enqueue the request.  If the socket was previously
			 * not being watched, poke the watcher to start
			 * paying attention to it.
			 */
			if (ISC_LIST_EMPTY(sock->send_list) &&
			    !sock->pending_send)
				select_poke(sock->manager, sock->fd,
					    SELECT_POKE_WRITE);
			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);

			socket_log(sock, NULL, EVENT, NULL, 0, 0,
				   "socket_send: event %p -> task %p",
				   dev, ntask);

			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
				result = ISC_R_INPROGRESS;
			break;
		}

		/* FALLTHROUGH */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		/* Immediate mode lets the caller consume the result inline. */
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_senddone_event(sock, &dev);
		break;
	}

	return (result);
}

isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  isc_task_t *task, isc_taskaction_t action, void *arg)
{
	/* Convenience wrapper: no explicit address or pktinfo. */
	return (isc__socket_sendtov2(sock, buflist, task, action, arg, NULL,
				     NULL, 0));
}

/*
 * Public scatter send: take ownership of the buffers in 'buflist' and
 * post a SENDDONE event to 'task' once everything has been written (or
 * an error occurred).  'address'/'pktinfo' override the destination.
 */
isc_result_t
isc__socket_sendtov2(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		     isc_task_t *task, isc_taskaction_t action, void *arg,
		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		     unsigned int flags)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_usedcount(buflist);
	REQUIRE(iocount > 0);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	return (socket_send(sock, dev, task, address, pktinfo, flags));
}

/*
 * Bind 'sock' to 'sockaddr'.  SO_REUSEADDR is applied only when
 * ISC_SOCKET_REUSEADDRESS is requested with a non-zero port.  Maps
 * the common bind(2) errno values to ISC_R_* results.
 */
isc_result_t
isc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr,
		 unsigned int options) {
	isc__socket_t *sock = (isc__socket_t *)sock0;
	int on = 1;

	REQUIRE(VALID_SOCKET(sock));

	INSIST(!sock->bound);

	if (sock->pf != sockaddr->type.sa.sa_family) {
		return (ISC_R_FAMILYMISMATCH);
	}

	/*
	 * Only set SO_REUSEADDR when we want a specific port.
	 */
	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
		       sizeof(on)) < 0) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "setsockopt(%d) %s", sock->fd, "failed");
		/* Press on...
 */
	}
	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
		switch (errno) {
		case EACCES:
			return (ISC_R_NOPERM);
		case EADDRNOTAVAIL:
			return (ISC_R_ADDRNOTAVAIL);
		case EADDRINUSE:
			return (ISC_R_ADDRINUSE);
		case EINVAL:
			return (ISC_R_BOUND);
		default:
			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
					 strerror(errno));
			return (ISC_R_UNEXPECTED);
		}
	}

	socket_log(sock, sockaddr, TRACE, "bound");
	sock->bound = 1;

	return (ISC_R_SUCCESS);
}

/*
 * Start a connect to 'addr'.  Only one connect may be outstanding per
 * socket.  If connect(2) completes immediately, the CONNECT event is
 * posted at once; otherwise the request is parked in sock->connect_ev
 * and the watcher is poked to wait for writability.
 */
isc_result_t
isc__socket_connect(isc_socket_t *sock0, isc_sockaddr_t *addr,
		    isc_task_t *task, isc_taskaction_t action, void *arg)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socket_connev_t *dev;
	isc_task_t *ntask = NULL;
	isc__socketmgr_t *manager;
	int cc;
	char addrbuf[ISC_SOCKADDR_FORMATSIZE];

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(addr != NULL);
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(addr != NULL);

	if (isc_sockaddr_ismulticast(addr))
		return (ISC_R_MULTICAST);

	REQUIRE(!sock->connecting);

	dev = (isc_socket_connev_t *)isc_event_allocate(sock,
							ISC_SOCKEVENT_CONNECT,
							action, arg,
							sizeof(*dev));
	if (dev == NULL) {
		return (ISC_R_NOMEMORY);
	}
	ISC_LINK_INIT(dev, ev_link);

	/*
	 * Try to do the connect right away, as there can be only one
	 * outstanding, and it might happen to complete.
	 */
	sock->peer_address = *addr;
	cc = connect(sock->fd, &addr->type.sa, addr->length);
	if (cc < 0) {
		/*
		 * HP-UX "fails" to connect a UDP socket and sets errno to
		 * EINPROGRESS if it's non-blocking.  We'd rather regard this as
		 * a success and let the user detect it if it's really an error
		 * at the time of sending a packet on the socket.
		 */
		if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) {
			cc = 0;
			goto success;
		}
		if (SOFT_ERROR(errno) || errno == EINPROGRESS)
			goto queue;

		switch (errno) {
#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
			ERROR_MATCH(EACCES, ISC_R_NOPERM);
			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
			ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
			ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
			ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
			ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
			ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
			ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
			ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
#undef ERROR_MATCH
		}

		/* Unrecognized errno: log it and fail the request. */
		sock->connected = 0;

		isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf));
		UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s",
				 addrbuf, errno, strerror(errno));

		isc_event_free(ISC_EVENT_PTR(&dev));
		return (ISC_R_UNEXPECTED);

	err_exit:
		/* Known error: deliver it through the CONNECT event. */
		sock->connected = 0;
		isc_task_send(task, ISC_EVENT_PTR(&dev));

		return (ISC_R_SUCCESS);
	}

	/*
	 * If connect completed, fire off the done event.
	 */
 success:
	if (cc == 0) {
		sock->connected = 1;
		sock->bound = 1;
		dev->result = ISC_R_SUCCESS;
		isc_task_send(task, ISC_EVENT_PTR(&dev));

		return (ISC_R_SUCCESS);
	}

 queue:

	/*
	 * Attach to task.
	 */
	isc_task_attach(task, &ntask);

	sock->connecting = 1;

	dev->ev_sender = ntask;

	/*
	 * Poke watcher here.  We still have the socket locked, so there
	 * is no race condition.  We will keep the lock for such a short
	 * bit of time waking it up now or later won't matter all that much.
	 */
	if (sock->connect_ev == NULL)
		select_poke(manager, sock->fd, SELECT_POKE_CONNECT);

	sock->connect_ev = dev;

	return (ISC_R_SUCCESS);
}

/*
 * Called when a socket with a pending connect() finishes.
 */
static void
internal_connect(isc_task_t *me, isc_event_t *ev) {
	isc__socket_t *sock;
	isc_socket_connev_t *dev;
	isc_task_t *task;
	int cc;
	socklen_t optlen;
	char peerbuf[ISC_SOCKADDR_FORMATSIZE];

	UNUSED(me);
	INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);

	sock = ev->ev_sender;
	INSIST(VALID_SOCKET(sock));

	/*
	 * When the internal event was sent the reference count was bumped
	 * to keep the socket around for us.  Decrement the count here.
	 */
	INSIST(sock->references > 0);
	sock->references--;
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Has this event been canceled?
	 */
	dev = sock->connect_ev;
	if (dev == NULL) {
		INSIST(!sock->connecting);
		return;
	}

	INSIST(sock->connecting);
	sock->connecting = 0;

	/*
	 * Get any possible error status here.
	 */
	optlen = sizeof(cc);
	if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR,
		       (void *)&cc, (void *)&optlen) < 0)
		cc = errno;
	else
		errno = cc;

	if (errno != 0) {
		/*
		 * If the error is EAGAIN, just re-select on this
		 * fd and pretend nothing strange happened.
		 */
		if (SOFT_ERROR(errno) || errno == EINPROGRESS) {
			sock->connecting = 1;
			select_poke(sock->manager, sock->fd,
				    SELECT_POKE_CONNECT);
			return;
		}


		/*
		 * Translate other errors into ISC_R_* flavors.
2438 */ 2439 if (sock->connect_ev == NULL) 2440 select_poke(manager, sock->fd, SELECT_POKE_CONNECT); 2441 2442 sock->connect_ev = dev; 2443 2444 return (ISC_R_SUCCESS); 2445 } 2446 2447 /* 2448 * Called when a socket with a pending connect() finishes. 2449 */ 2450 static void 2451 internal_connect(isc_task_t *me, isc_event_t *ev) { 2452 isc__socket_t *sock; 2453 isc_socket_connev_t *dev; 2454 isc_task_t *task; 2455 int cc; 2456 socklen_t optlen; 2457 char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 2458 2459 UNUSED(me); 2460 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 2461 2462 sock = ev->ev_sender; 2463 INSIST(VALID_SOCKET(sock)); 2464 2465 /* 2466 * When the internal event was sent the reference count was bumped 2467 * to keep the socket around for us. Decrement the count here. 2468 */ 2469 INSIST(sock->references > 0); 2470 sock->references--; 2471 if (sock->references == 0) { 2472 destroy(&sock); 2473 return; 2474 } 2475 2476 /* 2477 * Has this event been canceled? 2478 */ 2479 dev = sock->connect_ev; 2480 if (dev == NULL) { 2481 INSIST(!sock->connecting); 2482 return; 2483 } 2484 2485 INSIST(sock->connecting); 2486 sock->connecting = 0; 2487 2488 /* 2489 * Get any possible error status here. 2490 */ 2491 optlen = sizeof(cc); 2492 if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, 2493 (void *)&cc, (void *)&optlen) < 0) 2494 cc = errno; 2495 else 2496 errno = cc; 2497 2498 if (errno != 0) { 2499 /* 2500 * If the error is EAGAIN, just re-select on this 2501 * fd and pretend nothing strange happened. 2502 */ 2503 if (SOFT_ERROR(errno) || errno == EINPROGRESS) { 2504 sock->connecting = 1; 2505 select_poke(sock->manager, sock->fd, 2506 SELECT_POKE_CONNECT); 2507 return; 2508 } 2509 2510 2511 /* 2512 * Translate other errors into ISC_R_* flavors. 
2513 */ 2514 switch (errno) { 2515 #define ERROR_MATCH(a, b) case a: dev->result = b; break; 2516 ERROR_MATCH(EACCES, ISC_R_NOPERM); 2517 ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2518 ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2519 ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); 2520 ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); 2521 ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); 2522 ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); 2523 ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); 2524 ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); 2525 ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); 2526 ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); 2527 ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); 2528 #undef ERROR_MATCH 2529 default: 2530 dev->result = ISC_R_UNEXPECTED; 2531 isc_sockaddr_format(&sock->peer_address, peerbuf, 2532 sizeof(peerbuf)); 2533 UNEXPECTED_ERROR(__FILE__, __LINE__, 2534 "internal_connect: connect(%s) %s", 2535 peerbuf, strerror(errno)); 2536 } 2537 } else { 2538 dev->result = ISC_R_SUCCESS; 2539 sock->connected = 1; 2540 sock->bound = 1; 2541 } 2542 2543 sock->connect_ev = NULL; 2544 2545 task = dev->ev_sender; 2546 dev->ev_sender = sock; 2547 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev)); 2548 } 2549 2550 /* 2551 * Run through the list of events on this socket, and cancel the ones 2552 * queued for task "task" of type "how". "how" is a bitmask. 2553 */ 2554 void 2555 isc__socket_cancel(isc_socket_t *sock0, isc_task_t *task, unsigned int how) { 2556 isc__socket_t *sock = (isc__socket_t *)sock0; 2557 2558 REQUIRE(VALID_SOCKET(sock)); 2559 2560 /* 2561 * Quick exit if there is nothing to do. Don't even bother locking 2562 * in this case. 2563 */ 2564 if (how == 0) 2565 return; 2566 2567 /* 2568 * All of these do the same thing, more or less. 2569 * Each will: 2570 * o If the internal event is marked as "posted" try to 2571 * remove it from the task's queue. If this fails, mark it 2572 * as canceled instead, and let the task clean it up later. 
2573 * o For each I/O request for that task of that type, post 2574 * its done event with status of "ISC_R_CANCELED". 2575 * o Reset any state needed. 2576 */ 2577 if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) 2578 && !ISC_LIST_EMPTY(sock->recv_list)) { 2579 isc_socketevent_t *dev; 2580 isc_socketevent_t *next; 2581 isc_task_t *current_task; 2582 2583 dev = ISC_LIST_HEAD(sock->recv_list); 2584 2585 while (dev != NULL) { 2586 current_task = dev->ev_sender; 2587 next = ISC_LIST_NEXT(dev, ev_link); 2588 2589 if ((task == NULL) || (task == current_task)) { 2590 dev->result = ISC_R_CANCELED; 2591 send_recvdone_event(sock, &dev); 2592 } 2593 dev = next; 2594 } 2595 } 2596 2597 if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) 2598 && !ISC_LIST_EMPTY(sock->send_list)) { 2599 isc_socketevent_t *dev; 2600 isc_socketevent_t *next; 2601 isc_task_t *current_task; 2602 2603 dev = ISC_LIST_HEAD(sock->send_list); 2604 2605 while (dev != NULL) { 2606 current_task = dev->ev_sender; 2607 next = ISC_LIST_NEXT(dev, ev_link); 2608 2609 if ((task == NULL) || (task == current_task)) { 2610 dev->result = ISC_R_CANCELED; 2611 send_senddone_event(sock, &dev); 2612 } 2613 dev = next; 2614 } 2615 } 2616 2617 /* 2618 * Connecting is not a list. 2619 */ 2620 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 2621 && sock->connect_ev != NULL) { 2622 isc_socket_connev_t *dev; 2623 isc_task_t *current_task; 2624 2625 INSIST(sock->connecting); 2626 sock->connecting = 0; 2627 2628 dev = sock->connect_ev; 2629 current_task = dev->ev_sender; 2630 2631 if ((task == NULL) || (task == current_task)) { 2632 sock->connect_ev = NULL; 2633 2634 dev->result = ISC_R_CANCELED; 2635 dev->ev_sender = sock; 2636 isc_task_sendanddetach(¤t_task, 2637 ISC_EVENT_PTR(&dev)); 2638 } 2639 } 2640 2641 } 2642 2643 /* 2644 * In our assumed scenario, we can simply use a single static object. 2645 * XXX: this is not true if the application uses multiple threads with 2646 * 'multi-context' mode. 
Fixing this is a future TODO item. 2647 */ 2648 static isc_socketwait_t swait_private; 2649 2650 int 2651 isc__socketmgr_waitevents(isc_socketmgr_t *manager0, struct timeval *tvp, 2652 isc_socketwait_t **swaitp) 2653 { 2654 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2655 int n; 2656 2657 REQUIRE(swaitp != NULL && *swaitp == NULL); 2658 2659 if (manager == NULL) 2660 manager = socketmgr; 2661 if (manager == NULL) 2662 return (0); 2663 2664 memmove(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize); 2665 memmove(manager->write_fds_copy, manager->write_fds, 2666 manager->fd_bufsize); 2667 2668 swait_private.readset = manager->read_fds_copy; 2669 swait_private.writeset = manager->write_fds_copy; 2670 swait_private.maxfd = manager->maxfd + 1; 2671 2672 n = select(swait_private.maxfd, swait_private.readset, 2673 swait_private.writeset, NULL, tvp); 2674 2675 *swaitp = &swait_private; 2676 return (n); 2677 } 2678 2679 isc_result_t 2680 isc__socketmgr_dispatch(isc_socketmgr_t *manager0, isc_socketwait_t *swait) { 2681 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2682 2683 REQUIRE(swait == &swait_private); 2684 2685 if (manager == NULL) 2686 manager = socketmgr; 2687 if (manager == NULL) 2688 return (ISC_R_NOTFOUND); 2689 2690 process_fds(manager, swait->maxfd, swait->readset, swait->writeset); 2691 return (ISC_R_SUCCESS); 2692 } 2693 2694 #include "../socket_api.c" 2695