/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/*! \file */

#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>

#include <netinet/tcp.h>

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <isc/buffer.h>
#include <isc/bufferlist.h>
#include <isc/formatcheck.h>
#include <isc/list.h>
#include <isc/log.h>
#include <isc/net.h>
#include <isc/region.h>
#include <isc/socket.h>
#include <isc/strerror.h>
#include <isc/task.h>
#include <isc/util.h>

#include "errno2result.h"

#include "socket_p.h"
#include "../task_p.h"

/*%
 * fd_set pair plus select() bookkeeping handed out to the event-wait
 * machinery (see socket_p.h).  NOTE(review): the owner/lifetime of these
 * fd_set pointers is established outside this chunk — verify against the
 * waitevents implementation.
 */
struct isc_socketwait {
	fd_set *readset;	/* descriptors watched for readability */
	fd_set *writeset;	/* descriptors watched for writability */
	int nfds;		/* first arg to select(): maxfd + 1 */
	int maxfd;		/* highest descriptor currently set */
};

/*
 * Set by the -T dscp option on the command line. If set to a value
 * other than -1, we check to make sure DSCP values match it, and
 * assert if not.
 */
int isc_dscp_check_value = -1;

/*%
 * Size of per-FD lock buckets.  This build uses a single bucket (no
 * per-FD locking), so the bucket id is always 0.
 */
#define FDLOCK_COUNT		1
#define FDLOCK_ID(fd)		0

/*%
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 */

/*%
 * Define what the possible "soft" errors can be. These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0. This is broken, but we have to
 * work around it here.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)

#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*!<
 * DLVL(90) --  Function entry/exit and other tracing.
 * DLVL(70) --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60) --  Socket data send/receive
 * DLVL(50) --  Event tracing, including receiving/sending completion events.
 * DLVL(20) --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

/* Internal readable/writable notifications are plain isc_event_t's. */
typedef isc_event_t intev_t;

#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(s)		ISC_MAGIC_VALID(s, SOCKET_MAGIC)

/*!
 * IPv6 control information. If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */

/*%
 * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */

/*%
 * The size to raise the receive buffer to (from BIND 8).
 */
#define RCVBUFSIZE (32*1024)

/*%
 * Instead of calculating the cmsgbuf lengths every time we take
 * a rule of thumb approach - sizes are taken from x86_64 linux,
 * multiplied by 2, everything should fit. Those sizes are not
 * large enough to cause any concern.
 */
#define CMSG_SP_IN6PKT 40

#define CMSG_SP_TIMESTAMP 32

#define CMSG_SP_TCTOS 24

#define CMSG_SP_INT 24

#define RECVCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS)+1)
#define SENDCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS)+1)

/*%
 * The number of times a send operation is repeated if the result is EINTR.
 */
#define NRETRIES 10

typedef struct isc__socket isc__socket_t;
typedef struct isc__socketmgr isc__socketmgr_t;

#define NEWCONNSOCK(ev) ((isc__socket_t *)(ev)->newsocket)

/*%
 * Internal representation of a socket; isc_socket_t ("common") is the
 * public handle embedded at the front.
 */
struct isc__socket {
	/* Not locked. */
	isc_socket_t		common;
	isc__socketmgr_t	*manager;
	isc_sockettype_t	type;

	/* Locked by socket lock. */
	ISC_LINK(isc__socket_t)	link;
	unsigned int		references;
	int			fd;	/* underlying descriptor, -1 when closed */
	int			pf;	/* protocol family (AF_INET/AF_INET6) */

	ISC_LIST(isc_socketevent_t)		send_list;
	ISC_LIST(isc_socketevent_t)		recv_list;
	isc_socket_connev_t		       *connect_ev;

	/*
	 * Internal events.  Posted when a descriptor is readable or
	 * writable.  These are statically allocated and never freed.
	 * They will be set to non-purgable before use.
	 */
	intev_t			readable_ev;
	intev_t			writable_ev;

	isc_sockaddr_t		peer_address;  /* remote address */

	unsigned int		pending_recv : 1,
				pending_send : 1,
				connected : 1,
				connecting : 1, /* connect pending */
				bound : 1,	/* bound to local addr */
				active : 1,	/* currently active */
				pktdscp : 1;	/* per packet dscp */
	unsigned int		dscp;
};

#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

/*%
 * Socket manager: owns the per-FD state tables and the select() fd_sets
 * shared by all sockets it manages.
 */
struct isc__socketmgr {
	/* Not locked. */
	isc_socketmgr_t		common;
	int			fd_bufsize;	/* bytes needed for an fd_set */
	unsigned int		maxsocks;	/* size of fds[]/fdstate[] */

	isc__socket_t	       **fds;		/* fd -> socket map */
	int			*fdstate;	/* fd -> CLOSED/MANAGED/CLOSE_PENDING */

	/* Locked by manager lock. */
	ISC_LIST(isc__socket_t)	socklist;
	fd_set			*read_fds;
	fd_set			*read_fds_copy;
	fd_set			*write_fds;
	fd_set			*write_fds_copy;
	int			maxfd;
	unsigned int		refs;
};

/* Singleton manager instance used by this backend. */
static isc__socketmgr_t *socketmgr = NULL;

#define CLOSED			0	/* this one must be zero */
#define MANAGED			1
#define CLOSE_PENDING		2

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)

static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
				  isc_sockettype_t type,
				  isc_socket_t **socketp);
static void send_recvdone_event(isc__socket_t *, isc_socketevent_t **);
static void send_senddone_event(isc__socket_t *, isc_socketevent_t **);
static void free_socket(isc__socket_t **);
static isc_result_t allocate_socket(isc__socketmgr_t *, isc_sockettype_t,
				    isc__socket_t **);
static void destroy(isc__socket_t **);
static void internal_connect(isc_task_t *, isc_event_t *);
static void internal_recv(isc_task_t *, isc_event_t *);
static void internal_send(isc_task_t *, isc_event_t *);
static void process_cmsg(isc__socket_t *, struct msghdr *, isc_socketevent_t *);
static void build_msghdr_send(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);

/*%
 * The following are intended for internal use (indicated by "isc__"
 * prefix) but are not declared as static, allowing direct access from
 * unit tests etc.
 */

isc_result_t
isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
		   isc_socket_t **socketp);
void
isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
void
isc__socket_detach(isc_socket_t **socketp);
isc_result_t
isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  unsigned int minimum, isc_task_t *task,
		  isc_taskaction_t action, void *arg);
isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  isc_task_t *task, isc_taskaction_t action, void *arg);
isc_result_t
isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
		     isc_task_t *task, isc_taskaction_t action, void *arg,
		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		     unsigned int flags);
isc_result_t
isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
		 unsigned int options);
isc_result_t
isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
		    isc_task_t *task, isc_taskaction_t action,
		    void *arg);
void
isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how);

isc_result_t
isc__socketmgr_create(isc_socketmgr_t **managerp);
isc_result_t
isc__socketmgr_create2(isc_socketmgr_t **managerp,
		       unsigned int maxsocks);
isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager0, unsigned int *nsockp);
void
isc__socketmgr_destroy(isc_socketmgr_t **managerp);

static struct {
	isc_socketmethods_t methods;

	/*%
	 * The following are defined just for avoiding unused static functions.
	 */
	void *recvv, *sendv;
} socketmethods = {
	{
		isc__socket_attach,
		isc__socket_detach,
		isc__socket_bind,
		isc__socket_connect,
		isc__socket_cancel,
	},
	(void *)isc__socket_recvv,
	(void *)isc__socket_sendv,
};

static isc_socketmgrmethods_t socketmgrmethods = {
	isc__socketmgr_destroy,
	isc__socket_create
};

#define SELECT_POKE_SHUTDOWN		(-1)
#define SELECT_POKE_NOTHING		(-2)
#define SELECT_POKE_READ		(-3)
#define SELECT_POKE_ACCEPT		(-3) /*%< Same as _READ */
#define SELECT_POKE_WRITE		(-4)
#define SELECT_POKE_CONNECT		(-4) /*%< Same as _WRITE */
#define SELECT_POKE_CLOSE		(-5)

#define SOCK_DEAD(s)			((s)->references == 0)

/*%
 * Shortcut index arrays to get access to statistics counters.
 */
enum {
	STATID_OPEN = 0,
	STATID_OPENFAIL = 1,
	STATID_CLOSE = 2,
	STATID_BINDFAIL = 3,
	STATID_CONNECTFAIL = 4,
	STATID_CONNECT = 5,
	STATID_ACCEPTFAIL = 6,
	STATID_ACCEPT = 7,
	STATID_SENDFAIL = 8,
	STATID_RECVFAIL = 9,
	STATID_ACTIVE = 10
};


/*%
 * Log a printf-style message about 'sock', prefixed with the socket
 * pointer and, when 'address' is non-NULL, the formatted peer address.
 * Cheap no-op when the log context would not emit at 'level'.
 */
static void
socket_log(isc__socket_t *sock, isc_sockaddr_t *address,
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   const char *fmt, ...) ISC_FORMAT_PRINTF(6, 7);
static void
socket_log(isc__socket_t *sock, isc_sockaddr_t *address,
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   const char *fmt, ...)
{
	char msgbuf[2048];
	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
	va_list ap;

	/* Skip the formatting work entirely if nothing would be logged. */
	if (!isc_log_wouldlog(isc_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	if (address == NULL) {
		isc_log_write(isc_lctx, category, module, level,
			      "socket %p: %s", sock, msgbuf);
	} else {
		isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
		isc_log_write(isc_lctx, category, module, level,
			      "socket %p %s: %s", sock, peerbuf, msgbuf);
	}
}

/*%
 * Start watching 'fd' for the condition named by 'msg' (READ or WRITE)
 * by setting the corresponding bit in the manager's fd_set.
 * Always succeeds in this select()-based implementation.
 */
static inline isc_result_t
watch_fd(isc__socketmgr_t *manager, int fd, int msg) {
	isc_result_t result = ISC_R_SUCCESS;

	if (msg == SELECT_POKE_READ)
		FD_SET(fd, manager->read_fds);
	if (msg == SELECT_POKE_WRITE)
		FD_SET(fd, manager->write_fds);

	return (result);
}

/*%
 * Stop watching 'fd' for the condition named by 'msg'; inverse of
 * watch_fd().  Always succeeds in this select()-based implementation.
 */
static inline isc_result_t
unwatch_fd(isc__socketmgr_t *manager, int fd, int msg) {
	isc_result_t result = ISC_R_SUCCESS;

	if (msg == SELECT_POKE_READ)
		FD_CLR(fd, manager->read_fds);
	else if (msg == SELECT_POKE_WRITE)
		FD_CLR(fd, manager->write_fds);

	return (result);
}

/*%
 * Act on a poke for 'fd': close it (SELECT_POKE_CLOSE), drop a
 * close-pending descriptor from the watch sets, or start watching it
 * for the requested read/write condition.
 */
static void
wakeup_socket(isc__socketmgr_t *manager, int fd, int msg) {
	isc_result_t result;

	/*
	 * This is a wakeup on a socket. If the socket is not in the
	 * process of being closed, start watching it for either reads
	 * or writes.
	 */

	INSIST(fd >= 0 && fd < (int)manager->maxsocks);

	if (msg == SELECT_POKE_CLOSE) {
		/* No one should be updating fdstate, so no need to lock it */
		INSIST(manager->fdstate[fd] == CLOSE_PENDING);
		manager->fdstate[fd] = CLOSED;
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
		(void)close(fd);
		return;
	}

	if (manager->fdstate[fd] == CLOSE_PENDING) {

		/*
		 * We accept (and ignore) any error from unwatch_fd() as we are
		 * closing the socket, hoping it doesn't leave dangling state in
		 * the kernel.
		 */
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
		return;
	}
	if (manager->fdstate[fd] != MANAGED) {
		return;
	}

	/*
	 * Set requested bit.
	 */
	result = watch_fd(manager, fd, msg);
	if (result != ISC_R_SUCCESS) {
		/*
		 * XXXJT: what should we do?  Ignoring the failure of watching
		 * a socket will make the application dysfunctional, but there
		 * seems to be no reasonable recovery process.
		 */
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
			      "failed to start watching FD (%d): %s",
			      fd, isc_result_totext(result));
	}
}

/*
 * Update the state of the socketmgr when something changes.
 * SHUTDOWN pokes and negative fds are ignored here.
 */
static void
select_poke(isc__socketmgr_t *manager, int fd, int msg) {
	if (msg == SELECT_POKE_SHUTDOWN)
		return;
	else if (fd >= 0)
		wakeup_socket(manager, fd, msg);
	return;
}

/*
 * Make a fd non-blocking.
 * Returns ISC_R_UNEXPECTED (after logging) if fcntl(F_SETFL) fails.
 */
static isc_result_t
make_nonblock(int fd) {
	int ret;
	char strbuf[ISC_STRERRORSIZE];
	int flags;

	/* Preserve any existing status flags and add O_NONBLOCK. */
	flags = fcntl(fd, F_GETFL, 0);
	flags |= O_NONBLOCK;
	ret = fcntl(fd, F_SETFL, flags);

	if (ret == -1) {
		isc__strerror(errno, strbuf, sizeof(strbuf));
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "fcntl(%d, F_SETFL, %d): %s", fd, flags,
				 strbuf);

		return (ISC_R_UNEXPECTED);
	}

	return (ISC_R_SUCCESS);
}

/*
 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
 * In order to ensure as much portability as possible, we provide wrapper
 * functions of these macros.
 * Note that cmsg_space() could run slow on OSes that do not have
 * CMSG_SPACE.
 */
static inline socklen_t
cmsg_len(socklen_t len) {
	return (CMSG_LEN(len));
}

static inline socklen_t
cmsg_space(socklen_t len) {
	return (CMSG_SPACE(len));
}

/*
 * Process control messages received on a socket.
 * Records MSG_TRUNC/MSG_CTRUNC, then walks the control data extracting
 * IPV6_PKTINFO, SCM_TIMESTAMP, IPV6_TCLASS and IP_TOS into 'dev'.
 */
static void
process_cmsg(isc__socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {
	struct cmsghdr *cmsgp;
	struct in6_pktinfo *pktinfop;
	void *timevalp;

	/*
	 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
	 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
	 * They are all here, outside of the CPP tests, because it is
	 * more consistent with the usual ISC coding style.
	 */
	UNUSED(sock);
	UNUSED(msg);
	UNUSED(dev);

	if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC)
		dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;

	if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
		dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC;

	/* No control data at all: nothing further to extract. */
	if (msg->msg_controllen == 0U || msg->msg_control == NULL)
		return;

	timevalp = NULL;
	pktinfop = NULL;

	cmsgp = CMSG_FIRSTHDR(msg);
	while (cmsgp != NULL) {
		socket_log(sock, NULL, TRACE,
			   "processing cmsg %p", cmsgp);

		if (cmsgp->cmsg_level == IPPROTO_IPV6
		    && cmsgp->cmsg_type == IPV6_PKTINFO) {

			pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
			memmove(&dev->pktinfo, pktinfop,
				sizeof(struct in6_pktinfo));
			dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
			socket_log(sock, NULL, TRACE,
				   "interface received on ifindex %u",
				   dev->pktinfo.ipi6_ifindex);
			if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr))
				dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST;
			goto next;
		}

		if (cmsgp->cmsg_level == SOL_SOCKET
		    && cmsgp->cmsg_type == SCM_TIMESTAMP) {
			struct timeval tv;
			/* Copy out via memmove: cmsg data may be unaligned. */
			timevalp = CMSG_DATA(cmsgp);
			memmove(&tv, timevalp, sizeof(tv));
			dev->timestamp.seconds = tv.tv_sec;
			dev->timestamp.nanoseconds = tv.tv_usec * 1000;
			dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP;
			goto next;
		}

		if (cmsgp->cmsg_level == IPPROTO_IPV6
		    && cmsgp->cmsg_type == IPV6_TCLASS) {
			/* Traffic class -> DSCP: drop the two ECN bits. */
			dev->dscp = *(int *)CMSG_DATA(cmsgp);
			dev->dscp >>= 2;
			dev->attributes |= ISC_SOCKEVENTATTR_DSCP;
			goto next;
		}

		if (cmsgp->cmsg_level == IPPROTO_IP
		    && (cmsgp->cmsg_type == IP_TOS)) {
			/* TOS octet -> DSCP: drop the two ECN bits. */
			dev->dscp = (int) *(unsigned char *)CMSG_DATA(cmsgp);
			dev->dscp >>= 2;
			dev->attributes |= ISC_SOCKEVENTATTR_DSCP;
			goto next;
		}
	next:
		cmsgp = CMSG_NXTHDR(msg, cmsgp);
	}

}

/*
 * Construct an iov array and attach it to the msghdr passed in.  This is
 * the SEND constructor, which will use the used region of the buffer
 * (if using a buffer list) or will use the internal region (if a single
 * buffer I/O is requested).
 *
 * Nothing can be NULL, and the done event must list at least one buffer
 * on the buffer linked list for this function to be meaningful.
 *
 * If write_countp != NULL, *write_countp will hold the number of bytes
 * this transaction can send.
 */
static void
build_msghdr_send(isc__socket_t *sock, char* cmsgbuf, isc_socketevent_t *dev,
		  struct msghdr *msg, struct iovec *iov, size_t *write_countp)
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	isc_region_t used;
	size_t write_count;
	size_t skip_count;
	struct cmsghdr *cmsgp;

	memset(msg, 0, sizeof(*msg));

	/* Unconnected sockets need an explicit destination address. */
	if (!sock->connected) {
		msg->msg_name = (void *)&dev->address.type.sa;
		msg->msg_namelen = dev->address.length;
	} else {
		msg->msg_name = NULL;
		msg->msg_namelen = 0;
	}

	buffer = ISC_LIST_HEAD(dev->bufferlist);
	write_count = 0;
	iovcount = 0;

	/*
	 * Single buffer I/O?  Skip what we've done so far in this region.
	 */
	if (buffer == NULL) {
		/* dev->n bytes already sent; send the remainder. */
		write_count = dev->region.length - dev->n;
		iov[0].iov_base = (void *)(dev->region.base + dev->n);
		iov[0].iov_len = write_count;
		iovcount = 1;

		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip the data in the buffer list that we have already written.
	 */
	skip_count = dev->n;
	while (buffer != NULL) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (skip_count < isc_buffer_usedlength(buffer))
			break;
		skip_count -= isc_buffer_usedlength(buffer);
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	while (buffer != NULL) {
		INSIST(iovcount < MAXSCATTERGATHER_SEND);

		isc_buffer_usedregion(buffer, &used);

		if (used.length > 0) {
			/* First buffer may be partially sent: honor skip_count. */
			iov[iovcount].iov_base = (void *)(used.base
							  + skip_count);
			iov[iovcount].iov_len = used.length - skip_count;
			write_count += (used.length - skip_count);
			skip_count = 0;
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	INSIST(skip_count == 0U);

 config:
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;

	msg->msg_control = NULL;
	msg->msg_controllen = 0;
	msg->msg_flags = 0;

	/*
	 * UDP with pktinfo: tell the kernel which source address/interface
	 * to use for this datagram.
	 */
	if ((sock->type == isc_sockettype_udp) &&
	    ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0))
	{
		struct in6_pktinfo *pktinfop;

		socket_log(sock, NULL, TRACE,
			   "sendto pktinfo data, ifindex %u",
			   dev->pktinfo.ipi6_ifindex);

		msg->msg_control = (void *)cmsgbuf;
		msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo));
		INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);

		cmsgp = (struct cmsghdr *)cmsgbuf;
		cmsgp->cmsg_level = IPPROTO_IPV6;
		cmsgp->cmsg_type = IPV6_PKTINFO;
		cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo));
		pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
		memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo));
	}

	/* UDP + USEMINMTU: append an IPV6_USE_MIN_MTU control message. */
	if ((sock->type == isc_sockettype_udp) &&
	    ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0))
	{
		int use_min_mtu = 1;	/* -1, 0, 1 */

		/* Next cmsg goes after any already-placed control data. */
		cmsgp = (struct cmsghdr *)(cmsgbuf +
					   msg->msg_controllen);

		msg->msg_control = (void *)cmsgbuf;
		msg->msg_controllen += cmsg_space(sizeof(use_min_mtu));
		INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);

		cmsgp->cmsg_level = IPPROTO_IPV6;
		cmsgp->cmsg_type = IPV6_USE_MIN_MTU;
		cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu));
		memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu));
	}

	/* -T dscp debugging aid: assert the DSCP value matches the fixed one. */
	if (isc_dscp_check_value > -1) {
		if (sock->type == isc_sockettype_udp)
			INSIST((int)dev->dscp == isc_dscp_check_value);
		else if (sock->type == isc_sockettype_tcp)
			INSIST((int)sock->dscp == isc_dscp_check_value);
	}

	/*
	 * UDP with a per-event DSCP: either attach per-packet control data
	 * (pktdscp) or fall back to setsockopt() on the socket.
	 */
	if ((sock->type == isc_sockettype_udp) &&
	    ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0))
	{
		/* DSCP is the top 6 bits of the TOS/TCLASS octet. */
		int dscp = (dev->dscp << 2) & 0xff;

		INSIST(dev->dscp < 0x40);

		if (sock->pf == AF_INET && sock->pktdscp) {
			cmsgp = (struct cmsghdr *)(cmsgbuf +
						   msg->msg_controllen);
			msg->msg_control = (void *)cmsgbuf;
			msg->msg_controllen += cmsg_space(sizeof(dscp));
			INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);

			cmsgp->cmsg_level = IPPROTO_IP;
			cmsgp->cmsg_type = IP_TOS;
			/*
			 * NOTE(review): cmsg_len uses sizeof(char) while the
			 * controllen above reserves cmsg_space(sizeof(dscp))
			 * (an int) -- asymmetric but long-standing; confirm
			 * against the kernel's expected IP_TOS cmsg payload
			 * before changing.
			 */
			cmsgp->cmsg_len = cmsg_len(sizeof(char));
			*(unsigned char*)CMSG_DATA(cmsgp) = dscp;
		} else if (sock->pf == AF_INET && sock->dscp != dev->dscp) {
			if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS,
				       (void *)&dscp, sizeof(int)) < 0)
			{
				char strbuf[ISC_STRERRORSIZE];
				isc__strerror(errno, strbuf, sizeof(strbuf));
				UNEXPECTED_ERROR(__FILE__, __LINE__,
						 "setsockopt(%d, IP_TOS, %.02x)"
						 " %s: %s",
						 sock->fd, dscp >> 2,
						 "failed", strbuf);
			} else
				/*
				 * NOTE(review): stores the shifted TOS value,
				 * but the guard above compares sock->dscp with
				 * the unshifted dev->dscp -- verify intended.
				 */
				sock->dscp = dscp;
		}

		if (sock->pf == AF_INET6 && sock->pktdscp) {
			cmsgp = (struct cmsghdr *)(cmsgbuf +
						   msg->msg_controllen);
			msg->msg_control = (void *)cmsgbuf;
			msg->msg_controllen += cmsg_space(sizeof(dscp));
			INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);

			cmsgp->cmsg_level = IPPROTO_IPV6;
			cmsgp->cmsg_type = IPV6_TCLASS;
			cmsgp->cmsg_len = cmsg_len(sizeof(dscp));
			memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp));
		} else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) {
			if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS,
				       (void *)&dscp, sizeof(int)) < 0) {
				char strbuf[ISC_STRERRORSIZE];
				isc__strerror(errno, strbuf, sizeof(strbuf));
				UNEXPECTED_ERROR(__FILE__, __LINE__,
						 "setsockopt(%d, IPV6_TCLASS, "
						 "%.02x) %s: %s",
						 sock->fd, dscp >> 2,
						 "failed", strbuf);
			} else
				sock->dscp = dscp;
		}

		/* Zero the unused tail of the control buffer. */
		if (msg->msg_controllen != 0 &&
		    msg->msg_controllen < SENDCMSGBUFLEN)
		{
			memset(cmsgbuf + msg->msg_controllen, 0,
			       SENDCMSGBUFLEN - msg->msg_controllen);
		}
	}

	if (write_countp != NULL)
		*write_countp = write_count;
}

/*
 * Construct an iov array and attach it to the msghdr passed in.  This is
 * the RECV constructor, which will use the available region of the buffer
 * (if using a buffer list) or will use the internal region (if a single
 * buffer I/O is requested).
 *
 * Nothing can be NULL, and the done event must list at least one buffer
 * on the buffer linked list for this function to be meaningful.
 *
 * If read_countp != NULL, *read_countp will hold the number of bytes
 * this transaction can receive.
 */
static void
build_msghdr_recv(isc__socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev,
		  struct msghdr *msg, struct iovec *iov, size_t *read_countp)
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	isc_region_t available;
	size_t read_count;

	memset(msg, 0, sizeof(struct msghdr));

	if (sock->type == isc_sockettype_udp) {
		/* Datagram: let recvmsg() fill in the source address. */
		memset(&dev->address, 0, sizeof(dev->address));
		msg->msg_name = (void *)&dev->address.type.sa;
		msg->msg_namelen = sizeof(dev->address.type);
	} else { /* TCP */
		msg->msg_name = NULL;
		msg->msg_namelen = 0;
		dev->address = sock->peer_address;
	}

	buffer = ISC_LIST_HEAD(dev->bufferlist);
	read_count = 0;

	/*
	 * Single buffer I/O?  Skip what we've done so far in this region.
	 */
	if (buffer == NULL) {
		read_count = dev->region.length - dev->n;
		iov[0].iov_base = (void *)(dev->region.base + dev->n);
		iov[0].iov_len = read_count;
		iovcount = 1;

		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip empty buffers.
	 */
	while (buffer != NULL) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (isc_buffer_availablelength(buffer) != 0)
			break;
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	iovcount = 0;
	while (buffer != NULL) {
		INSIST(iovcount < MAXSCATTERGATHER_RECV);

		isc_buffer_availableregion(buffer, &available);

		if (available.length > 0) {
			iov[iovcount].iov_base = (void *)(available.base);
			iov[iovcount].iov_len = available.length;
			read_count += available.length;
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

 config:

	/*
	 * If needed, set up to receive that one extra byte.
	 */
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;

	msg->msg_control = cmsgbuf;
	msg->msg_controllen = RECVCMSGBUFLEN;
	msg->msg_flags = 0;

	if (read_countp != NULL)
		*read_countp = read_count;
}

/*%
 * Record the event's peer address: the supplied 'address' for UDP (or
 * the socket's peer when NULL); always the socket's peer for TCP.
 */
static void
set_dev_address(isc_sockaddr_t *address, isc__socket_t *sock,
		isc_socketevent_t *dev)
{
	if (sock->type == isc_sockettype_udp) {
		if (address != NULL)
			dev->address = *address;
		else
			dev->address = sock->peer_address;
	} else if (sock->type == isc_sockettype_tcp) {
		INSIST(address == NULL);
		dev->address = sock->peer_address;
	}
}

/*%
 * Destructor installed on socket events by allocate_socketevent();
 * verifies the buffer list was drained, then chains to the original
 * isc_event destructor saved in ev->destroy.
 */
static void
destroy_socketevent(isc_event_t *event) {
	isc_socketevent_t *ev = (isc_socketevent_t *)event;

	INSIST(ISC_LIST_EMPTY(ev->bufferlist));

	(ev->destroy)(event);
}

/*%
 * Allocate and zero-initialize a socket event, interposing
 * destroy_socketevent as its destructor.  Returns NULL on allocation
 * failure.
 */
static isc_socketevent_t *
allocate_socketevent(void *sender,
		     isc_eventtype_t eventtype, isc_taskaction_t action,
		     void *arg)
{
	isc_socketevent_t *ev;

	ev = (isc_socketevent_t *)isc_event_allocate(sender,
						     eventtype, action, arg,
						     sizeof(*ev));

	if (ev == NULL)
		return (NULL);

	ev->result = ISC_R_UNSET;
	ISC_LINK_INIT(ev, ev_link);
	ISC_LIST_INIT(ev->bufferlist);
	ev->region.base = NULL;
	ev->n = 0;
	ev->offset = 0;
	ev->attributes = 0;
	/* Save the real destructor; destroy_socketevent chains to it. */
	ev->destroy = ev->ev_destroy;
	ev->ev_destroy = destroy_socketevent;
	ev->dscp = 0;

	return (ev);
}

#define DOIO_SUCCESS		0	/* i/o ok, event sent */
#define DOIO_SOFT		1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD		2	/* i/o error, event sent */
#define DOIO_EOF		3	/* EOF, no event sent */

/*%
 * Perform one recvmsg() for 'dev' and classify the outcome as one of
 * the DOIO_* codes above, updating dev->n/result and the buffer list.
 */
static int
doio_recv(isc__socket_t *sock, isc_socketevent_t *dev) {
	int cc;
	struct iovec iov[MAXSCATTERGATHER_RECV];
	size_t read_count;
	size_t actual_count;
	struct msghdr msghdr;
	isc_buffer_t *buffer;
	int recv_errno;
	char strbuf[ISC_STRERRORSIZE];
	char
	cmsgbuf[RECVCMSGBUFLEN] = {0};

	build_msghdr_recv(sock, cmsgbuf, dev, &msghdr, iov, &read_count);

	cc = recvmsg(sock->fd, &msghdr, 0);
	/* Save errno immediately; later calls may clobber it. */
	recv_errno = errno;

	if (cc < 0) {
		if (SOFT_ERROR(recv_errno))
			return (DOIO_SOFT);

		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
			isc__strerror(recv_errno, strbuf, sizeof(strbuf));
			socket_log(sock, NULL, IOEVENT,
				   "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
				   sock->fd, cc, recv_errno, strbuf);
		}

/* Connected sockets treat these as fatal; unconnected ones retry. */
#define SOFT_OR_HARD(_system, _isc) \
	if (recv_errno == _system) { \
		if (sock->connected) { \
			dev->result = _isc; \
			return (DOIO_HARD); \
		} \
		return (DOIO_SOFT); \
	}
#define ALWAYS_HARD(_system, _isc) \
	if (recv_errno == _system) { \
		dev->result = _isc; \
		return (DOIO_HARD); \
	}

		SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
		SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
		SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
		SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN);
		/* HPUX 11.11 can return EADDRNOTAVAIL. */
		SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
		ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
		/* Should never get this one but it was seen. */
		SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH);
		/*
		 * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6
		 * errors.
		 */
		SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH);
		SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH);

#undef SOFT_OR_HARD
#undef ALWAYS_HARD

		dev->result = isc__errno2result(recv_errno);
		return (DOIO_HARD);
	}

	/*
	 * On TCP and UNIX sockets, zero length reads indicate EOF,
	 * while on UDP sockets, zero length reads are perfectly valid,
	 * although strange.
	 */
	switch (sock->type) {
	case isc_sockettype_tcp:
		if (cc == 0)
			return (DOIO_EOF);
		break;
	case isc_sockettype_udp:
		break;
	default:
		INSIST(0);
	}

	if (sock->type == isc_sockettype_udp) {
		dev->address.length = msghdr.msg_namelen;
		/* Drop spoof-prone datagrams claiming source port 0. */
		if (isc_sockaddr_getport(&dev->address) == 0) {
			if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
				socket_log(sock, &dev->address, IOEVENT,
					   "dropping source port zero packet");
			}
			return (DOIO_SOFT);
		}
	}

	socket_log(sock, &dev->address, IOEVENT,
		   "packet received correctly");

	/*
	 * Overflow bit detection.  If we received MORE bytes than we should,
	 * this indicates an overflow situation.  Set the flag in the
	 * dev entry and adjust how much we read by one.
	 */
	/*
	 * If there are control messages attached, run through them and pull
	 * out the interesting bits.
	 */
	process_cmsg(sock, &msghdr, dev);

	/*
	 * update the buffers (if any) and the i/o count
	 */
	dev->n += cc;
	actual_count = cc;
	buffer = ISC_LIST_HEAD(dev->bufferlist);
	/* Distribute the cc received bytes across the buffer list in order. */
	while (buffer != NULL && actual_count > 0U) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (isc_buffer_availablelength(buffer) <= actual_count) {
			actual_count -= isc_buffer_availablelength(buffer);
			isc_buffer_add(buffer,
				       isc_buffer_availablelength(buffer));
		} else {
			isc_buffer_add(buffer, actual_count);
			actual_count = 0;
			POST(actual_count);
			break;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
		if (buffer == NULL) {
			INSIST(actual_count == 0U);
		}
	}

	/*
	 * If we read less than we expected, update counters,
	 * and let the upper layer poke the descriptor.
	 */
	if (((size_t)cc != read_count) && (dev->n < dev->minimum))
		return (DOIO_SOFT);

	/*
	 * Full reads are posted, or partials if partials are ok.
	 */
	dev->result = ISC_R_SUCCESS;
	return (DOIO_SUCCESS);
}

/*
 * Returns:
 *	DOIO_SUCCESS	The operation succeeded.  dev->result contains
 *			ISC_R_SUCCESS.
 *
 *	DOIO_HARD	A hard or unexpected I/O error was encountered.
 *			dev->result contains the appropriate error.
 *
 *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
 *			event was sent.  The operation should be retried.
 *
 *	No other return values are possible.
 */
static int
doio_send(isc__socket_t *sock, isc_socketevent_t *dev) {
	int cc;
	struct iovec iov[MAXSCATTERGATHER_SEND];
	size_t write_count;
	struct msghdr msghdr;
	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
	int attempts = 0;
	int send_errno;
	char strbuf[ISC_STRERRORSIZE];
	char cmsgbuf[SENDCMSGBUFLEN] = {0};

	build_msghdr_send(sock, cmsgbuf, dev, &msghdr, iov, &write_count);

 resend:
	cc = sendmsg(sock->fd, &msghdr, 0);
	/* Save errno immediately; later calls may clobber it. */
	send_errno = errno;

	/*
	 * Check for error or block condition.
1115 */ 1116 if (cc < 0) { 1117 if (send_errno == EINTR && ++attempts < NRETRIES) 1118 goto resend; 1119 1120 if (SOFT_ERROR(send_errno)) { 1121 if (errno == EWOULDBLOCK || errno == EAGAIN) 1122 dev->result = ISC_R_WOULDBLOCK; 1123 return (DOIO_SOFT); 1124 } 1125 1126 #define SOFT_OR_HARD(_system, _isc) \ 1127 if (send_errno == _system) { \ 1128 if (sock->connected) { \ 1129 dev->result = _isc; \ 1130 return (DOIO_HARD); \ 1131 } \ 1132 return (DOIO_SOFT); \ 1133 } 1134 #define ALWAYS_HARD(_system, _isc) \ 1135 if (send_errno == _system) { \ 1136 dev->result = _isc; \ 1137 return (DOIO_HARD); \ 1138 } 1139 1140 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 1141 ALWAYS_HARD(EACCES, ISC_R_NOPERM); 1142 ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 1143 ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 1144 ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 1145 ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); 1146 ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); 1147 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 1148 ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); 1149 ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); 1150 ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); 1151 1152 #undef SOFT_OR_HARD 1153 #undef ALWAYS_HARD 1154 1155 /* 1156 * The other error types depend on whether or not the 1157 * socket is UDP or TCP. If it is UDP, some errors 1158 * that we expect to be fatal under TCP are merely 1159 * annoying, and are really soft errors. 1160 * 1161 * However, these soft errors are still returned as 1162 * a status. 
1163 */ 1164 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1165 isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1166 UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", 1167 addrbuf, strbuf); 1168 dev->result = isc__errno2result(send_errno); 1169 return (DOIO_HARD); 1170 } 1171 1172 if (cc == 0) { 1173 UNEXPECTED_ERROR(__FILE__, __LINE__, 1174 "doio_send: send() %s 0", "returned"); 1175 } 1176 1177 /* 1178 * If we write less than we expected, update counters, poke. 1179 */ 1180 dev->n += cc; 1181 if ((size_t)cc != write_count) 1182 return (DOIO_SOFT); 1183 1184 /* 1185 * Exactly what we wanted to write. We're done with this 1186 * entry. Post its completion event. 1187 */ 1188 dev->result = ISC_R_SUCCESS; 1189 return (DOIO_SUCCESS); 1190 } 1191 1192 /* 1193 * Kill. 1194 * 1195 * Caller must ensure that the socket is not locked and no external 1196 * references exist. 1197 */ 1198 static void 1199 socketclose(isc__socketmgr_t *manager, isc__socket_t *sock, int fd) { 1200 /* 1201 * No one has this socket open, so the watcher doesn't have to be 1202 * poked, and the socket doesn't have to be locked. 
1203 */ 1204 manager->fds[fd] = NULL; 1205 manager->fdstate[fd] = CLOSE_PENDING; 1206 select_poke(manager, fd, SELECT_POKE_CLOSE); 1207 1208 if (sock->active == 1) { 1209 sock->active = 0; 1210 } 1211 1212 /* 1213 * update manager->maxfd here (XXX: this should be implemented more 1214 * efficiently) 1215 */ 1216 if (manager->maxfd == fd) { 1217 int i; 1218 1219 manager->maxfd = 0; 1220 for (i = fd - 1; i >= 0; i--) { 1221 if (manager->fdstate[i] == MANAGED) { 1222 manager->maxfd = i; 1223 break; 1224 } 1225 } 1226 } 1227 1228 } 1229 1230 static void 1231 destroy(isc__socket_t **sockp) { 1232 int fd; 1233 isc__socket_t *sock = *sockp; 1234 isc__socketmgr_t *manager = sock->manager; 1235 1236 socket_log(sock, NULL, CREATION, "destroying"); 1237 1238 INSIST(ISC_LIST_EMPTY(sock->recv_list)); 1239 INSIST(ISC_LIST_EMPTY(sock->send_list)); 1240 INSIST(sock->connect_ev == NULL); 1241 INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks); 1242 1243 if (sock->fd >= 0) { 1244 fd = sock->fd; 1245 sock->fd = -1; 1246 socketclose(manager, sock, fd); 1247 } 1248 1249 ISC_LIST_UNLINK(manager->socklist, sock, link); 1250 1251 /* can't unlock manager as its memory context is still used */ 1252 free_socket(sockp); 1253 } 1254 1255 static isc_result_t 1256 allocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type, 1257 isc__socket_t **socketp) 1258 { 1259 isc__socket_t *sock; 1260 1261 sock = malloc(sizeof(*sock)); 1262 1263 if (sock == NULL) 1264 return (ISC_R_NOMEMORY); 1265 1266 sock->common.magic = 0; 1267 sock->common.impmagic = 0; 1268 sock->references = 0; 1269 1270 sock->manager = manager; 1271 sock->type = type; 1272 sock->fd = -1; 1273 sock->dscp = 0; /* TOS/TCLASS is zero until set. */ 1274 sock->active = 0; 1275 1276 ISC_LINK_INIT(sock, link); 1277 1278 /* 1279 * Set up list of readers and writers to be initially empty. 
1280 */ 1281 ISC_LIST_INIT(sock->recv_list); 1282 ISC_LIST_INIT(sock->send_list); 1283 sock->connect_ev = NULL; 1284 sock->pending_recv = 0; 1285 sock->pending_send = 0; 1286 sock->connected = 0; 1287 sock->connecting = 0; 1288 sock->bound = 0; 1289 sock->pktdscp = 0; 1290 1291 /* 1292 * Initialize readable and writable events. 1293 */ 1294 ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t), 1295 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR, 1296 NULL, sock, sock, NULL); 1297 ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t), 1298 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW, 1299 NULL, sock, sock, NULL); 1300 1301 sock->common.magic = ISCAPI_SOCKET_MAGIC; 1302 sock->common.impmagic = SOCKET_MAGIC; 1303 *socketp = sock; 1304 1305 return (ISC_R_SUCCESS); 1306 } 1307 1308 /* 1309 * This event requires that the various lists be empty, that the reference 1310 * count be 1, and that the magic number is valid. The other socket bits, 1311 * like the lock, must be initialized as well. The fd associated must be 1312 * marked as closed, by setting it to -1 on close, or this routine will 1313 * also close the socket. 
1314 */ 1315 static void 1316 free_socket(isc__socket_t **socketp) { 1317 isc__socket_t *sock = *socketp; 1318 1319 INSIST(VALID_SOCKET(sock)); 1320 INSIST(sock->references == 0); 1321 INSIST(!sock->connecting); 1322 INSIST(!sock->pending_recv); 1323 INSIST(!sock->pending_send); 1324 INSIST(ISC_LIST_EMPTY(sock->recv_list)); 1325 INSIST(ISC_LIST_EMPTY(sock->send_list)); 1326 INSIST(!ISC_LINK_LINKED(sock, link)); 1327 1328 sock->common.magic = 0; 1329 sock->common.impmagic = 0; 1330 1331 free(sock); 1332 1333 *socketp = NULL; 1334 } 1335 1336 static void 1337 use_min_mtu(isc__socket_t *sock) { 1338 /* use minimum MTU */ 1339 if (sock->pf == AF_INET6) { 1340 int on = 1; 1341 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 1342 (void *)&on, sizeof(on)); 1343 } 1344 } 1345 1346 static void 1347 set_tcp_maxseg(isc__socket_t *sock, int size) { 1348 if (sock->type == isc_sockettype_tcp) 1349 (void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG, 1350 (void *)&size, sizeof(size)); 1351 } 1352 1353 static isc_result_t 1354 opensocket(isc__socket_t *sock) 1355 { 1356 isc_result_t result; 1357 char strbuf[ISC_STRERRORSIZE]; 1358 const char *err = "socket"; 1359 int on = 1; 1360 1361 switch (sock->type) { 1362 case isc_sockettype_udp: 1363 sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); 1364 break; 1365 case isc_sockettype_tcp: 1366 sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 1367 break; 1368 } 1369 1370 if (sock->fd < 0) { 1371 switch (errno) { 1372 case EMFILE: 1373 case ENFILE: 1374 isc__strerror(errno, strbuf, sizeof(strbuf)); 1375 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 1376 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 1377 "%s: %s", err, strbuf); 1378 /* fallthrough */ 1379 case ENOBUFS: 1380 return (ISC_R_NORESOURCES); 1381 1382 case EPROTONOSUPPORT: 1383 case EPFNOSUPPORT: 1384 case EAFNOSUPPORT: 1385 /* 1386 * Linux 2.2 (and maybe others) return EINVAL instead of 1387 * EAFNOSUPPORT. 
1388 */ 1389 case EINVAL: 1390 return (ISC_R_FAMILYNOSUPPORT); 1391 1392 default: 1393 isc__strerror(errno, strbuf, sizeof(strbuf)); 1394 UNEXPECTED_ERROR(__FILE__, __LINE__, 1395 "%s() %s: %s", err, "failed", 1396 strbuf); 1397 return (ISC_R_UNEXPECTED); 1398 } 1399 } 1400 1401 result = make_nonblock(sock->fd); 1402 if (result != ISC_R_SUCCESS) { 1403 (void)close(sock->fd); 1404 return (result); 1405 } 1406 1407 /* 1408 * Use minimum mtu if possible. 1409 */ 1410 if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) { 1411 use_min_mtu(sock); 1412 set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */ 1413 } 1414 1415 if (sock->type == isc_sockettype_udp) { 1416 1417 if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, 1418 (void *)&on, sizeof(on)) < 0 1419 && errno != ENOPROTOOPT) { 1420 isc__strerror(errno, strbuf, sizeof(strbuf)); 1421 UNEXPECTED_ERROR(__FILE__, __LINE__, 1422 "setsockopt(%d, SO_TIMESTAMP) %s: %s", 1423 sock->fd, "failed", strbuf); 1424 /* Press on... */ 1425 } 1426 1427 /* RFC 3542 */ 1428 if ((sock->pf == AF_INET6) 1429 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1430 (void *)&on, sizeof(on)) < 0)) { 1431 isc__strerror(errno, strbuf, sizeof(strbuf)); 1432 UNEXPECTED_ERROR(__FILE__, __LINE__, 1433 "setsockopt(%d, IPV6_RECVPKTINFO) " 1434 "%s: %s", sock->fd, "failed", 1435 strbuf); 1436 } 1437 } 1438 1439 if (sock->active == 0) { 1440 sock->active = 1; 1441 } 1442 1443 return (ISC_R_SUCCESS); 1444 } 1445 1446 /* 1447 * Create a 'type' socket managed 1448 * by 'manager'. Events will be posted to 'task' and when dispatched 1449 * 'action' will be called with 'arg' as the arg value. The new 1450 * socket is returned in 'socketp'. 
1451 */ 1452 static isc_result_t 1453 socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 1454 isc_socket_t **socketp) 1455 { 1456 isc__socket_t *sock = NULL; 1457 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 1458 isc_result_t result; 1459 int lockid; 1460 1461 REQUIRE(VALID_MANAGER(manager)); 1462 REQUIRE(socketp != NULL && *socketp == NULL); 1463 1464 result = allocate_socket(manager, type, &sock); 1465 if (result != ISC_R_SUCCESS) 1466 return (result); 1467 1468 switch (sock->type) { 1469 case isc_sockettype_udp: 1470 #define DCSPPKT(pf) ((pf == AF_INET) ? ISC_NET_DSCPPKTV4 : ISC_NET_DSCPPKTV6) 1471 sock->pktdscp = (isc_net_probedscp() & DCSPPKT(pf)) != 0; 1472 break; 1473 case isc_sockettype_tcp: 1474 break; 1475 default: 1476 INSIST(0); 1477 } 1478 1479 sock->pf = pf; 1480 1481 result = opensocket(sock); 1482 if (result != ISC_R_SUCCESS) { 1483 free_socket(&sock); 1484 return (result); 1485 } 1486 1487 sock->common.methods = (isc_socketmethods_t *)&socketmethods; 1488 sock->references = 1; 1489 *socketp = (isc_socket_t *)sock; 1490 1491 /* 1492 * Note we don't have to lock the socket like we normally would because 1493 * there are no external references to it yet. 1494 */ 1495 1496 lockid = FDLOCK_ID(sock->fd); 1497 manager->fds[sock->fd] = sock; 1498 manager->fdstate[sock->fd] = MANAGED; 1499 1500 ISC_LIST_APPEND(manager->socklist, sock, link); 1501 if (manager->maxfd < sock->fd) 1502 manager->maxfd = sock->fd; 1503 1504 socket_log(sock, NULL, CREATION, "created"); 1505 1506 return (ISC_R_SUCCESS); 1507 } 1508 1509 /*% 1510 * Create a new 'type' socket managed by 'manager'. Events 1511 * will be posted to 'task' and when dispatched 'action' will be 1512 * called with 'arg' as the arg value. The new socket is returned 1513 * in 'socketp'. 
1514 */ 1515 isc_result_t 1516 isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 1517 isc_socket_t **socketp) 1518 { 1519 return (socket_create(manager0, pf, type, socketp)); 1520 } 1521 1522 /* 1523 * Attach to a socket. Caller must explicitly detach when it is done. 1524 */ 1525 void 1526 isc__socket_attach(isc_socket_t *sock0, isc_socket_t **socketp) { 1527 isc__socket_t *sock = (isc__socket_t *)sock0; 1528 1529 REQUIRE(VALID_SOCKET(sock)); 1530 REQUIRE(socketp != NULL && *socketp == NULL); 1531 1532 sock->references++; 1533 1534 *socketp = (isc_socket_t *)sock; 1535 } 1536 1537 /* 1538 * Dereference a socket. If this is the last reference to it, clean things 1539 * up by destroying the socket. 1540 */ 1541 void 1542 isc__socket_detach(isc_socket_t **socketp) { 1543 isc__socket_t *sock; 1544 isc_boolean_t kill_socket = ISC_FALSE; 1545 1546 REQUIRE(socketp != NULL); 1547 sock = (isc__socket_t *)*socketp; 1548 REQUIRE(VALID_SOCKET(sock)); 1549 1550 REQUIRE(sock->references > 0); 1551 sock->references--; 1552 if (sock->references == 0) 1553 kill_socket = ISC_TRUE; 1554 1555 if (kill_socket) 1556 destroy(&sock); 1557 1558 *socketp = NULL; 1559 } 1560 1561 /* 1562 * I/O is possible on a given socket. Schedule an event to this task that 1563 * will call an internal function to do the I/O. This will charge the 1564 * task with the I/O operation and let our select loop handler get back 1565 * to doing something real as fast as possible. 1566 * 1567 * The socket and manager must be locked before calling this function. 
1568 */ 1569 static void 1570 dispatch_recv(isc__socket_t *sock) { 1571 intev_t *iev; 1572 isc_socketevent_t *ev; 1573 isc_task_t *sender; 1574 1575 INSIST(!sock->pending_recv); 1576 1577 ev = ISC_LIST_HEAD(sock->recv_list); 1578 if (ev == NULL) 1579 return; 1580 socket_log(sock, NULL, EVENT, NULL, 0, 0, 1581 "dispatch_recv: event %p -> task %p", 1582 ev, ev->ev_sender); 1583 sender = ev->ev_sender; 1584 1585 sock->pending_recv = 1; 1586 iev = &sock->readable_ev; 1587 1588 sock->references++; 1589 iev->ev_sender = sock; 1590 iev->ev_action = internal_recv; 1591 iev->ev_arg = sock; 1592 1593 isc_task_send(sender, (isc_event_t **)&iev); 1594 } 1595 1596 static void 1597 dispatch_send(isc__socket_t *sock) { 1598 intev_t *iev; 1599 isc_socketevent_t *ev; 1600 isc_task_t *sender; 1601 1602 INSIST(!sock->pending_send); 1603 1604 ev = ISC_LIST_HEAD(sock->send_list); 1605 if (ev == NULL) 1606 return; 1607 socket_log(sock, NULL, EVENT, NULL, 0, 0, 1608 "dispatch_send: event %p -> task %p", 1609 ev, ev->ev_sender); 1610 sender = ev->ev_sender; 1611 1612 sock->pending_send = 1; 1613 iev = &sock->writable_ev; 1614 1615 sock->references++; 1616 iev->ev_sender = sock; 1617 iev->ev_action = internal_send; 1618 iev->ev_arg = sock; 1619 1620 isc_task_send(sender, (isc_event_t **)&iev); 1621 } 1622 1623 static void 1624 dispatch_connect(isc__socket_t *sock) { 1625 intev_t *iev; 1626 isc_socket_connev_t *ev; 1627 1628 iev = &sock->writable_ev; 1629 1630 ev = sock->connect_ev; 1631 INSIST(ev != NULL); /* XXX */ 1632 1633 INSIST(sock->connecting); 1634 1635 sock->references++; /* keep socket around for this internal event */ 1636 iev->ev_sender = sock; 1637 iev->ev_action = internal_connect; 1638 iev->ev_arg = sock; 1639 1640 isc_task_send(ev->ev_sender, (isc_event_t **)&iev); 1641 } 1642 1643 /* 1644 * Dequeue an item off the given socket's read queue, set the result code 1645 * in the done event to the one provided, and send it to the task it was 1646 * destined for. 
 *
 * If the event to be sent is on a list, remove it before sending.  If
 * asked to, send and detach from the socket as well.
 *
 * Caller must have the socket locked if the event is attached to the socket.
 */
static void
send_recvdone_event(isc__socket_t *sock, isc_socketevent_t **dev) {
	isc_task_t *task;

	task = (*dev)->ev_sender;

	/* The event now reports the socket as its sender. */
	(*dev)->ev_sender = sock;

	if (ISC_LINK_LINKED(*dev, ev_link))
		ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);

	/*
	 * ATTACHED means socket_recv() took a task reference for this
	 * queued request; release it together with the send.
	 */
	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
	    == ISC_SOCKEVENTATTR_ATTACHED)
		isc_task_sendanddetach(&task, (isc_event_t **)dev);
	else
		isc_task_send(task, (isc_event_t **)dev);
}

/*
 * See comments for send_recvdone_event() above.
 *
 * Caller must have the socket locked if the event is attached to the socket.
 */
static void
send_senddone_event(isc__socket_t *sock, isc_socketevent_t **dev) {
	isc_task_t *task;

	INSIST(dev != NULL && *dev != NULL);

	task = (*dev)->ev_sender;
	(*dev)->ev_sender = sock;

	if (ISC_LINK_LINKED(*dev, ev_link))
		ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);

	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
	    == ISC_SOCKEVENTATTR_ATTACHED)
		isc_task_sendanddetach(&task, (isc_event_t **)dev);
	else
		isc_task_send(task, (isc_event_t **)dev);
}

/*
 * Internal "readable" event handler: drain queued receive requests.
 * Runs in the task the head request belongs to (see dispatch_recv()).
 */
static void
internal_recv(isc_task_t *me, isc_event_t *ev) {
	isc_socketevent_t *dev;
	isc__socket_t *sock;

	INSIST(ev->ev_type == ISC_SOCKEVENT_INTR);

	sock = ev->ev_sender;
	INSIST(VALID_SOCKET(sock));

	socket_log(sock, NULL, IOEVENT,
		   "internal_recv: task %p got event %p", me, ev);

	INSIST(sock->pending_recv == 1);
	sock->pending_recv = 0;

	/*
	 * Drop the reference taken by dispatch_recv().  If it was the
	 * last one the socket dies here and no I/O is attempted.
	 */
	INSIST(sock->references > 0);
	sock->references--;	/* the internal event is done with this socket */
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Try to do as much I/O as possible on this socket.  There are no
	 * limits here, currently.
	 */
	dev = ISC_LIST_HEAD(sock->recv_list);
	while (dev != NULL) {
		switch (doio_recv(sock, dev)) {
		case DOIO_SOFT:
			/* Would block; re-arm the watcher and wait. */
			goto poke;

		case DOIO_EOF:
			/*
			 * read of 0 means the remote end was closed.
			 * Run through the event queue and dispatch all
			 * the events with an EOF result code.
			 */
			do {
				dev->result = ISC_R_EOF;
				send_recvdone_event(sock, &dev);
				dev = ISC_LIST_HEAD(sock->recv_list);
			} while (dev != NULL);
			goto poke;

		case DOIO_SUCCESS:
		case DOIO_HARD:
			send_recvdone_event(sock, &dev);
			break;
		}

		dev = ISC_LIST_HEAD(sock->recv_list);
	}

 poke:
	/* Requests remain queued: keep watching this fd for readability. */
	if (!ISC_LIST_EMPTY(sock->recv_list))
		select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
}

/*
 * Internal "writable" event handler: drain queued send requests.
 * Mirror image of internal_recv() for the send side.
 */
static void
internal_send(isc_task_t *me, isc_event_t *ev) {
	isc_socketevent_t *dev;
	isc__socket_t *sock;

	INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);

	/*
	 * Find out what socket this is and lock it.
	 */
	sock = (isc__socket_t *)ev->ev_sender;
	INSIST(VALID_SOCKET(sock));
	socket_log(sock, NULL, IOEVENT,
		   "internal_send: task %p got event %p", me, ev);

	INSIST(sock->pending_send == 1);
	sock->pending_send = 0;

	/* Drop the reference taken by dispatch_send(); see internal_recv(). */
	INSIST(sock->references > 0);
	sock->references--;	/* the internal event is done with this socket */
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Try to do as much I/O as possible on this socket.  There are no
	 * limits here, currently.
	 */
	dev = ISC_LIST_HEAD(sock->send_list);
	while (dev != NULL) {
		switch (doio_send(sock, dev)) {
		case DOIO_SOFT:
			/* Would block; re-arm the watcher and wait. */
			goto poke;

		case DOIO_HARD:
		case DOIO_SUCCESS:
			send_senddone_event(sock, &dev);
			break;
		}

		dev = ISC_LIST_HEAD(sock->send_list);
	}

 poke:
	/* Requests remain queued: keep watching this fd for writability. */
	if (!ISC_LIST_EMPTY(sock->send_list))
		select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE);
}

/*
 * Process read/writes on each fd here.  Avoid locking
 * and unlocking twice if both reads and writes are possible.
 */
static void
process_fd(isc__socketmgr_t *manager, int fd, isc_boolean_t readable,
	   isc_boolean_t writeable)
{
	isc__socket_t *sock;
	isc_boolean_t unwatch_read = ISC_FALSE, unwatch_write = ISC_FALSE;

	/*
	 * If the socket is going to be closed, don't do more I/O.
	 */
	if (manager->fdstate[fd] == CLOSE_PENDING) {
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
		return;
	}

	sock = manager->fds[fd];
	if (readable) {
		if (sock == NULL) {
			/* fd is watched but has no socket: stop watching. */
			unwatch_read = ISC_TRUE;
			goto check_write;
		}
		if (!SOCK_DEAD(sock)) {
			dispatch_recv(sock);
		}
		unwatch_read = ISC_TRUE;
	}
check_write:
	if (writeable) {
		if (sock == NULL) {
			unwatch_write = ISC_TRUE;
			goto unlock_fd;
		}
		if (!SOCK_DEAD(sock)) {
			/* Writability completes a pending connect, too. */
			if (sock->connecting)
				dispatch_connect(sock);
			else
				dispatch_send(sock);
		}
		unwatch_write = ISC_TRUE;
	}

 unlock_fd:
	if (unwatch_read)
		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
	if (unwatch_write)
		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);

}

/*
 * Walk every fd up to maxfd and handle those flagged by select().
 */
static void
process_fds(isc__socketmgr_t *manager, int maxfd, fd_set *readfds,
	    fd_set *writefds)
{
	int i;

	REQUIRE(maxfd <= (int)manager->maxsocks);

	for
(i = 0; i < maxfd; i++) { 1867 process_fd(manager, i, FD_ISSET(i, readfds), 1868 FD_ISSET(i, writefds)); 1869 } 1870 } 1871 1872 /* 1873 * Create a new socket manager. 1874 */ 1875 1876 static isc_result_t 1877 setup_watcher(isc__socketmgr_t *manager) { 1878 isc_result_t result; 1879 1880 UNUSED(result); 1881 1882 manager->fd_bufsize = sizeof(fd_set); 1883 1884 manager->read_fds = NULL; 1885 manager->read_fds_copy = NULL; 1886 manager->write_fds = NULL; 1887 manager->write_fds_copy = NULL; 1888 1889 manager->read_fds = malloc(manager->fd_bufsize); 1890 if (manager->read_fds != NULL) 1891 manager->read_fds_copy = malloc(manager->fd_bufsize); 1892 if (manager->read_fds_copy != NULL) 1893 manager->write_fds = malloc(manager->fd_bufsize); 1894 if (manager->write_fds != NULL) { 1895 manager->write_fds_copy = malloc(manager->fd_bufsize); 1896 } 1897 if (manager->write_fds_copy == NULL) { 1898 if (manager->write_fds != NULL) { 1899 free(manager->write_fds); 1900 } 1901 if (manager->read_fds_copy != NULL) { 1902 free(manager->read_fds_copy); 1903 } 1904 if (manager->read_fds != NULL) { 1905 free(manager->read_fds); 1906 } 1907 return (ISC_R_NOMEMORY); 1908 } 1909 memset(manager->read_fds, 0, manager->fd_bufsize); 1910 memset(manager->write_fds, 0, manager->fd_bufsize); 1911 1912 manager->maxfd = 0; 1913 1914 return (ISC_R_SUCCESS); 1915 } 1916 1917 static void 1918 cleanup_watcher(isc__socketmgr_t *manager) { 1919 1920 if (manager->read_fds != NULL) 1921 free(manager->read_fds); 1922 if (manager->read_fds_copy != NULL) 1923 free(manager->read_fds_copy); 1924 if (manager->write_fds != NULL) 1925 free(manager->write_fds); 1926 if (manager->write_fds_copy != NULL) 1927 free(manager->write_fds_copy); 1928 } 1929 1930 isc_result_t 1931 isc__socketmgr_create(isc_socketmgr_t **managerp) { 1932 return (isc__socketmgr_create2(managerp, 0)); 1933 } 1934 1935 isc_result_t 1936 isc__socketmgr_create2(isc_socketmgr_t **managerp, 1937 unsigned int maxsocks) 1938 { 1939 isc__socketmgr_t 
*manager; 1940 isc_result_t result; 1941 1942 REQUIRE(managerp != NULL && *managerp == NULL); 1943 1944 if (socketmgr != NULL) { 1945 /* Don't allow maxsocks to be updated */ 1946 if (maxsocks > 0 && socketmgr->maxsocks != maxsocks) 1947 return (ISC_R_EXISTS); 1948 1949 socketmgr->refs++; 1950 *managerp = (isc_socketmgr_t *)socketmgr; 1951 return (ISC_R_SUCCESS); 1952 } 1953 1954 if (maxsocks == 0) 1955 maxsocks = FD_SETSIZE; 1956 1957 manager = malloc(sizeof(*manager)); 1958 if (manager == NULL) 1959 return (ISC_R_NOMEMORY); 1960 1961 /* zero-clear so that necessary cleanup on failure will be easy */ 1962 memset(manager, 0, sizeof(*manager)); 1963 manager->maxsocks = maxsocks; 1964 manager->fds = malloc(manager->maxsocks * sizeof(isc__socket_t *)); 1965 if (manager->fds == NULL) { 1966 result = ISC_R_NOMEMORY; 1967 goto free_manager; 1968 } 1969 manager->fdstate = malloc(manager->maxsocks * sizeof(int)); 1970 if (manager->fdstate == NULL) { 1971 result = ISC_R_NOMEMORY; 1972 goto free_manager; 1973 } 1974 1975 manager->common.methods = &socketmgrmethods; 1976 manager->common.magic = ISCAPI_SOCKETMGR_MAGIC; 1977 manager->common.impmagic = SOCKET_MANAGER_MAGIC; 1978 memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *)); 1979 ISC_LIST_INIT(manager->socklist); 1980 1981 manager->refs = 1; 1982 1983 /* 1984 * Set up initial state for the select loop 1985 */ 1986 result = setup_watcher(manager); 1987 if (result != ISC_R_SUCCESS) 1988 goto cleanup; 1989 1990 memset(manager->fdstate, 0, manager->maxsocks * sizeof(int)); 1991 1992 socketmgr = manager; 1993 *managerp = (isc_socketmgr_t *)manager; 1994 1995 return (ISC_R_SUCCESS); 1996 1997 cleanup: 1998 1999 free_manager: 2000 if (manager->fdstate != NULL) { 2001 free(manager->fdstate); 2002 } 2003 if (manager->fds != NULL) { 2004 free(manager->fds); 2005 } 2006 free(manager); 2007 2008 return (result); 2009 } 2010 2011 void 2012 isc__socketmgr_destroy(isc_socketmgr_t **managerp) { 2013 isc__socketmgr_t 
	 *manager;
	int i;

	/*
	 * Destroy a socket manager.
	 */

	REQUIRE(managerp != NULL);
	manager = (isc__socketmgr_t *)*managerp;
	REQUIRE(VALID_MANAGER(manager));

	manager->refs--;
	if (manager->refs > 0) {
		*managerp = NULL;
		return;
	}
	socketmgr = NULL;

	/*
	 * Wait for all sockets to be destroyed.
	 */
	while (!ISC_LIST_EMPTY(manager->socklist)) {
		/* Pump the task manager so pending destroy events run. */
		isc__taskmgr_dispatch(NULL);
	}

	/*
	 * Here, poke our select/poll thread.  Do this by closing the write
	 * half of the pipe, which will send EOF to the read half.
	 * This is currently a no-op in the non-threaded case.
	 */
	select_poke(manager, 0, SELECT_POKE_SHUTDOWN);

	/*
	 * Clean up.
	 */
	cleanup_watcher(manager);

	for (i = 0; i < (int)manager->maxsocks; i++)
		if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */
			(void)close(i);

	free(manager->fds);
	free(manager->fdstate);

	manager->common.magic = 0;
	manager->common.impmagic = 0;
	free(manager);

	*managerp = NULL;

	/* NOTE(review): redundant — already cleared above. */
	socketmgr = NULL;
}

/*
 * Common receive path: try immediate I/O where allowed (always for UDP;
 * for TCP only when no other request is already queued), otherwise queue
 * the request and arm the watcher.  With ISC_SOCKFLAG_IMMEDIATE set, a
 * queued request reports ISC_R_INPROGRESS and no done event is posted
 * for a completed one.
 */
static isc_result_t
socket_recv(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	if (sock->type == isc_sockettype_udp) {
		io_state = doio_recv(sock, dev);
	} else {
		/* TCP must preserve ordering: only read if queue is empty. */
		if (ISC_LIST_EMPTY(sock->recv_list))
			io_state = doio_recv(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't read all or part of the request right now, so
		 * queue it.
		 *
		 * Attach to socket and to task
		 */
		isc_task_attach(task, &ntask);
		dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

		/*
		 * Enqueue the request.  If the socket was previously not being
		 * watched, poke the watcher to start paying attention to it.
		 */
		if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv)
			select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
		ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);

		socket_log(sock, NULL, EVENT, NULL, 0, 0,
			   "socket_recv: event %p -> task %p",
			   dev, ntask);

		if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
			result = ISC_R_INPROGRESS;
		break;

	case DOIO_EOF:
		dev->result = ISC_R_EOF;
		/* fallthrough */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		/* IMMEDIATE callers read dev->result directly; no event. */
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_recvdone_event(sock, &dev);
		break;
	}

	return (result);
}

/*
 * Receive into a list of buffers; completion is reported via a
 * RECVDONE event to 'task'.
 */
isc_result_t
isc__socket_recvv(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		  unsigned int minimum, isc_task_t *task,
		  isc_taskaction_t action, void *arg)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_availablecount(buflist);
	REQUIRE(iocount > 0);

	INSIST(sock->bound);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * UDP sockets are always partial read
	 */
	if (sock->type == isc_sockettype_udp)
		dev->minimum = 1;
	else {
		/* minimum == 0 means "fill the whole buffer list". */
		if (minimum == 0)
			dev->minimum = iocount;
		else
			dev->minimum = minimum;
	}

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	return (socket_recv(sock, dev, task, 0));
}

/*
 * Common send path, mirroring socket_recv(): try immediate I/O where
 * allowed (always for UDP; for TCP only when the send queue is empty),
 * otherwise queue the request unless ISC_SOCKFLAG_NORETRY is set.
 * 'address' and 'pktinfo' override the destination for UDP sends.
 */
static isc_result_t
socket_send(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	set_dev_address(address, sock, dev);
	if (pktinfo != NULL) {
		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
		dev->pktinfo = *pktinfo;

		if (!isc_sockaddr_issitelocal(&dev->address) &&
		    !isc_sockaddr_islinklocal(&dev->address)) {
			socket_log(sock, NULL, TRACE,
				   "pktinfo structure provided, ifindex %u "
				   "(set to 0)", pktinfo->ipi6_ifindex);

			/*
			 * Set the pktinfo index to 0 here, to let the
			 * kernel decide what interface it should send on.
			 */
			dev->pktinfo.ipi6_ifindex = 0;
		}
	}

	if (sock->type == isc_sockettype_udp)
		io_state = doio_send(sock, dev);
	else {
		/* TCP must preserve ordering: only send if queue is empty. */
		if (ISC_LIST_EMPTY(sock->send_list))
			io_state = doio_send(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't send all or part of the request right now, so
		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
		 */
		if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
			isc_task_attach(task, &ntask);
			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

			/*
			 * Enqueue the request.  If the socket was previously
			 * not being watched, poke the watcher to start
			 * paying attention to it.
			 */
			if (ISC_LIST_EMPTY(sock->send_list) &&
			    !sock->pending_send)
				select_poke(sock->manager, sock->fd,
					    SELECT_POKE_WRITE);
			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);

			socket_log(sock, NULL, EVENT, NULL, 0, 0,
				   "socket_send: event %p -> task %p",
				   dev, ntask);

			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
				result = ISC_R_INPROGRESS;
			break;
		}

		/* FALLTHROUGH */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		/* IMMEDIATE callers read dev->result directly; no event. */
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_senddone_event(sock, &dev);
		break;
	}

	return (result);
}

isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  isc_task_t *task, isc_taskaction_t action, void *arg)
{
	/* Convenience wrapper: no explicit address/pktinfo, no flags. */
	return (isc__socket_sendtov2(sock, buflist, task, action, arg, NULL,
				     NULL, 0));
}

/*
 * Send the contents of a buffer list; completion is reported via a
 * SENDDONE event to 'task'.
 */
isc_result_t
isc__socket_sendtov2(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		     isc_task_t *task, isc_taskaction_t action, void *arg,
		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		     unsigned int flags)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_usedcount(buflist);
	REQUIRE(iocount > 0);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * Move each buffer from the passed in list to our internal one.
2301 */ 2302 buffer = ISC_LIST_HEAD(*buflist); 2303 while (buffer != NULL) { 2304 ISC_LIST_DEQUEUE(*buflist, buffer, link); 2305 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 2306 buffer = ISC_LIST_HEAD(*buflist); 2307 } 2308 2309 return (socket_send(sock, dev, task, address, pktinfo, flags)); 2310 } 2311 2312 isc_result_t 2313 isc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr, 2314 unsigned int options) { 2315 isc__socket_t *sock = (isc__socket_t *)sock0; 2316 char strbuf[ISC_STRERRORSIZE]; 2317 int on = 1; 2318 2319 REQUIRE(VALID_SOCKET(sock)); 2320 2321 INSIST(!sock->bound); 2322 2323 if (sock->pf != sockaddr->type.sa.sa_family) { 2324 return (ISC_R_FAMILYMISMATCH); 2325 } 2326 2327 /* 2328 * Only set SO_REUSEADDR when we want a specific port. 2329 */ 2330 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && 2331 isc_sockaddr_getport(sockaddr) != (in_port_t)0 && 2332 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, 2333 sizeof(on)) < 0) { 2334 UNEXPECTED_ERROR(__FILE__, __LINE__, 2335 "setsockopt(%d) %s", sock->fd, "failed"); 2336 /* Press on... 
*/ 2337 } 2338 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { 2339 switch (errno) { 2340 case EACCES: 2341 return (ISC_R_NOPERM); 2342 case EADDRNOTAVAIL: 2343 return (ISC_R_ADDRNOTAVAIL); 2344 case EADDRINUSE: 2345 return (ISC_R_ADDRINUSE); 2346 case EINVAL: 2347 return (ISC_R_BOUND); 2348 default: 2349 isc__strerror(errno, strbuf, sizeof(strbuf)); 2350 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", 2351 strbuf); 2352 return (ISC_R_UNEXPECTED); 2353 } 2354 } 2355 2356 socket_log(sock, sockaddr, TRACE, "bound"); 2357 sock->bound = 1; 2358 2359 return (ISC_R_SUCCESS); 2360 } 2361 2362 isc_result_t 2363 isc__socket_connect(isc_socket_t *sock0, isc_sockaddr_t *addr, 2364 isc_task_t *task, isc_taskaction_t action, void *arg) 2365 { 2366 isc__socket_t *sock = (isc__socket_t *)sock0; 2367 isc_socket_connev_t *dev; 2368 isc_task_t *ntask = NULL; 2369 isc__socketmgr_t *manager; 2370 int cc; 2371 char strbuf[ISC_STRERRORSIZE]; 2372 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 2373 2374 REQUIRE(VALID_SOCKET(sock)); 2375 REQUIRE(addr != NULL); 2376 REQUIRE(task != NULL); 2377 REQUIRE(action != NULL); 2378 2379 manager = sock->manager; 2380 REQUIRE(VALID_MANAGER(manager)); 2381 REQUIRE(addr != NULL); 2382 2383 if (isc_sockaddr_ismulticast(addr)) 2384 return (ISC_R_MULTICAST); 2385 2386 REQUIRE(!sock->connecting); 2387 2388 dev = (isc_socket_connev_t *)isc_event_allocate(sock, 2389 ISC_SOCKEVENT_CONNECT, 2390 action, arg, 2391 sizeof(*dev)); 2392 if (dev == NULL) { 2393 return (ISC_R_NOMEMORY); 2394 } 2395 ISC_LINK_INIT(dev, ev_link); 2396 2397 /* 2398 * Try to do the connect right away, as there can be only one 2399 * outstanding, and it might happen to complete. 2400 */ 2401 sock->peer_address = *addr; 2402 cc = connect(sock->fd, &addr->type.sa, addr->length); 2403 if (cc < 0) { 2404 /* 2405 * HP-UX "fails" to connect a UDP socket and sets errno to 2406 * EINPROGRESS if it's non-blocking. 
We'd rather regard this as 2407 * a success and let the user detect it if it's really an error 2408 * at the time of sending a packet on the socket. 2409 */ 2410 if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { 2411 cc = 0; 2412 goto success; 2413 } 2414 if (SOFT_ERROR(errno) || errno == EINPROGRESS) 2415 goto queue; 2416 2417 switch (errno) { 2418 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit; 2419 ERROR_MATCH(EACCES, ISC_R_NOPERM); 2420 ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2421 ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2422 ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); 2423 ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); 2424 ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); 2425 ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); 2426 ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); 2427 ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); 2428 ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); 2429 ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); 2430 #undef ERROR_MATCH 2431 } 2432 2433 sock->connected = 0; 2434 2435 isc__strerror(errno, strbuf, sizeof(strbuf)); 2436 isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf)); 2437 UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s", 2438 addrbuf, errno, strbuf); 2439 2440 isc_event_free(ISC_EVENT_PTR(&dev)); 2441 return (ISC_R_UNEXPECTED); 2442 2443 err_exit: 2444 sock->connected = 0; 2445 isc_task_send(task, ISC_EVENT_PTR(&dev)); 2446 2447 return (ISC_R_SUCCESS); 2448 } 2449 2450 /* 2451 * If connect completed, fire off the done event. 2452 */ 2453 success: 2454 if (cc == 0) { 2455 sock->connected = 1; 2456 sock->bound = 1; 2457 dev->result = ISC_R_SUCCESS; 2458 isc_task_send(task, ISC_EVENT_PTR(&dev)); 2459 2460 return (ISC_R_SUCCESS); 2461 } 2462 2463 queue: 2464 2465 /* 2466 * Attach to task. 2467 */ 2468 isc_task_attach(task, &ntask); 2469 2470 sock->connecting = 1; 2471 2472 dev->ev_sender = ntask; 2473 2474 /* 2475 * Poke watcher here. We still have the socket locked, so there 2476 * is no race condition. 
We will keep the lock for such a short 2477 * bit of time waking it up now or later won't matter all that much. 2478 */ 2479 if (sock->connect_ev == NULL) 2480 select_poke(manager, sock->fd, SELECT_POKE_CONNECT); 2481 2482 sock->connect_ev = dev; 2483 2484 return (ISC_R_SUCCESS); 2485 } 2486 2487 /* 2488 * Called when a socket with a pending connect() finishes. 2489 */ 2490 static void 2491 internal_connect(isc_task_t *me, isc_event_t *ev) { 2492 isc__socket_t *sock; 2493 isc_socket_connev_t *dev; 2494 isc_task_t *task; 2495 int cc; 2496 socklen_t optlen; 2497 char strbuf[ISC_STRERRORSIZE]; 2498 char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 2499 2500 UNUSED(me); 2501 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 2502 2503 sock = ev->ev_sender; 2504 INSIST(VALID_SOCKET(sock)); 2505 2506 /* 2507 * When the internal event was sent the reference count was bumped 2508 * to keep the socket around for us. Decrement the count here. 2509 */ 2510 INSIST(sock->references > 0); 2511 sock->references--; 2512 if (sock->references == 0) { 2513 destroy(&sock); 2514 return; 2515 } 2516 2517 /* 2518 * Has this event been canceled? 2519 */ 2520 dev = sock->connect_ev; 2521 if (dev == NULL) { 2522 INSIST(!sock->connecting); 2523 return; 2524 } 2525 2526 INSIST(sock->connecting); 2527 sock->connecting = 0; 2528 2529 /* 2530 * Get any possible error status here. 2531 */ 2532 optlen = sizeof(cc); 2533 if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, 2534 (void *)&cc, (void *)&optlen) < 0) 2535 cc = errno; 2536 else 2537 errno = cc; 2538 2539 if (errno != 0) { 2540 /* 2541 * If the error is EAGAIN, just re-select on this 2542 * fd and pretend nothing strange happened. 2543 */ 2544 if (SOFT_ERROR(errno) || errno == EINPROGRESS) { 2545 sock->connecting = 1; 2546 select_poke(sock->manager, sock->fd, 2547 SELECT_POKE_CONNECT); 2548 return; 2549 } 2550 2551 2552 /* 2553 * Translate other errors into ISC_R_* flavors. 
2554 */ 2555 switch (errno) { 2556 #define ERROR_MATCH(a, b) case a: dev->result = b; break; 2557 ERROR_MATCH(EACCES, ISC_R_NOPERM); 2558 ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2559 ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2560 ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); 2561 ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); 2562 ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); 2563 ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); 2564 ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); 2565 ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); 2566 ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); 2567 ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); 2568 ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); 2569 #undef ERROR_MATCH 2570 default: 2571 dev->result = ISC_R_UNEXPECTED; 2572 isc_sockaddr_format(&sock->peer_address, peerbuf, 2573 sizeof(peerbuf)); 2574 isc__strerror(errno, strbuf, sizeof(strbuf)); 2575 UNEXPECTED_ERROR(__FILE__, __LINE__, 2576 "internal_connect: connect(%s) %s", 2577 peerbuf, strbuf); 2578 } 2579 } else { 2580 dev->result = ISC_R_SUCCESS; 2581 sock->connected = 1; 2582 sock->bound = 1; 2583 } 2584 2585 sock->connect_ev = NULL; 2586 2587 task = dev->ev_sender; 2588 dev->ev_sender = sock; 2589 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev)); 2590 } 2591 2592 /* 2593 * Run through the list of events on this socket, and cancel the ones 2594 * queued for task "task" of type "how". "how" is a bitmask. 2595 */ 2596 void 2597 isc__socket_cancel(isc_socket_t *sock0, isc_task_t *task, unsigned int how) { 2598 isc__socket_t *sock = (isc__socket_t *)sock0; 2599 2600 REQUIRE(VALID_SOCKET(sock)); 2601 2602 /* 2603 * Quick exit if there is nothing to do. Don't even bother locking 2604 * in this case. 2605 */ 2606 if (how == 0) 2607 return; 2608 2609 /* 2610 * All of these do the same thing, more or less. 2611 * Each will: 2612 * o If the internal event is marked as "posted" try to 2613 * remove it from the task's queue. 
If this fails, mark it 2614 * as canceled instead, and let the task clean it up later. 2615 * o For each I/O request for that task of that type, post 2616 * its done event with status of "ISC_R_CANCELED". 2617 * o Reset any state needed. 2618 */ 2619 if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) 2620 && !ISC_LIST_EMPTY(sock->recv_list)) { 2621 isc_socketevent_t *dev; 2622 isc_socketevent_t *next; 2623 isc_task_t *current_task; 2624 2625 dev = ISC_LIST_HEAD(sock->recv_list); 2626 2627 while (dev != NULL) { 2628 current_task = dev->ev_sender; 2629 next = ISC_LIST_NEXT(dev, ev_link); 2630 2631 if ((task == NULL) || (task == current_task)) { 2632 dev->result = ISC_R_CANCELED; 2633 send_recvdone_event(sock, &dev); 2634 } 2635 dev = next; 2636 } 2637 } 2638 2639 if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) 2640 && !ISC_LIST_EMPTY(sock->send_list)) { 2641 isc_socketevent_t *dev; 2642 isc_socketevent_t *next; 2643 isc_task_t *current_task; 2644 2645 dev = ISC_LIST_HEAD(sock->send_list); 2646 2647 while (dev != NULL) { 2648 current_task = dev->ev_sender; 2649 next = ISC_LIST_NEXT(dev, ev_link); 2650 2651 if ((task == NULL) || (task == current_task)) { 2652 dev->result = ISC_R_CANCELED; 2653 send_senddone_event(sock, &dev); 2654 } 2655 dev = next; 2656 } 2657 } 2658 2659 /* 2660 * Connecting is not a list. 2661 */ 2662 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 2663 && sock->connect_ev != NULL) { 2664 isc_socket_connev_t *dev; 2665 isc_task_t *current_task; 2666 2667 INSIST(sock->connecting); 2668 sock->connecting = 0; 2669 2670 dev = sock->connect_ev; 2671 current_task = dev->ev_sender; 2672 2673 if ((task == NULL) || (task == current_task)) { 2674 sock->connect_ev = NULL; 2675 2676 dev->result = ISC_R_CANCELED; 2677 dev->ev_sender = sock; 2678 isc_task_sendanddetach(¤t_task, 2679 ISC_EVENT_PTR(&dev)); 2680 } 2681 } 2682 2683 } 2684 2685 /* 2686 * In our assumed scenario, we can simply use a single static object. 
2687 * XXX: this is not true if the application uses multiple threads with 2688 * 'multi-context' mode. Fixing this is a future TODO item. 2689 */ 2690 static isc_socketwait_t swait_private; 2691 2692 int 2693 isc__socketmgr_waitevents(isc_socketmgr_t *manager0, struct timeval *tvp, 2694 isc_socketwait_t **swaitp) 2695 { 2696 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2697 int n; 2698 2699 REQUIRE(swaitp != NULL && *swaitp == NULL); 2700 2701 if (manager == NULL) 2702 manager = socketmgr; 2703 if (manager == NULL) 2704 return (0); 2705 2706 memmove(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize); 2707 memmove(manager->write_fds_copy, manager->write_fds, 2708 manager->fd_bufsize); 2709 2710 swait_private.readset = manager->read_fds_copy; 2711 swait_private.writeset = manager->write_fds_copy; 2712 swait_private.maxfd = manager->maxfd + 1; 2713 2714 n = select(swait_private.maxfd, swait_private.readset, 2715 swait_private.writeset, NULL, tvp); 2716 2717 *swaitp = &swait_private; 2718 return (n); 2719 } 2720 2721 isc_result_t 2722 isc__socketmgr_dispatch(isc_socketmgr_t *manager0, isc_socketwait_t *swait) { 2723 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2724 2725 REQUIRE(swait == &swait_private); 2726 2727 if (manager == NULL) 2728 manager = socketmgr; 2729 if (manager == NULL) 2730 return (ISC_R_NOTFOUND); 2731 2732 process_fds(manager, swait->maxfd, swait->readset, swait->writeset); 2733 return (ISC_R_SUCCESS); 2734 } 2735 2736 #include "../socket_api.c" 2737