/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/*! \file */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>

#include <netinet/tcp.h>

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>	/* uintptr_t */

#include <isc/buffer.h>
#include <isc/bufferlist.h>
#include <isc/formatcheck.h>
#include <isc/list.h>
#include <isc/log.h>
#include <isc/msgs.h>
#include <isc/net.h>
#include <isc/region.h>
#include <isc/socket.h>
#include <isc/strerror.h>
#include <isc/task.h>
#include <isc/util.h>

#include "errno2result.h"

#include "socket_p.h"
#include "../task_p.h"

struct isc_socketwait {
	fd_set	*readset;
	fd_set	*writeset;
	int	nfds;
	int	maxfd;
};

/*
 * Set by the -T dscp option on the command line. If set to a value
 * other than -1, we check to make sure DSCP values match it, and
 * assert if not.
 */
int isc_dscp_check_value = -1;

/*%
 * Size of per-FD lock buckets.
 */
#define FDLOCK_COUNT		1
#define FDLOCK_ID(fd)		0

/*%
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 */

/*%
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0.  This is broken, but we have to
 * work around it here.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)

#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*!<
 * DLVL(90) --  Function entry/exit and other tracing.
 * DLVL(70) --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60) --  Socket data send/receive
 * DLVL(50) --  Event tracing, including receiving/sending completion events.
 * DLVL(20) --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

typedef isc_event_t intev_t;

#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(s)		ISC_MAGIC_VALID(s, SOCKET_MAGIC)

/*!
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */

/*%
 * NetBSD and FreeBSD can timestamp packets.  XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */

/*%
 * The size to raise the receive buffer to (from BIND 8).
 */
#define RCVBUFSIZE (32*1024)

/*%
 * Instead of calculating the cmsgbuf lengths every time, we take a
 * rule-of-thumb approach: the sizes are taken from x86_64 Linux and
 * multiplied by 2, so everything should fit.  Those sizes are not
 * large enough to cause any concern.
 */
#define CMSG_SP_IN6PKT 40

#define CMSG_SP_TIMESTAMP 32

#define CMSG_SP_TCTOS 24

#define CMSG_SP_INT 24

#define RECVCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS)+1)
#define SENDCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS)+1)

/*%
 * The number of times a send operation is repeated if the result is EINTR.
 */
#define NRETRIES 10

typedef struct isc__socket isc__socket_t;
typedef struct isc__socketmgr isc__socketmgr_t;

#define NEWCONNSOCK(ev) ((isc__socket_t *)(ev)->newsocket)

struct isc__socket {
	/* Not locked. */
	isc_socket_t		common;
	isc__socketmgr_t	*manager;
	isc_sockettype_t	type;

	/* Locked by socket lock. */
	ISC_LINK(isc__socket_t)	link;
	unsigned int		references;
	int			fd;
	int			pf;

	ISC_LIST(isc_socketevent_t)	send_list;
	ISC_LIST(isc_socketevent_t)	recv_list;
	isc_socket_connev_t		*connect_ev;

	/*
	 * Internal events.  Posted when a descriptor is readable or
	 * writable.  These are statically allocated and never freed.
	 * They will be set to non-purgable before use.
	 */
	intev_t			readable_ev;
	intev_t			writable_ev;

	isc_sockaddr_t		peer_address;	/* remote address */

	unsigned int		pending_recv : 1,
				pending_send : 1,
				connected : 1,
				connecting : 1,	/* connect pending */
				bound : 1,	/* bound to local addr */
				active : 1,	/* currently active */
				pktdscp : 1;	/* per packet dscp */
	unsigned int		dscp;
};

#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

struct isc__socketmgr {
	/* Not locked. */
	isc_socketmgr_t		common;
	int			fd_bufsize;
	unsigned int		maxsocks;

	isc__socket_t		**fds;
	int			*fdstate;

	/* Locked by manager lock.
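	 * (read_fds/write_fds are the master sets of descriptors being
	 * watched for read/write; the *_copy sets appear to be scratch
	 * copies for the select() call, since select() overwrites the
	 * fd_sets handed to it.)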
	 */
	ISC_LIST(isc__socket_t)	socklist;
	fd_set			*read_fds;
	fd_set			*read_fds_copy;
	fd_set			*write_fds;
	fd_set			*write_fds_copy;
	int			maxfd;
	unsigned int		refs;
};

static isc__socketmgr_t *socketmgr = NULL;

#define CLOSED			0	/* this one must be zero */
#define MANAGED			1
#define CLOSE_PENDING		2

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)

static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
				  isc_sockettype_t type,
				  isc_socket_t **socketp);
static void send_recvdone_event(isc__socket_t *, isc_socketevent_t **);
static void send_senddone_event(isc__socket_t *, isc_socketevent_t **);
static void free_socket(isc__socket_t **);
static isc_result_t allocate_socket(isc__socketmgr_t *, isc_sockettype_t,
				    isc__socket_t **);
static void destroy(isc__socket_t **);
static void internal_connect(isc_task_t *, isc_event_t *);
static void internal_recv(isc_task_t *, isc_event_t *);
static void internal_send(isc_task_t *, isc_event_t *);
static void process_cmsg(isc__socket_t *, struct msghdr *, isc_socketevent_t *);
static void build_msghdr_send(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc__socket_t *, char *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);

/*%
 * The following are intended for internal use (indicated by "isc__"
 * prefix) but are not declared as static, allowing direct access from
 * unit tests etc.
 */

isc_result_t
isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
		   isc_socket_t **socketp);
void
isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
void
isc__socket_detach(isc_socket_t **socketp);
isc_result_t
isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  unsigned int minimum, isc_task_t *task,
		  isc_taskaction_t action, void *arg);
isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  isc_task_t *task, isc_taskaction_t action, void *arg);
isc_result_t
isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
		     isc_task_t *task, isc_taskaction_t action, void *arg,
		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		     unsigned int flags);
isc_result_t
isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
		 unsigned int options);
isc_result_t
isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
		    isc_task_t *task, isc_taskaction_t action,
		    void *arg);
void
isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how);

isc_result_t
isc__socketmgr_create(isc_socketmgr_t **managerp);
isc_result_t
isc__socketmgr_create2(isc_socketmgr_t **managerp,
		       unsigned int maxsocks);
isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager0, unsigned int *nsockp);
void
isc__socketmgr_destroy(isc_socketmgr_t **managerp);

static struct {
	isc_socketmethods_t methods;

	/*%
	 * The following are defined just for avoiding unused static functions.
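	 * (Storing their addresses in this structure keeps the compiler
	 * from warning that isc__socket_recvv() and isc__socket_sendv()
	 * are otherwise unreferenced.)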
302 */ 303 void *recvv, *sendv; 304 } socketmethods = { 305 { 306 isc__socket_attach, 307 isc__socket_detach, 308 isc__socket_bind, 309 isc__socket_connect, 310 isc__socket_cancel, 311 }, 312 (void *)isc__socket_recvv, 313 (void *)isc__socket_sendv, 314 }; 315 316 static isc_socketmgrmethods_t socketmgrmethods = { 317 isc__socketmgr_destroy, 318 isc__socket_create 319 }; 320 321 #define SELECT_POKE_SHUTDOWN (-1) 322 #define SELECT_POKE_NOTHING (-2) 323 #define SELECT_POKE_READ (-3) 324 #define SELECT_POKE_ACCEPT (-3) /*%< Same as _READ */ 325 #define SELECT_POKE_WRITE (-4) 326 #define SELECT_POKE_CONNECT (-4) /*%< Same as _WRITE */ 327 #define SELECT_POKE_CLOSE (-5) 328 329 #define SOCK_DEAD(s) ((s)->references == 0) 330 331 /*% 332 * Shortcut index arrays to get access to statistics counters. 333 */ 334 enum { 335 STATID_OPEN = 0, 336 STATID_OPENFAIL = 1, 337 STATID_CLOSE = 2, 338 STATID_BINDFAIL = 3, 339 STATID_CONNECTFAIL = 4, 340 STATID_CONNECT = 5, 341 STATID_ACCEPTFAIL = 6, 342 STATID_ACCEPT = 7, 343 STATID_SENDFAIL = 8, 344 STATID_RECVFAIL = 9, 345 STATID_ACTIVE = 10 346 }; 347 348 349 static void 350 socket_log(isc__socket_t *sock, isc_sockaddr_t *address, 351 isc_logcategory_t *category, isc_logmodule_t *module, int level, 352 const char *fmt, ...) ISC_FORMAT_PRINTF(6, 7); 353 static void 354 socket_log(isc__socket_t *sock, isc_sockaddr_t *address, 355 isc_logcategory_t *category, isc_logmodule_t *module, int level, 356 const char *fmt, ...) 357 { 358 char msgbuf[2048]; 359 char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 360 va_list ap; 361 362 if (! isc_log_wouldlog(isc_lctx, level)) 363 return; 364 365 va_start(ap, fmt); 366 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 367 va_end(ap); 368 369 if (address == NULL) { 370 isc_log_write(isc_lctx, category, module, level, 371 "socket %p: %s", sock, msgbuf); 372 } else { 373 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 374 isc_log_write(isc_lctx, category, module, level, 375 "socket %p %s: %s", sock, peerbuf, msgbuf); 376 } 377 } 378 379 static inline isc_result_t 380 watch_fd(isc__socketmgr_t *manager, int fd, int msg) { 381 isc_result_t result = ISC_R_SUCCESS; 382 383 if (msg == SELECT_POKE_READ) 384 FD_SET(fd, manager->read_fds); 385 if (msg == SELECT_POKE_WRITE) 386 FD_SET(fd, manager->write_fds); 387 388 return (result); 389 } 390 391 static inline isc_result_t 392 unwatch_fd(isc__socketmgr_t *manager, int fd, int msg) { 393 isc_result_t result = ISC_R_SUCCESS; 394 395 if (msg == SELECT_POKE_READ) 396 FD_CLR(fd, manager->read_fds); 397 else if (msg == SELECT_POKE_WRITE) 398 FD_CLR(fd, manager->write_fds); 399 400 return (result); 401 } 402 403 static void 404 wakeup_socket(isc__socketmgr_t *manager, int fd, int msg) { 405 isc_result_t result; 406 407 /* 408 * This is a wakeup on a socket. If the socket is not in the 409 * process of being closed, start watching it for either reads 410 * or writes. 411 */ 412 413 INSIST(fd >= 0 && fd < (int)manager->maxsocks); 414 415 if (msg == SELECT_POKE_CLOSE) { 416 /* No one should be updating fdstate, so no need to lock it */ 417 INSIST(manager->fdstate[fd] == CLOSE_PENDING); 418 manager->fdstate[fd] = CLOSED; 419 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 420 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 421 (void)close(fd); 422 return; 423 } 424 425 if (manager->fdstate[fd] == CLOSE_PENDING) { 426 427 /* 428 * We accept (and ignore) any error from unwatch_fd() as we are 429 * closing the socket, hoping it doesn't leave dangling state in 430 * the kernel. 
431 */ 432 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 433 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 434 return; 435 } 436 if (manager->fdstate[fd] != MANAGED) { 437 return; 438 } 439 440 /* 441 * Set requested bit. 442 */ 443 result = watch_fd(manager, fd, msg); 444 if (result != ISC_R_SUCCESS) { 445 /* 446 * XXXJT: what should we do? Ignoring the failure of watching 447 * a socket will make the application dysfunctional, but there 448 * seems to be no reasonable recovery process. 449 */ 450 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 451 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 452 "failed to start watching FD (%d): %s", 453 fd, isc_result_totext(result)); 454 } 455 } 456 457 /* 458 * Update the state of the socketmgr when something changes. 459 */ 460 static void 461 select_poke(isc__socketmgr_t *manager, int fd, int msg) { 462 if (msg == SELECT_POKE_SHUTDOWN) 463 return; 464 else if (fd >= 0) 465 wakeup_socket(manager, fd, msg); 466 return; 467 } 468 469 /* 470 * Make a fd non-blocking. 471 */ 472 static isc_result_t 473 make_nonblock(int fd) { 474 int ret; 475 char strbuf[ISC_STRERRORSIZE]; 476 int flags; 477 478 flags = fcntl(fd, F_GETFL, 0); 479 flags |= O_NONBLOCK; 480 ret = fcntl(fd, F_SETFL, flags); 481 482 if (ret == -1) { 483 isc__strerror(errno, strbuf, sizeof(strbuf)); 484 UNEXPECTED_ERROR(__FILE__, __LINE__, 485 "fcntl(%d, F_SETFL, %d): %s", fd, flags, 486 strbuf); 487 488 return (ISC_R_UNEXPECTED); 489 } 490 491 return (ISC_R_SUCCESS); 492 } 493 494 /* 495 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. 496 * In order to ensure as much portability as possible, we provide wrapper 497 * functions of these macros. 498 * Note that cmsg_space() could run slow on OSes that do not have 499 * CMSG_SPACE. 500 */ 501 static inline socklen_t 502 cmsg_len(socklen_t len) { 503 return (CMSG_LEN(len)); 504 } 505 506 static inline socklen_t 507 cmsg_space(socklen_t len) { 508 return (CMSG_SPACE(len)); 509 } 510 511 /* 512 * Process control messages received on a socket. 513 */ 514 static void 515 process_cmsg(isc__socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { 516 struct cmsghdr *cmsgp; 517 struct in6_pktinfo *pktinfop; 518 void *timevalp; 519 520 /* 521 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. 522 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. 523 * They are all here, outside of the CPP tests, because it is 524 * more consistent with the usual ISC coding style. 
525 */ 526 UNUSED(sock); 527 UNUSED(msg); 528 UNUSED(dev); 529 530 if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC) 531 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; 532 533 if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC) 534 dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; 535 536 if (msg->msg_controllen == 0U || msg->msg_control == NULL) 537 return; 538 539 timevalp = NULL; 540 pktinfop = NULL; 541 542 cmsgp = CMSG_FIRSTHDR(msg); 543 while (cmsgp != NULL) { 544 socket_log(sock, NULL, TRACE, 545 "processing cmsg %p", cmsgp); 546 547 if (cmsgp->cmsg_level == IPPROTO_IPV6 548 && cmsgp->cmsg_type == IPV6_PKTINFO) { 549 550 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 551 memmove(&dev->pktinfo, pktinfop, 552 sizeof(struct in6_pktinfo)); 553 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 554 socket_log(sock, NULL, TRACE, 555 "interface received on ifindex %u", 556 dev->pktinfo.ipi6_ifindex); 557 if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) 558 dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; 559 goto next; 560 } 561 562 if (cmsgp->cmsg_level == SOL_SOCKET 563 && cmsgp->cmsg_type == SCM_TIMESTAMP) { 564 struct timeval tv; 565 timevalp = CMSG_DATA(cmsgp); 566 memmove(&tv, timevalp, sizeof(tv)); 567 dev->timestamp.seconds = tv.tv_sec; 568 dev->timestamp.nanoseconds = tv.tv_usec * 1000; 569 dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; 570 goto next; 571 } 572 573 if (cmsgp->cmsg_level == IPPROTO_IPV6 574 && cmsgp->cmsg_type == IPV6_TCLASS) { 575 dev->dscp = *(int *)CMSG_DATA(cmsgp); 576 dev->dscp >>= 2; 577 dev->attributes |= ISC_SOCKEVENTATTR_DSCP; 578 goto next; 579 } 580 581 if (cmsgp->cmsg_level == IPPROTO_IP 582 && (cmsgp->cmsg_type == IP_TOS)) { 583 dev->dscp = (int) *(unsigned char *)CMSG_DATA(cmsgp); 584 dev->dscp >>= 2; 585 dev->attributes |= ISC_SOCKEVENTATTR_DSCP; 586 goto next; 587 } 588 next: 589 cmsgp = CMSG_NXTHDR(msg, cmsgp); 590 } 591 592 } 593 594 /* 595 * Construct an iov array and attach it to the msghdr passed in. This is 596 * the SEND constructor, which will use the used region of the buffer 597 * (if using a buffer list) or will use the internal region (if a single 598 * buffer I/O is requested). 599 * 600 * Nothing can be NULL, and the done event must list at least one buffer 601 * on the buffer linked list for this function to be meaningful. 602 * 603 * If write_countp != NULL, *write_countp will hold the number of bytes 604 * this transaction can send. 605 */ 606 static void 607 build_msghdr_send(isc__socket_t *sock, char* cmsgbuf, isc_socketevent_t *dev, 608 struct msghdr *msg, struct iovec *iov, size_t *write_countp) 609 { 610 unsigned int iovcount; 611 isc_buffer_t *buffer; 612 isc_region_t used; 613 size_t write_count; 614 size_t skip_count; 615 struct cmsghdr *cmsgp; 616 617 memset(msg, 0, sizeof(*msg)); 618 619 if (!sock->connected) { 620 msg->msg_name = (void *)&dev->address.type.sa; 621 msg->msg_namelen = dev->address.length; 622 } else { 623 msg->msg_name = NULL; 624 msg->msg_namelen = 0; 625 } 626 627 buffer = ISC_LIST_HEAD(dev->bufferlist); 628 write_count = 0; 629 iovcount = 0; 630 631 /* 632 * Single buffer I/O? Skip what we've done so far in this region. 633 */ 634 if (buffer == NULL) { 635 write_count = dev->region.length - dev->n; 636 iov[0].iov_base = (void *)(dev->region.base + dev->n); 637 iov[0].iov_len = write_count; 638 iovcount = 1; 639 640 goto config; 641 } 642 643 /* 644 * Multibuffer I/O. 645 * Skip the data in the buffer list that we have already written. 
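	 * (dev->n holds the number of bytes already sent for this request,
	 * so that many bytes are skipped before the iovec array is built.)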
646 */ 647 skip_count = dev->n; 648 while (buffer != NULL) { 649 REQUIRE(ISC_BUFFER_VALID(buffer)); 650 if (skip_count < isc_buffer_usedlength(buffer)) 651 break; 652 skip_count -= isc_buffer_usedlength(buffer); 653 buffer = ISC_LIST_NEXT(buffer, link); 654 } 655 656 while (buffer != NULL) { 657 INSIST(iovcount < MAXSCATTERGATHER_SEND); 658 659 isc_buffer_usedregion(buffer, &used); 660 661 if (used.length > 0) { 662 iov[iovcount].iov_base = (void *)(used.base 663 + skip_count); 664 iov[iovcount].iov_len = used.length - skip_count; 665 write_count += (used.length - skip_count); 666 skip_count = 0; 667 iovcount++; 668 } 669 buffer = ISC_LIST_NEXT(buffer, link); 670 } 671 672 INSIST(skip_count == 0U); 673 674 config: 675 msg->msg_iov = iov; 676 msg->msg_iovlen = iovcount; 677 678 msg->msg_control = NULL; 679 msg->msg_controllen = 0; 680 msg->msg_flags = 0; 681 682 if ((sock->type == isc_sockettype_udp) && 683 ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) 684 { 685 struct in6_pktinfo *pktinfop; 686 687 socket_log(sock, NULL, TRACE, 688 "sendto pktinfo data, ifindex %u", 689 dev->pktinfo.ipi6_ifindex); 690 691 msg->msg_control = (void *)cmsgbuf; 692 msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo)); 693 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 694 695 cmsgp = (struct cmsghdr *)cmsgbuf; 696 cmsgp->cmsg_level = IPPROTO_IPV6; 697 cmsgp->cmsg_type = IPV6_PKTINFO; 698 cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo)); 699 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 700 memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo)); 701 } 702 703 if ((sock->type == isc_sockettype_udp) && 704 ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0)) 705 { 706 int use_min_mtu = 1; /* -1, 0, 1 */ 707 708 cmsgp = (struct cmsghdr *)(cmsgbuf + 709 msg->msg_controllen); 710 711 msg->msg_control = (void *)cmsgbuf; 712 msg->msg_controllen += cmsg_space(sizeof(use_min_mtu)); 713 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 714 715 cmsgp->cmsg_level = IPPROTO_IPV6; 716 cmsgp->cmsg_type = IPV6_USE_MIN_MTU; 717 cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu)); 718 memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu)); 719 } 720 721 if (isc_dscp_check_value > -1) { 722 if (sock->type == isc_sockettype_udp) 723 INSIST((int)dev->dscp == isc_dscp_check_value); 724 else if (sock->type == isc_sockettype_tcp) 725 INSIST((int)sock->dscp == isc_dscp_check_value); 726 } 727 728 if ((sock->type == isc_sockettype_udp) && 729 ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0)) 730 { 731 int dscp = (dev->dscp << 2) & 0xff; 732 733 INSIST(dev->dscp < 0x40); 734 735 if (sock->pf == AF_INET && sock->pktdscp) { 736 cmsgp = (struct cmsghdr *)(cmsgbuf + 737 msg->msg_controllen); 738 msg->msg_control = (void *)cmsgbuf; 739 msg->msg_controllen += cmsg_space(sizeof(dscp)); 740 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 741 742 cmsgp->cmsg_level = IPPROTO_IP; 743 cmsgp->cmsg_type = IP_TOS; 744 cmsgp->cmsg_len = cmsg_len(sizeof(char)); 745 *(unsigned char*)CMSG_DATA(cmsgp) = dscp; 746 } else if (sock->pf == AF_INET && sock->dscp != dev->dscp) { 747 if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, 748 (void *)&dscp, sizeof(int)) < 0) 749 { 750 char strbuf[ISC_STRERRORSIZE]; 751 isc__strerror(errno, strbuf, sizeof(strbuf)); 752 UNEXPECTED_ERROR(__FILE__, __LINE__, 753 "setsockopt(%d, IP_TOS, %.02x)" 754 " %s: %s", 755 sock->fd, dscp >> 2, 756 "failed", strbuf); 757 } else 758 sock->dscp = dscp; 759 } 760 761 if (sock->pf == AF_INET6 && sock->pktdscp) { 762 cmsgp = (struct cmsghdr *)(cmsgbuf + 763 
msg->msg_controllen); 764 msg->msg_control = (void *)cmsgbuf; 765 msg->msg_controllen += cmsg_space(sizeof(dscp)); 766 INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); 767 768 cmsgp->cmsg_level = IPPROTO_IPV6; 769 cmsgp->cmsg_type = IPV6_TCLASS; 770 cmsgp->cmsg_len = cmsg_len(sizeof(dscp)); 771 memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp)); 772 } else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) { 773 if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, 774 (void *)&dscp, sizeof(int)) < 0) { 775 char strbuf[ISC_STRERRORSIZE]; 776 isc__strerror(errno, strbuf, sizeof(strbuf)); 777 UNEXPECTED_ERROR(__FILE__, __LINE__, 778 "setsockopt(%d, IPV6_TCLASS, " 779 "%.02x) %s: %s", 780 sock->fd, dscp >> 2, 781 "failed", strbuf); 782 } else 783 sock->dscp = dscp; 784 } 785 786 if (msg->msg_controllen != 0 && 787 msg->msg_controllen < SENDCMSGBUFLEN) 788 { 789 memset(cmsgbuf + msg->msg_controllen, 0, 790 SENDCMSGBUFLEN - msg->msg_controllen); 791 } 792 } 793 794 if (write_countp != NULL) 795 *write_countp = write_count; 796 } 797 798 /* 799 * Construct an iov array and attach it to the msghdr passed in. This is 800 * the RECV constructor, which will use the available region of the buffer 801 * (if using a buffer list) or will use the internal region (if a single 802 * buffer I/O is requested). 803 * 804 * Nothing can be NULL, and the done event must list at least one buffer 805 * on the buffer linked list for this function to be meaningful. 806 * 807 * If read_countp != NULL, *read_countp will hold the number of bytes 808 * this transaction can receive. 809 */ 810 static void 811 build_msghdr_recv(isc__socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev, 812 struct msghdr *msg, struct iovec *iov, size_t *read_countp) 813 { 814 unsigned int iovcount; 815 isc_buffer_t *buffer; 816 isc_region_t available; 817 size_t read_count; 818 819 memset(msg, 0, sizeof(struct msghdr)); 820 821 if (sock->type == isc_sockettype_udp) { 822 memset(&dev->address, 0, sizeof(dev->address)); 823 msg->msg_name = (void *)&dev->address.type.sa; 824 msg->msg_namelen = sizeof(dev->address.type); 825 } else { /* TCP */ 826 msg->msg_name = NULL; 827 msg->msg_namelen = 0; 828 dev->address = sock->peer_address; 829 } 830 831 buffer = ISC_LIST_HEAD(dev->bufferlist); 832 read_count = 0; 833 834 /* 835 * Single buffer I/O? Skip what we've done so far in this region. 836 */ 837 if (buffer == NULL) { 838 read_count = dev->region.length - dev->n; 839 iov[0].iov_base = (void *)(dev->region.base + dev->n); 840 iov[0].iov_len = read_count; 841 iovcount = 1; 842 843 goto config; 844 } 845 846 /* 847 * Multibuffer I/O. 848 * Skip empty buffers. 849 */ 850 while (buffer != NULL) { 851 REQUIRE(ISC_BUFFER_VALID(buffer)); 852 if (isc_buffer_availablelength(buffer) != 0) 853 break; 854 buffer = ISC_LIST_NEXT(buffer, link); 855 } 856 857 iovcount = 0; 858 while (buffer != NULL) { 859 INSIST(iovcount < MAXSCATTERGATHER_RECV); 860 861 isc_buffer_availableregion(buffer, &available); 862 863 if (available.length > 0) { 864 iov[iovcount].iov_base = (void *)(available.base); 865 iov[iovcount].iov_len = available.length; 866 read_count += available.length; 867 iovcount++; 868 } 869 buffer = ISC_LIST_NEXT(buffer, link); 870 } 871 872 config: 873 874 /* 875 * If needed, set up to receive that one extra byte. 
876 */ 877 msg->msg_iov = iov; 878 msg->msg_iovlen = iovcount; 879 880 msg->msg_control = cmsgbuf; 881 msg->msg_controllen = RECVCMSGBUFLEN; 882 msg->msg_flags = 0; 883 884 if (read_countp != NULL) 885 *read_countp = read_count; 886 } 887 888 static void 889 set_dev_address(isc_sockaddr_t *address, isc__socket_t *sock, 890 isc_socketevent_t *dev) 891 { 892 if (sock->type == isc_sockettype_udp) { 893 if (address != NULL) 894 dev->address = *address; 895 else 896 dev->address = sock->peer_address; 897 } else if (sock->type == isc_sockettype_tcp) { 898 INSIST(address == NULL); 899 dev->address = sock->peer_address; 900 } 901 } 902 903 static void 904 destroy_socketevent(isc_event_t *event) { 905 isc_socketevent_t *ev = (isc_socketevent_t *)event; 906 907 INSIST(ISC_LIST_EMPTY(ev->bufferlist)); 908 909 (ev->destroy)(event); 910 } 911 912 static isc_socketevent_t * 913 allocate_socketevent(void *sender, 914 isc_eventtype_t eventtype, isc_taskaction_t action, 915 void *arg) 916 { 917 isc_socketevent_t *ev; 918 919 ev = (isc_socketevent_t *)isc_event_allocate(sender, 920 eventtype, action, arg, 921 sizeof(*ev)); 922 923 if (ev == NULL) 924 return (NULL); 925 926 ev->result = ISC_R_UNSET; 927 ISC_LINK_INIT(ev, ev_link); 928 ISC_LIST_INIT(ev->bufferlist); 929 ev->region.base = NULL; 930 ev->n = 0; 931 ev->offset = 0; 932 ev->attributes = 0; 933 ev->destroy = ev->ev_destroy; 934 ev->ev_destroy = destroy_socketevent; 935 ev->dscp = 0; 936 937 return (ev); 938 } 939 940 #define DOIO_SUCCESS 0 /* i/o ok, event sent */ 941 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ 942 #define DOIO_HARD 2 /* i/o error, event sent */ 943 #define DOIO_EOF 3 /* EOF, no event sent */ 944 945 static int 946 doio_recv(isc__socket_t *sock, isc_socketevent_t *dev) { 947 int cc; 948 struct iovec iov[MAXSCATTERGATHER_RECV]; 949 size_t read_count; 950 size_t actual_count; 951 struct msghdr msghdr; 952 isc_buffer_t *buffer; 953 int recv_errno; 954 char strbuf[ISC_STRERRORSIZE]; 955 char cmsgbuf[RECVCMSGBUFLEN] = {0}; 956 957 build_msghdr_recv(sock, cmsgbuf, dev, &msghdr, iov, &read_count); 958 959 cc = recvmsg(sock->fd, &msghdr, 0); 960 recv_errno = errno; 961 962 if (cc < 0) { 963 if (SOFT_ERROR(recv_errno)) 964 return (DOIO_SOFT); 965 966 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 967 isc__strerror(recv_errno, strbuf, sizeof(strbuf)); 968 socket_log(sock, NULL, IOEVENT, 969 "doio_recv: recvmsg(%d) %d bytes, err %d/%s", 970 sock->fd, cc, recv_errno, strbuf); 971 } 972 973 #define SOFT_OR_HARD(_system, _isc) \ 974 if (recv_errno == _system) { \ 975 if (sock->connected) { \ 976 dev->result = _isc; \ 977 return (DOIO_HARD); \ 978 } \ 979 return (DOIO_SOFT); \ 980 } 981 #define ALWAYS_HARD(_system, _isc) \ 982 if (recv_errno == _system) { \ 983 dev->result = _isc; \ 984 return (DOIO_HARD); \ 985 } 986 987 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 988 SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH); 989 SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 990 SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN); 991 /* HPUX 11.11 can return EADDRNOTAVAIL. */ 992 SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 993 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 994 /* Should never get this one but it was seen. */ 995 #ifdef ENOPROTOOPT 996 SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH); 997 #endif 998 /* 999 * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6 1000 * errors. 
1001 */ 1002 #ifdef EPROTO 1003 SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); 1004 #endif 1005 SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); 1006 1007 #undef SOFT_OR_HARD 1008 #undef ALWAYS_HARD 1009 1010 dev->result = isc__errno2result(recv_errno); 1011 return (DOIO_HARD); 1012 } 1013 1014 /* 1015 * On TCP and UNIX sockets, zero length reads indicate EOF, 1016 * while on UDP sockets, zero length reads are perfectly valid, 1017 * although strange. 1018 */ 1019 switch (sock->type) { 1020 case isc_sockettype_tcp: 1021 if (cc == 0) 1022 return (DOIO_EOF); 1023 break; 1024 case isc_sockettype_udp: 1025 break; 1026 default: 1027 INSIST(0); 1028 } 1029 1030 if (sock->type == isc_sockettype_udp) { 1031 dev->address.length = msghdr.msg_namelen; 1032 if (isc_sockaddr_getport(&dev->address) == 0) { 1033 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1034 socket_log(sock, &dev->address, IOEVENT, 1035 "dropping source port zero packet"); 1036 } 1037 return (DOIO_SOFT); 1038 } 1039 } 1040 1041 socket_log(sock, &dev->address, IOEVENT, 1042 "packet received correctly"); 1043 1044 /* 1045 * Overflow bit detection. If we received MORE bytes than we should, 1046 * this indicates an overflow situation. Set the flag in the 1047 * dev entry and adjust how much we read by one. 1048 */ 1049 /* 1050 * If there are control messages attached, run through them and pull 1051 * out the interesting bits. 1052 */ 1053 process_cmsg(sock, &msghdr, dev); 1054 1055 /* 1056 * update the buffers (if any) and the i/o count 1057 */ 1058 dev->n += cc; 1059 actual_count = cc; 1060 buffer = ISC_LIST_HEAD(dev->bufferlist); 1061 while (buffer != NULL && actual_count > 0U) { 1062 REQUIRE(ISC_BUFFER_VALID(buffer)); 1063 if (isc_buffer_availablelength(buffer) <= actual_count) { 1064 actual_count -= isc_buffer_availablelength(buffer); 1065 isc_buffer_add(buffer, 1066 isc_buffer_availablelength(buffer)); 1067 } else { 1068 isc_buffer_add(buffer, actual_count); 1069 actual_count = 0; 1070 POST(actual_count); 1071 break; 1072 } 1073 buffer = ISC_LIST_NEXT(buffer, link); 1074 if (buffer == NULL) { 1075 INSIST(actual_count == 0U); 1076 } 1077 } 1078 1079 /* 1080 * If we read less than we expected, update counters, 1081 * and let the upper layer poke the descriptor. 1082 */ 1083 if (((size_t)cc != read_count) && (dev->n < dev->minimum)) 1084 return (DOIO_SOFT); 1085 1086 /* 1087 * Full reads are posted, or partials if partials are ok. 1088 */ 1089 dev->result = ISC_R_SUCCESS; 1090 return (DOIO_SUCCESS); 1091 } 1092 1093 /* 1094 * Returns: 1095 * DOIO_SUCCESS The operation succeeded. dev->result contains 1096 * ISC_R_SUCCESS. 1097 * 1098 * DOIO_HARD A hard or unexpected I/O error was encountered. 1099 * dev->result contains the appropriate error. 1100 * 1101 * DOIO_SOFT A soft I/O error was encountered. No senddone 1102 * event was sent. The operation should be retried. 1103 * 1104 * No other return values are possible. 1105 */ 1106 static int 1107 doio_send(isc__socket_t *sock, isc_socketevent_t *dev) { 1108 int cc; 1109 struct iovec iov[MAXSCATTERGATHER_SEND]; 1110 size_t write_count; 1111 struct msghdr msghdr; 1112 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1113 int attempts = 0; 1114 int send_errno; 1115 char strbuf[ISC_STRERRORSIZE]; 1116 char cmsgbuf[SENDCMSGBUFLEN] = {0}; 1117 1118 build_msghdr_send(sock, cmsgbuf, dev, &msghdr, iov, &write_count); 1119 1120 resend: 1121 cc = sendmsg(sock->fd, &msghdr, 0); 1122 send_errno = errno; 1123 1124 /* 1125 * Check for error or block condition. 
1126 */ 1127 if (cc < 0) { 1128 if (send_errno == EINTR && ++attempts < NRETRIES) 1129 goto resend; 1130 1131 if (SOFT_ERROR(send_errno)) { 1132 if (errno == EWOULDBLOCK || errno == EAGAIN) 1133 dev->result = ISC_R_WOULDBLOCK; 1134 return (DOIO_SOFT); 1135 } 1136 1137 #define SOFT_OR_HARD(_system, _isc) \ 1138 if (send_errno == _system) { \ 1139 if (sock->connected) { \ 1140 dev->result = _isc; \ 1141 return (DOIO_HARD); \ 1142 } \ 1143 return (DOIO_SOFT); \ 1144 } 1145 #define ALWAYS_HARD(_system, _isc) \ 1146 if (send_errno == _system) { \ 1147 dev->result = _isc; \ 1148 return (DOIO_HARD); \ 1149 } 1150 1151 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 1152 ALWAYS_HARD(EACCES, ISC_R_NOPERM); 1153 ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 1154 ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 1155 ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 1156 #ifdef EHOSTDOWN 1157 ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); 1158 #endif 1159 ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); 1160 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 1161 ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); 1162 ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); 1163 ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); 1164 1165 #undef SOFT_OR_HARD 1166 #undef ALWAYS_HARD 1167 1168 /* 1169 * The other error types depend on whether or not the 1170 * socket is UDP or TCP. If it is UDP, some errors 1171 * that we expect to be fatal under TCP are merely 1172 * annoying, and are really soft errors. 1173 * 1174 * However, these soft errors are still returned as 1175 * a status. 1176 */ 1177 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1178 isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1179 UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", 1180 addrbuf, strbuf); 1181 dev->result = isc__errno2result(send_errno); 1182 return (DOIO_HARD); 1183 } 1184 1185 if (cc == 0) { 1186 UNEXPECTED_ERROR(__FILE__, __LINE__, 1187 "doio_send: send() %s 0", "returned"); 1188 } 1189 1190 /* 1191 * If we write less than we expected, update counters, poke. 1192 */ 1193 dev->n += cc; 1194 if ((size_t)cc != write_count) 1195 return (DOIO_SOFT); 1196 1197 /* 1198 * Exactly what we wanted to write. We're done with this 1199 * entry. Post its completion event. 1200 */ 1201 dev->result = ISC_R_SUCCESS; 1202 return (DOIO_SUCCESS); 1203 } 1204 1205 /* 1206 * Kill. 1207 * 1208 * Caller must ensure that the socket is not locked and no external 1209 * references exist. 1210 */ 1211 static void 1212 socketclose(isc__socketmgr_t *manager, isc__socket_t *sock, int fd) { 1213 /* 1214 * No one has this socket open, so the watcher doesn't have to be 1215 * poked, and the socket doesn't have to be locked. 
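	 * (The actual close(2) happens in wakeup_socket(), reached through
	 * the SELECT_POKE_CLOSE poke below once CLOSE_PENDING is seen.)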
1216 */ 1217 manager->fds[fd] = NULL; 1218 manager->fdstate[fd] = CLOSE_PENDING; 1219 select_poke(manager, fd, SELECT_POKE_CLOSE); 1220 1221 if (sock->active == 1) { 1222 sock->active = 0; 1223 } 1224 1225 /* 1226 * update manager->maxfd here (XXX: this should be implemented more 1227 * efficiently) 1228 */ 1229 if (manager->maxfd == fd) { 1230 int i; 1231 1232 manager->maxfd = 0; 1233 for (i = fd - 1; i >= 0; i--) { 1234 if (manager->fdstate[i] == MANAGED) { 1235 manager->maxfd = i; 1236 break; 1237 } 1238 } 1239 } 1240 1241 } 1242 1243 static void 1244 destroy(isc__socket_t **sockp) { 1245 int fd; 1246 isc__socket_t *sock = *sockp; 1247 isc__socketmgr_t *manager = sock->manager; 1248 1249 socket_log(sock, NULL, CREATION, "destroying"); 1250 1251 INSIST(ISC_LIST_EMPTY(sock->recv_list)); 1252 INSIST(ISC_LIST_EMPTY(sock->send_list)); 1253 INSIST(sock->connect_ev == NULL); 1254 INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks); 1255 1256 if (sock->fd >= 0) { 1257 fd = sock->fd; 1258 sock->fd = -1; 1259 socketclose(manager, sock, fd); 1260 } 1261 1262 ISC_LIST_UNLINK(manager->socklist, sock, link); 1263 1264 /* can't unlock manager as its memory context is still used */ 1265 free_socket(sockp); 1266 } 1267 1268 static isc_result_t 1269 allocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type, 1270 isc__socket_t **socketp) 1271 { 1272 isc__socket_t *sock; 1273 1274 sock = malloc(sizeof(*sock)); 1275 1276 if (sock == NULL) 1277 return (ISC_R_NOMEMORY); 1278 1279 sock->common.magic = 0; 1280 sock->common.impmagic = 0; 1281 sock->references = 0; 1282 1283 sock->manager = manager; 1284 sock->type = type; 1285 sock->fd = -1; 1286 sock->dscp = 0; /* TOS/TCLASS is zero until set. */ 1287 sock->active = 0; 1288 1289 ISC_LINK_INIT(sock, link); 1290 1291 /* 1292 * Set up list of readers and writers to be initially empty. 1293 */ 1294 ISC_LIST_INIT(sock->recv_list); 1295 ISC_LIST_INIT(sock->send_list); 1296 sock->connect_ev = NULL; 1297 sock->pending_recv = 0; 1298 sock->pending_send = 0; 1299 sock->connected = 0; 1300 sock->connecting = 0; 1301 sock->bound = 0; 1302 sock->pktdscp = 0; 1303 1304 /* 1305 * Initialize readable and writable events. 1306 */ 1307 ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t), 1308 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR, 1309 NULL, sock, sock, NULL); 1310 ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t), 1311 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW, 1312 NULL, sock, sock, NULL); 1313 1314 sock->common.magic = ISCAPI_SOCKET_MAGIC; 1315 sock->common.impmagic = SOCKET_MAGIC; 1316 *socketp = sock; 1317 1318 return (ISC_R_SUCCESS); 1319 } 1320 1321 /* 1322 * This event requires that the various lists be empty, that the reference 1323 * count be 1, and that the magic number is valid. The other socket bits, 1324 * like the lock, must be initialized as well. The fd associated must be 1325 * marked as closed, by setting it to -1 on close, or this routine will 1326 * also close the socket. 
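 * (In this implementation, free_socket() INSISTs that the reference count
 * has already dropped to zero and only releases the structure itself.)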
1327 */ 1328 static void 1329 free_socket(isc__socket_t **socketp) { 1330 isc__socket_t *sock = *socketp; 1331 1332 INSIST(VALID_SOCKET(sock)); 1333 INSIST(sock->references == 0); 1334 INSIST(!sock->connecting); 1335 INSIST(!sock->pending_recv); 1336 INSIST(!sock->pending_send); 1337 INSIST(ISC_LIST_EMPTY(sock->recv_list)); 1338 INSIST(ISC_LIST_EMPTY(sock->send_list)); 1339 INSIST(!ISC_LINK_LINKED(sock, link)); 1340 1341 sock->common.magic = 0; 1342 sock->common.impmagic = 0; 1343 1344 free(sock); 1345 1346 *socketp = NULL; 1347 } 1348 1349 static void 1350 use_min_mtu(isc__socket_t *sock) { 1351 /* use minimum MTU */ 1352 if (sock->pf == AF_INET6) { 1353 int on = 1; 1354 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 1355 (void *)&on, sizeof(on)); 1356 } 1357 } 1358 1359 static void 1360 set_tcp_maxseg(isc__socket_t *sock, int size) { 1361 if (sock->type == isc_sockettype_tcp) 1362 (void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG, 1363 (void *)&size, sizeof(size)); 1364 } 1365 1366 static isc_result_t 1367 opensocket(isc__socket_t *sock) 1368 { 1369 isc_result_t result; 1370 char strbuf[ISC_STRERRORSIZE]; 1371 const char *err = "socket"; 1372 int on = 1; 1373 1374 switch (sock->type) { 1375 case isc_sockettype_udp: 1376 sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); 1377 break; 1378 case isc_sockettype_tcp: 1379 sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 1380 break; 1381 } 1382 1383 if (sock->fd < 0) { 1384 switch (errno) { 1385 case EMFILE: 1386 case ENFILE: 1387 isc__strerror(errno, strbuf, sizeof(strbuf)); 1388 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 1389 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 1390 "%s: %s", err, strbuf); 1391 /* fallthrough */ 1392 case ENOBUFS: 1393 return (ISC_R_NORESOURCES); 1394 1395 case EPROTONOSUPPORT: 1396 case EPFNOSUPPORT: 1397 case EAFNOSUPPORT: 1398 /* 1399 * Linux 2.2 (and maybe others) return EINVAL instead of 1400 * EAFNOSUPPORT. 1401 */ 1402 case EINVAL: 1403 return (ISC_R_FAMILYNOSUPPORT); 1404 1405 default: 1406 isc__strerror(errno, strbuf, sizeof(strbuf)); 1407 UNEXPECTED_ERROR(__FILE__, __LINE__, 1408 "%s() %s: %s", err, "failed", 1409 strbuf); 1410 return (ISC_R_UNEXPECTED); 1411 } 1412 } 1413 1414 result = make_nonblock(sock->fd); 1415 if (result != ISC_R_SUCCESS) { 1416 (void)close(sock->fd); 1417 return (result); 1418 } 1419 1420 /* 1421 * Use minimum mtu if possible. 1422 */ 1423 if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) { 1424 use_min_mtu(sock); 1425 set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */ 1426 } 1427 1428 if (sock->type == isc_sockettype_udp) { 1429 1430 if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, 1431 (void *)&on, sizeof(on)) < 0 1432 && errno != ENOPROTOOPT) { 1433 isc__strerror(errno, strbuf, sizeof(strbuf)); 1434 UNEXPECTED_ERROR(__FILE__, __LINE__, 1435 "setsockopt(%d, SO_TIMESTAMP) %s: %s", 1436 sock->fd, "failed", strbuf); 1437 /* Press on... */ 1438 } 1439 1440 /* RFC 3542 */ 1441 if ((sock->pf == AF_INET6) 1442 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1443 (void *)&on, sizeof(on)) < 0)) { 1444 isc__strerror(errno, strbuf, sizeof(strbuf)); 1445 UNEXPECTED_ERROR(__FILE__, __LINE__, 1446 "setsockopt(%d, IPV6_RECVPKTINFO) " 1447 "%s: %s", sock->fd, "failed", 1448 strbuf); 1449 } 1450 } 1451 1452 if (sock->active == 0) { 1453 sock->active = 1; 1454 } 1455 1456 return (ISC_R_SUCCESS); 1457 } 1458 1459 /* 1460 * Create a 'type' socket managed 1461 * by 'manager'. 
Events will be posted to 'task' and when dispatched 1462 * 'action' will be called with 'arg' as the arg value. The new 1463 * socket is returned in 'socketp'. 1464 */ 1465 static isc_result_t 1466 socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 1467 isc_socket_t **socketp) 1468 { 1469 isc__socket_t *sock = NULL; 1470 isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 1471 isc_result_t result; 1472 int lockid; 1473 1474 REQUIRE(VALID_MANAGER(manager)); 1475 REQUIRE(socketp != NULL && *socketp == NULL); 1476 1477 result = allocate_socket(manager, type, &sock); 1478 if (result != ISC_R_SUCCESS) 1479 return (result); 1480 1481 switch (sock->type) { 1482 case isc_sockettype_udp: 1483 #define DCSPPKT(pf) ((pf == AF_INET) ? ISC_NET_DSCPPKTV4 : ISC_NET_DSCPPKTV6) 1484 sock->pktdscp = (isc_net_probedscp() & DCSPPKT(pf)) != 0; 1485 break; 1486 case isc_sockettype_tcp: 1487 break; 1488 default: 1489 INSIST(0); 1490 } 1491 1492 sock->pf = pf; 1493 1494 result = opensocket(sock); 1495 if (result != ISC_R_SUCCESS) { 1496 free_socket(&sock); 1497 return (result); 1498 } 1499 1500 sock->common.methods = (isc_socketmethods_t *)&socketmethods; 1501 sock->references = 1; 1502 *socketp = (isc_socket_t *)sock; 1503 1504 /* 1505 * Note we don't have to lock the socket like we normally would because 1506 * there are no external references to it yet. 1507 */ 1508 1509 lockid = FDLOCK_ID(sock->fd); 1510 manager->fds[sock->fd] = sock; 1511 manager->fdstate[sock->fd] = MANAGED; 1512 1513 ISC_LIST_APPEND(manager->socklist, sock, link); 1514 if (manager->maxfd < sock->fd) 1515 manager->maxfd = sock->fd; 1516 1517 socket_log(sock, NULL, CREATION, "created"); 1518 1519 return (ISC_R_SUCCESS); 1520 } 1521 1522 /*% 1523 * Create a new 'type' socket managed by 'manager'. Events 1524 * will be posted to 'task' and when dispatched 'action' will be 1525 * called with 'arg' as the arg value. The new socket is returned 1526 * in 'socketp'. 1527 */ 1528 isc_result_t 1529 isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 1530 isc_socket_t **socketp) 1531 { 1532 return (socket_create(manager0, pf, type, socketp)); 1533 } 1534 1535 /* 1536 * Attach to a socket. Caller must explicitly detach when it is done. 1537 */ 1538 void 1539 isc__socket_attach(isc_socket_t *sock0, isc_socket_t **socketp) { 1540 isc__socket_t *sock = (isc__socket_t *)sock0; 1541 1542 REQUIRE(VALID_SOCKET(sock)); 1543 REQUIRE(socketp != NULL && *socketp == NULL); 1544 1545 sock->references++; 1546 1547 *socketp = (isc_socket_t *)sock; 1548 } 1549 1550 /* 1551 * Dereference a socket. If this is the last reference to it, clean things 1552 * up by destroying the socket. 1553 */ 1554 void 1555 isc__socket_detach(isc_socket_t **socketp) { 1556 isc__socket_t *sock; 1557 isc_boolean_t kill_socket = ISC_FALSE; 1558 1559 REQUIRE(socketp != NULL); 1560 sock = (isc__socket_t *)*socketp; 1561 REQUIRE(VALID_SOCKET(sock)); 1562 1563 REQUIRE(sock->references > 0); 1564 sock->references--; 1565 if (sock->references == 0) 1566 kill_socket = ISC_TRUE; 1567 1568 if (kill_socket) 1569 destroy(&sock); 1570 1571 *socketp = NULL; 1572 } 1573 1574 /* 1575 * I/O is possible on a given socket. Schedule an event to this task that 1576 * will call an internal function to do the I/O. This will charge the 1577 * task with the I/O operation and let our select loop handler get back 1578 * to doing something real as fast as possible. 1579 * 1580 * The socket and manager must be locked before calling this function. 
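 * Each dispatched internal event holds a socket reference; internal_recv()
 * and internal_send() drop that reference when they run.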
1581 */ 1582 static void 1583 dispatch_recv(isc__socket_t *sock) { 1584 intev_t *iev; 1585 isc_socketevent_t *ev; 1586 isc_task_t *sender; 1587 1588 INSIST(!sock->pending_recv); 1589 1590 ev = ISC_LIST_HEAD(sock->recv_list); 1591 if (ev == NULL) 1592 return; 1593 socket_log(sock, NULL, EVENT, NULL, 0, 0, 1594 "dispatch_recv: event %p -> task %p", 1595 ev, ev->ev_sender); 1596 sender = ev->ev_sender; 1597 1598 sock->pending_recv = 1; 1599 iev = &sock->readable_ev; 1600 1601 sock->references++; 1602 iev->ev_sender = sock; 1603 iev->ev_action = internal_recv; 1604 iev->ev_arg = sock; 1605 1606 isc_task_send(sender, (isc_event_t **)&iev); 1607 } 1608 1609 static void 1610 dispatch_send(isc__socket_t *sock) { 1611 intev_t *iev; 1612 isc_socketevent_t *ev; 1613 isc_task_t *sender; 1614 1615 INSIST(!sock->pending_send); 1616 1617 ev = ISC_LIST_HEAD(sock->send_list); 1618 if (ev == NULL) 1619 return; 1620 socket_log(sock, NULL, EVENT, NULL, 0, 0, 1621 "dispatch_send: event %p -> task %p", 1622 ev, ev->ev_sender); 1623 sender = ev->ev_sender; 1624 1625 sock->pending_send = 1; 1626 iev = &sock->writable_ev; 1627 1628 sock->references++; 1629 iev->ev_sender = sock; 1630 iev->ev_action = internal_send; 1631 iev->ev_arg = sock; 1632 1633 isc_task_send(sender, (isc_event_t **)&iev); 1634 } 1635 1636 static void 1637 dispatch_connect(isc__socket_t *sock) { 1638 intev_t *iev; 1639 isc_socket_connev_t *ev; 1640 1641 iev = &sock->writable_ev; 1642 1643 ev = sock->connect_ev; 1644 INSIST(ev != NULL); /* XXX */ 1645 1646 INSIST(sock->connecting); 1647 1648 sock->references++; /* keep socket around for this internal event */ 1649 iev->ev_sender = sock; 1650 iev->ev_action = internal_connect; 1651 iev->ev_arg = sock; 1652 1653 isc_task_send(ev->ev_sender, (isc_event_t **)&iev); 1654 } 1655 1656 /* 1657 * Dequeue an item off the given socket's read queue, set the result code 1658 * in the done event to the one provided, and send it to the task it was 1659 * destined for. 1660 * 1661 * If the event to be sent is on a list, remove it before sending. If 1662 * asked to, send and detach from the socket as well. 1663 * 1664 * Caller must have the socket locked if the event is attached to the socket. 1665 */ 1666 static void 1667 send_recvdone_event(isc__socket_t *sock, isc_socketevent_t **dev) { 1668 isc_task_t *task; 1669 1670 task = (*dev)->ev_sender; 1671 1672 (*dev)->ev_sender = sock; 1673 1674 if (ISC_LINK_LINKED(*dev, ev_link)) 1675 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); 1676 1677 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1678 == ISC_SOCKEVENTATTR_ATTACHED) 1679 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1680 else 1681 isc_task_send(task, (isc_event_t **)dev); 1682 } 1683 1684 /* 1685 * See comments for send_recvdone_event() above. 1686 * 1687 * Caller must have the socket locked if the event is attached to the socket. 
1688 */ 1689 static void 1690 send_senddone_event(isc__socket_t *sock, isc_socketevent_t **dev) { 1691 isc_task_t *task; 1692 1693 INSIST(dev != NULL && *dev != NULL); 1694 1695 task = (*dev)->ev_sender; 1696 (*dev)->ev_sender = sock; 1697 1698 if (ISC_LINK_LINKED(*dev, ev_link)) 1699 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); 1700 1701 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1702 == ISC_SOCKEVENTATTR_ATTACHED) 1703 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1704 else 1705 isc_task_send(task, (isc_event_t **)dev); 1706 } 1707 1708 static void 1709 internal_recv(isc_task_t *me, isc_event_t *ev) { 1710 isc_socketevent_t *dev; 1711 isc__socket_t *sock; 1712 1713 INSIST(ev->ev_type == ISC_SOCKEVENT_INTR); 1714 1715 sock = ev->ev_sender; 1716 INSIST(VALID_SOCKET(sock)); 1717 1718 socket_log(sock, NULL, IOEVENT, 1719 "internal_recv: task %p got event %p", me, ev); 1720 1721 INSIST(sock->pending_recv == 1); 1722 sock->pending_recv = 0; 1723 1724 INSIST(sock->references > 0); 1725 sock->references--; /* the internal event is done with this socket */ 1726 if (sock->references == 0) { 1727 destroy(&sock); 1728 return; 1729 } 1730 1731 /* 1732 * Try to do as much I/O as possible on this socket. There are no 1733 * limits here, currently. 1734 */ 1735 dev = ISC_LIST_HEAD(sock->recv_list); 1736 while (dev != NULL) { 1737 switch (doio_recv(sock, dev)) { 1738 case DOIO_SOFT: 1739 goto poke; 1740 1741 case DOIO_EOF: 1742 /* 1743 * read of 0 means the remote end was closed. 1744 * Run through the event queue and dispatch all 1745 * the events with an EOF result code. 1746 */ 1747 do { 1748 dev->result = ISC_R_EOF; 1749 send_recvdone_event(sock, &dev); 1750 dev = ISC_LIST_HEAD(sock->recv_list); 1751 } while (dev != NULL); 1752 goto poke; 1753 1754 case DOIO_SUCCESS: 1755 case DOIO_HARD: 1756 send_recvdone_event(sock, &dev); 1757 break; 1758 } 1759 1760 dev = ISC_LIST_HEAD(sock->recv_list); 1761 } 1762 1763 poke: 1764 if (!ISC_LIST_EMPTY(sock->recv_list)) 1765 select_poke(sock->manager, sock->fd, SELECT_POKE_READ); 1766 } 1767 1768 static void 1769 internal_send(isc_task_t *me, isc_event_t *ev) { 1770 isc_socketevent_t *dev; 1771 isc__socket_t *sock; 1772 1773 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 1774 1775 /* 1776 * Find out what socket this is and lock it. 1777 */ 1778 sock = (isc__socket_t *)ev->ev_sender; 1779 INSIST(VALID_SOCKET(sock)); 1780 socket_log(sock, NULL, IOEVENT, 1781 "internal_send: task %p got event %p", me, ev); 1782 1783 INSIST(sock->pending_send == 1); 1784 sock->pending_send = 0; 1785 1786 INSIST(sock->references > 0); 1787 sock->references--; /* the internal event is done with this socket */ 1788 if (sock->references == 0) { 1789 destroy(&sock); 1790 return; 1791 } 1792 1793 /* 1794 * Try to do as much I/O as possible on this socket. There are no 1795 * limits here, currently. 1796 */ 1797 dev = ISC_LIST_HEAD(sock->send_list); 1798 while (dev != NULL) { 1799 switch (doio_send(sock, dev)) { 1800 case DOIO_SOFT: 1801 goto poke; 1802 1803 case DOIO_HARD: 1804 case DOIO_SUCCESS: 1805 send_senddone_event(sock, &dev); 1806 break; 1807 } 1808 1809 dev = ISC_LIST_HEAD(sock->send_list); 1810 } 1811 1812 poke: 1813 if (!ISC_LIST_EMPTY(sock->send_list)) 1814 select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); 1815 } 1816 1817 /* 1818 * Process read/writes on each fd here. Avoid locking 1819 * and unlocking twice if both reads and writes are possible. 
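 * Once I/O is dispatched here the descriptor is removed from the watched
 * sets; internal_recv()/internal_send() re-arm it with select_poke() while
 * queued requests remain.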
1820 */ 1821 static void 1822 process_fd(isc__socketmgr_t *manager, int fd, isc_boolean_t readable, 1823 isc_boolean_t writeable) 1824 { 1825 isc__socket_t *sock; 1826 isc_boolean_t unwatch_read = ISC_FALSE, unwatch_write = ISC_FALSE; 1827 1828 /* 1829 * If the socket is going to be closed, don't do more I/O. 1830 */ 1831 if (manager->fdstate[fd] == CLOSE_PENDING) { 1832 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 1833 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 1834 return; 1835 } 1836 1837 sock = manager->fds[fd]; 1838 if (readable) { 1839 if (sock == NULL) { 1840 unwatch_read = ISC_TRUE; 1841 goto check_write; 1842 } 1843 if (!SOCK_DEAD(sock)) { 1844 dispatch_recv(sock); 1845 } 1846 unwatch_read = ISC_TRUE; 1847 } 1848 check_write: 1849 if (writeable) { 1850 if (sock == NULL) { 1851 unwatch_write = ISC_TRUE; 1852 goto unlock_fd; 1853 } 1854 if (!SOCK_DEAD(sock)) { 1855 if (sock->connecting) 1856 dispatch_connect(sock); 1857 else 1858 dispatch_send(sock); 1859 } 1860 unwatch_write = ISC_TRUE; 1861 } 1862 1863 unlock_fd: 1864 if (unwatch_read) 1865 (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 1866 if (unwatch_write) 1867 (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 1868 1869 } 1870 1871 static void 1872 process_fds(isc__socketmgr_t *manager, int maxfd, fd_set *readfds, 1873 fd_set *writefds) 1874 { 1875 int i; 1876 1877 REQUIRE(maxfd <= (int)manager->maxsocks); 1878 1879 for (i = 0; i < maxfd; i++) { 1880 process_fd(manager, i, FD_ISSET(i, readfds), 1881 FD_ISSET(i, writefds)); 1882 } 1883 } 1884 1885 /* 1886 * Create a new socket manager. 1887 */ 1888 1889 static isc_result_t 1890 setup_watcher(isc__socketmgr_t *manager) { 1891 isc_result_t result; 1892 1893 UNUSED(result); 1894 1895 manager->fd_bufsize = sizeof(fd_set); 1896 1897 manager->read_fds = NULL; 1898 manager->read_fds_copy = NULL; 1899 manager->write_fds = NULL; 1900 manager->write_fds_copy = NULL; 1901 1902 manager->read_fds = malloc(manager->fd_bufsize); 1903 if (manager->read_fds != NULL) 1904 manager->read_fds_copy = malloc(manager->fd_bufsize); 1905 if (manager->read_fds_copy != NULL) 1906 manager->write_fds = malloc(manager->fd_bufsize); 1907 if (manager->write_fds != NULL) { 1908 manager->write_fds_copy = malloc(manager->fd_bufsize); 1909 } 1910 if (manager->write_fds_copy == NULL) { 1911 if (manager->write_fds != NULL) { 1912 free(manager->write_fds); 1913 } 1914 if (manager->read_fds_copy != NULL) { 1915 free(manager->read_fds_copy); 1916 } 1917 if (manager->read_fds != NULL) { 1918 free(manager->read_fds); 1919 } 1920 return (ISC_R_NOMEMORY); 1921 } 1922 memset(manager->read_fds, 0, manager->fd_bufsize); 1923 memset(manager->write_fds, 0, manager->fd_bufsize); 1924 1925 manager->maxfd = 0; 1926 1927 return (ISC_R_SUCCESS); 1928 } 1929 1930 static void 1931 cleanup_watcher(isc__socketmgr_t *manager) { 1932 1933 if (manager->read_fds != NULL) 1934 free(manager->read_fds); 1935 if (manager->read_fds_copy != NULL) 1936 free(manager->read_fds_copy); 1937 if (manager->write_fds != NULL) 1938 free(manager->write_fds); 1939 if (manager->write_fds_copy != NULL) 1940 free(manager->write_fds_copy); 1941 } 1942 1943 isc_result_t 1944 isc__socketmgr_create(isc_socketmgr_t **managerp) { 1945 return (isc__socketmgr_create2(managerp, 0)); 1946 } 1947 1948 isc_result_t 1949 isc__socketmgr_create2(isc_socketmgr_t **managerp, 1950 unsigned int maxsocks) 1951 { 1952 isc__socketmgr_t *manager; 1953 isc_result_t result; 1954 1955 REQUIRE(managerp != NULL && *managerp == NULL); 1956 1957 if (socketmgr != NULL) { 1958 /* 
Don't allow maxsocks to be updated */ 1959 if (maxsocks > 0 && socketmgr->maxsocks != maxsocks) 1960 return (ISC_R_EXISTS); 1961 1962 socketmgr->refs++; 1963 *managerp = (isc_socketmgr_t *)socketmgr; 1964 return (ISC_R_SUCCESS); 1965 } 1966 1967 if (maxsocks == 0) 1968 maxsocks = FD_SETSIZE; 1969 1970 manager = malloc(sizeof(*manager)); 1971 if (manager == NULL) 1972 return (ISC_R_NOMEMORY); 1973 1974 /* zero-clear so that necessary cleanup on failure will be easy */ 1975 memset(manager, 0, sizeof(*manager)); 1976 manager->maxsocks = maxsocks; 1977 manager->fds = malloc(manager->maxsocks * sizeof(isc__socket_t *)); 1978 if (manager->fds == NULL) { 1979 result = ISC_R_NOMEMORY; 1980 goto free_manager; 1981 } 1982 manager->fdstate = malloc(manager->maxsocks * sizeof(int)); 1983 if (manager->fdstate == NULL) { 1984 result = ISC_R_NOMEMORY; 1985 goto free_manager; 1986 } 1987 1988 manager->common.methods = &socketmgrmethods; 1989 manager->common.magic = ISCAPI_SOCKETMGR_MAGIC; 1990 manager->common.impmagic = SOCKET_MANAGER_MAGIC; 1991 memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *)); 1992 ISC_LIST_INIT(manager->socklist); 1993 1994 manager->refs = 1; 1995 1996 /* 1997 * Set up initial state for the select loop 1998 */ 1999 result = setup_watcher(manager); 2000 if (result != ISC_R_SUCCESS) 2001 goto cleanup; 2002 2003 memset(manager->fdstate, 0, manager->maxsocks * sizeof(int)); 2004 2005 socketmgr = manager; 2006 *managerp = (isc_socketmgr_t *)manager; 2007 2008 return (ISC_R_SUCCESS); 2009 2010 cleanup: 2011 2012 free_manager: 2013 if (manager->fdstate != NULL) { 2014 free(manager->fdstate); 2015 } 2016 if (manager->fds != NULL) { 2017 free(manager->fds); 2018 } 2019 free(manager); 2020 2021 return (result); 2022 } 2023 2024 void 2025 isc__socketmgr_destroy(isc_socketmgr_t **managerp) { 2026 isc__socketmgr_t *manager; 2027 int i; 2028 2029 /* 2030 * Destroy a socket manager. 2031 */ 2032 2033 REQUIRE(managerp != NULL); 2034 manager = (isc__socketmgr_t *)*managerp; 2035 REQUIRE(VALID_MANAGER(manager)); 2036 2037 manager->refs--; 2038 if (manager->refs > 0) { 2039 *managerp = NULL; 2040 return; 2041 } 2042 socketmgr = NULL; 2043 2044 /* 2045 * Wait for all sockets to be destroyed. 2046 */ 2047 while (!ISC_LIST_EMPTY(manager->socklist)) { 2048 isc__taskmgr_dispatch(NULL); 2049 } 2050 2051 /* 2052 * Here, poke our select/poll thread. Do this by closing the write 2053 * half of the pipe, which will send EOF to the read half. 2054 * This is currently a no-op in the non-threaded case. 2055 */ 2056 select_poke(manager, 0, SELECT_POKE_SHUTDOWN); 2057 2058 /* 2059 * Clean up. 

void
isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
	isc__socketmgr_t *manager;
	int i;

	/*
	 * Destroy a socket manager.
	 */

	REQUIRE(managerp != NULL);
	manager = (isc__socketmgr_t *)*managerp;
	REQUIRE(VALID_MANAGER(manager));

	manager->refs--;
	if (manager->refs > 0) {
		*managerp = NULL;
		return;
	}
	socketmgr = NULL;

	/*
	 * Wait for all sockets to be destroyed.
	 */
	while (!ISC_LIST_EMPTY(manager->socklist)) {
		isc__taskmgr_dispatch(NULL);
	}

	/*
	 * Here, poke our select/poll thread.  Do this by closing the write
	 * half of the pipe, which will send EOF to the read half.
	 * This is currently a no-op in the non-threaded case.
	 */
	select_poke(manager, 0, SELECT_POKE_SHUTDOWN);

	/*
	 * Clean up.
	 */
	cleanup_watcher(manager);

	for (i = 0; i < (int)manager->maxsocks; i++)
		if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */
			(void)close(i);

	free(manager->fds);
	free(manager->fdstate);

	manager->common.magic = 0;
	manager->common.impmagic = 0;
	free(manager);

	*managerp = NULL;

	socketmgr = NULL;
}

static isc_result_t
socket_recv(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	if (sock->type == isc_sockettype_udp) {
		io_state = doio_recv(sock, dev);
	} else {
		if (ISC_LIST_EMPTY(sock->recv_list))
			io_state = doio_recv(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't read all or part of the request right now, so
		 * queue it.
		 *
		 * Attach to socket and to task
		 */
		isc_task_attach(task, &ntask);
		dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

		/*
		 * Enqueue the request.  If the socket was previously not being
		 * watched, poke the watcher to start paying attention to it.
		 */
		if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv)
			select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
		ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);

		socket_log(sock, NULL, EVENT, NULL, 0, 0,
			   "socket_recv: event %p -> task %p",
			   dev, ntask);

		if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
			result = ISC_R_INPROGRESS;
		break;

	case DOIO_EOF:
		dev->result = ISC_R_EOF;
		/* fallthrough */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_recvdone_event(sock, &dev);
		break;
	}

	return (result);
}

isc_result_t
isc__socket_recvv(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		  unsigned int minimum, isc_task_t *task,
		  isc_taskaction_t action, void *arg)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_availablecount(buflist);
	REQUIRE(iocount > 0);

	INSIST(sock->bound);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * UDP sockets are always partial read
	 */
	if (sock->type == isc_sockettype_udp)
		dev->minimum = 1;
	else {
		if (minimum == 0)
			dev->minimum = iocount;
		else
			dev->minimum = minimum;
	}

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	return (socket_recv(sock, dev, task, 0));
}
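
/*
 * Illustrative usage sketch (not part of the original ISC code): a caller
 * hands isc__socket_recvv() a list of buffers plus a task/action pair on an
 * already-bound socket; the buffers are moved onto the request and come back
 * attached to the ISC_SOCKEVENT_RECVDONE event.  All 'example_' names and
 * the static storage are hypothetical.
 */
static unsigned char example_rxdata[512];
static isc_buffer_t example_rxbuf;

static void
example_recv_done(isc_task_t *task, isc_event_t *event) {
	isc_socketevent_t *dev = (isc_socketevent_t *)event;

	UNUSED(task);
	INSIST(event->ev_type == ISC_SOCKEVENT_RECVDONE);

	if (dev->result == ISC_R_SUCCESS) {
		/* dev->n bytes arrived in the buffers on dev->bufferlist. */
	}
	isc_event_free(&event);
}

static isc_result_t
example_start_recv(isc_socket_t *sock, isc_task_t *task) {
	isc_bufferlist_t blist;

	ISC_LIST_INIT(blist);
	isc_buffer_init(&example_rxbuf, example_rxdata,
			sizeof(example_rxdata));
	ISC_LIST_ENQUEUE(blist, &example_rxbuf, link);

	/*
	 * minimum == 0 asks for a "full" read on stream sockets; UDP
	 * sockets always complete after a single datagram.
	 */
	return (isc__socket_recvv(sock, &blist, 0, task,
				  example_recv_done, NULL));
}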

static isc_result_t
socket_send(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
	    unsigned int flags)
{
	int io_state;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	set_dev_address(address, sock, dev);
	if (pktinfo != NULL) {
		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
		dev->pktinfo = *pktinfo;

		if (!isc_sockaddr_issitelocal(&dev->address) &&
		    !isc_sockaddr_islinklocal(&dev->address)) {
			socket_log(sock, NULL, TRACE,
				   "pktinfo structure provided, ifindex %u "
				   "(set to 0)", pktinfo->ipi6_ifindex);

			/*
			 * Set the pktinfo index to 0 here, to let the
			 * kernel decide what interface it should send on.
			 */
			dev->pktinfo.ipi6_ifindex = 0;
		}
	}

	if (sock->type == isc_sockettype_udp)
		io_state = doio_send(sock, dev);
	else {
		if (ISC_LIST_EMPTY(sock->send_list))
			io_state = doio_send(sock, dev);
		else
			io_state = DOIO_SOFT;
	}

	switch (io_state) {
	case DOIO_SOFT:
		/*
		 * We couldn't send all or part of the request right now, so
		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
		 */
		if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
			isc_task_attach(task, &ntask);
			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

			/*
			 * Enqueue the request.  If the socket was previously
			 * not being watched, poke the watcher to start
			 * paying attention to it.
			 */
			if (ISC_LIST_EMPTY(sock->send_list) &&
			    !sock->pending_send)
				select_poke(sock->manager, sock->fd,
					    SELECT_POKE_WRITE);
			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);

			socket_log(sock, NULL, EVENT, NULL, 0, 0,
				   "socket_send: event %p -> task %p",
				   dev, ntask);

			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
				result = ISC_R_INPROGRESS;
			break;
		}

		/* FALLTHROUGH */

	case DOIO_HARD:
	case DOIO_SUCCESS:
		if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
			send_senddone_event(sock, &dev);
		break;
	}

	return (result);
}

isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		  isc_task_t *task, isc_taskaction_t action, void *arg)
{
	return (isc__socket_sendtov2(sock, buflist, task, action, arg, NULL,
				     NULL, 0));
}

isc_result_t
isc__socket_sendtov2(isc_socket_t *sock0, isc_bufferlist_t *buflist,
		     isc_task_t *task, isc_taskaction_t action, void *arg,
		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		     unsigned int flags)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socketevent_t *dev;
	isc__socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_usedcount(buflist);
	REQUIRE(iocount > 0);

	dev = allocate_socketevent(sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	return (socket_send(sock, dev, task, address, pktinfo, flags));
}
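
/*
 * Illustrative usage sketch (not part of the original ISC code): queueing a
 * send with isc__socket_sendv().  The buffers placed on the list are moved
 * onto the request above, so they must stay valid until the
 * ISC_SOCKEVENT_SENDDONE event arrives.  All 'example_' names are
 * hypothetical.
 */
static unsigned char example_txdata[512];
static isc_buffer_t example_txbuf;

static void
example_send_done(isc_task_t *task, isc_event_t *event) {
	isc_socketevent_t *dev = (isc_socketevent_t *)event;

	UNUSED(task);
	INSIST(event->ev_type == ISC_SOCKEVENT_SENDDONE);

	if (dev->result != ISC_R_SUCCESS) {
		/* The write failed or was canceled; recover here. */
	}
	isc_event_free(&event);
}

static isc_result_t
example_start_send(isc_socket_t *sock, isc_task_t *task,
		   const void *data, unsigned int length)
{
	isc_bufferlist_t blist;

	INSIST(length <= sizeof(example_txdata));
	memmove(example_txdata, data, length);

	isc_buffer_init(&example_txbuf, example_txdata,
			sizeof(example_txdata));
	isc_buffer_add(&example_txbuf, length);	/* mark 'length' bytes used */

	ISC_LIST_INIT(blist);
	ISC_LIST_ENQUEUE(blist, &example_txbuf, link);

	return (isc__socket_sendv(sock, &blist, task,
				  example_send_done, NULL));
}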

isc_result_t
isc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr,
		 unsigned int options) {
	isc__socket_t *sock = (isc__socket_t *)sock0;
	char strbuf[ISC_STRERRORSIZE];
	int on = 1;

	REQUIRE(VALID_SOCKET(sock));

	INSIST(!sock->bound);

	if (sock->pf != sockaddr->type.sa.sa_family) {
		return (ISC_R_FAMILYMISMATCH);
	}

	/*
	 * Only set SO_REUSEADDR when we want a specific port.
	 */
	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
		       sizeof(on)) < 0) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "setsockopt(%d) %s", sock->fd, "failed");
		/* Press on... */
	}
	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
		switch (errno) {
		case EACCES:
			return (ISC_R_NOPERM);
		case EADDRNOTAVAIL:
			return (ISC_R_ADDRNOTAVAIL);
		case EADDRINUSE:
			return (ISC_R_ADDRINUSE);
		case EINVAL:
			return (ISC_R_BOUND);
		default:
			isc__strerror(errno, strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
					 strbuf);
			return (ISC_R_UNEXPECTED);
		}
	}

	socket_log(sock, sockaddr, TRACE, "bound");
	sock->bound = 1;

	return (ISC_R_SUCCESS);
}
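
/*
 * Illustrative usage sketch (not part of the original ISC code): binding a
 * UDP socket to a fixed local port.  Note that ISC_SOCKET_REUSEADDRESS only
 * results in SO_REUSEADDR when the requested port is non-zero, as enforced
 * above.  The wrapper name and the wildcard-address choice are hypothetical.
 */
static isc_result_t
example_bind_udp(isc_socket_t *sock, in_port_t port) {
	isc_sockaddr_t local;
	struct in_addr ina;

	/* Bind to the IPv4 wildcard address on the requested port. */
	ina.s_addr = INADDR_ANY;
	isc_sockaddr_fromin(&local, &ina, port);

	/*
	 * SO_REUSEADDR is requested but, per isc__socket_bind(), only
	 * applied when 'port' is non-zero.
	 */
	return (isc__socket_bind(sock, &local, ISC_SOCKET_REUSEADDRESS));
}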

isc_result_t
isc__socket_connect(isc_socket_t *sock0, isc_sockaddr_t *addr,
		    isc_task_t *task, isc_taskaction_t action, void *arg)
{
	isc__socket_t *sock = (isc__socket_t *)sock0;
	isc_socket_connev_t *dev;
	isc_task_t *ntask = NULL;
	isc__socketmgr_t *manager;
	int cc;
	char strbuf[ISC_STRERRORSIZE];
	char addrbuf[ISC_SOCKADDR_FORMATSIZE];

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(addr != NULL);
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(addr != NULL);

	if (isc_sockaddr_ismulticast(addr))
		return (ISC_R_MULTICAST);

	REQUIRE(!sock->connecting);

	dev = (isc_socket_connev_t *)isc_event_allocate(sock,
							ISC_SOCKEVENT_CONNECT,
							action, arg,
							sizeof(*dev));
	if (dev == NULL) {
		return (ISC_R_NOMEMORY);
	}
	ISC_LINK_INIT(dev, ev_link);

	/*
	 * Try to do the connect right away, as there can be only one
	 * outstanding, and it might happen to complete.
	 */
	sock->peer_address = *addr;
	cc = connect(sock->fd, &addr->type.sa, addr->length);
	if (cc < 0) {
		/*
		 * HP-UX "fails" to connect a UDP socket and sets errno to
		 * EINPROGRESS if it's non-blocking.  We'd rather regard this
		 * as a success and let the user detect it if it's really an
		 * error at the time of sending a packet on the socket.
		 */
		if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) {
			cc = 0;
			goto success;
		}
		if (SOFT_ERROR(errno) || errno == EINPROGRESS)
			goto queue;

		switch (errno) {
#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
			ERROR_MATCH(EACCES, ISC_R_NOPERM);
			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
			ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
#ifdef EHOSTDOWN
			ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
#endif
			ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
			ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
			ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
			ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
			ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
#undef ERROR_MATCH
		}

		sock->connected = 0;

		isc__strerror(errno, strbuf, sizeof(strbuf));
		isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf));
		UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s",
				 addrbuf, errno, strbuf);

		isc_event_free(ISC_EVENT_PTR(&dev));
		return (ISC_R_UNEXPECTED);

	err_exit:
		sock->connected = 0;
		isc_task_send(task, ISC_EVENT_PTR(&dev));

		return (ISC_R_SUCCESS);
	}

	/*
	 * If connect completed, fire off the done event.
	 */
 success:
	if (cc == 0) {
		sock->connected = 1;
		sock->bound = 1;
		dev->result = ISC_R_SUCCESS;
		isc_task_send(task, ISC_EVENT_PTR(&dev));

		return (ISC_R_SUCCESS);
	}

 queue:

	/*
	 * Attach to task.
	 */
	isc_task_attach(task, &ntask);

	sock->connecting = 1;

	dev->ev_sender = ntask;

	/*
	 * Poke watcher here.  We still have the socket locked, so there
	 * is no race condition.  We will keep the lock for such a short
	 * bit of time that waking it up now or later won't matter all
	 * that much.
	 */
	if (sock->connect_ev == NULL)
		select_poke(manager, sock->fd, SELECT_POKE_CONNECT);

	sock->connect_ev = dev;

	return (ISC_R_SUCCESS);
}
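
/*
 * Illustrative usage sketch (not part of the original ISC code): starting a
 * non-blocking connect.  Whether connect() completes immediately or is
 * queued, the caller always learns the outcome from the ISC_SOCKEVENT_CONNECT
 * event posted to 'task'.  The 'example_' names are hypothetical.
 */
static void
example_connect_done(isc_task_t *task, isc_event_t *event) {
	isc_socket_connev_t *cev = (isc_socket_connev_t *)event;

	UNUSED(task);
	INSIST(event->ev_type == ISC_SOCKEVENT_CONNECT);

	if (cev->result == ISC_R_SUCCESS) {
		/* The socket is now connected and bound; start I/O here. */
	} else {
		/* e.g. ISC_R_CONNREFUSED, ISC_R_TIMEDOUT, ISC_R_CANCELED. */
	}
	isc_event_free(&event);
}

static isc_result_t
example_start_connect(isc_socket_t *sock, isc_sockaddr_t *peer,
		      isc_task_t *task)
{
	return (isc__socket_connect(sock, peer, task,
				    example_connect_done, NULL));
}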

/*
 * Called when a socket with a pending connect() finishes.
 */
static void
internal_connect(isc_task_t *me, isc_event_t *ev) {
	isc__socket_t *sock;
	isc_socket_connev_t *dev;
	isc_task_t *task;
	int cc;
	socklen_t optlen;
	char strbuf[ISC_STRERRORSIZE];
	char peerbuf[ISC_SOCKADDR_FORMATSIZE];

	UNUSED(me);
	INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);

	sock = ev->ev_sender;
	INSIST(VALID_SOCKET(sock));

	/*
	 * When the internal event was sent the reference count was bumped
	 * to keep the socket around for us.  Decrement the count here.
	 */
	INSIST(sock->references > 0);
	sock->references--;
	if (sock->references == 0) {
		destroy(&sock);
		return;
	}

	/*
	 * Has this event been canceled?
	 */
	dev = sock->connect_ev;
	if (dev == NULL) {
		INSIST(!sock->connecting);
		return;
	}

	INSIST(sock->connecting);
	sock->connecting = 0;

	/*
	 * Get any possible error status here.
	 */
	optlen = sizeof(cc);
	if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR,
		       (void *)&cc, (void *)&optlen) < 0)
		cc = errno;
	else
		errno = cc;

	if (errno != 0) {
		/*
		 * If the error is EAGAIN, just re-select on this
		 * fd and pretend nothing strange happened.
		 */
		if (SOFT_ERROR(errno) || errno == EINPROGRESS) {
			sock->connecting = 1;
			select_poke(sock->manager, sock->fd,
				    SELECT_POKE_CONNECT);
			return;
		}

		/*
		 * Translate other errors into ISC_R_* flavors.
		 */
		switch (errno) {
#define ERROR_MATCH(a, b) case a: dev->result = b; break;
			ERROR_MATCH(EACCES, ISC_R_NOPERM);
			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
			ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
			ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
#ifdef EHOSTDOWN
			ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
#endif
			ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
			ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
			ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
			ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
			ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT);
			ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
#undef ERROR_MATCH
		default:
			dev->result = ISC_R_UNEXPECTED;
			isc_sockaddr_format(&sock->peer_address, peerbuf,
					    sizeof(peerbuf));
			isc__strerror(errno, strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "internal_connect: connect(%s) %s",
					 peerbuf, strbuf);
		}
	} else {
		dev->result = ISC_R_SUCCESS;
		sock->connected = 1;
		sock->bound = 1;
	}

	sock->connect_ev = NULL;

	task = dev->ev_sender;
	dev->ev_sender = sock;
	isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
}

/*
 * Run through the list of events on this socket, and cancel the ones
 * queued for task "task" of type "how".  "how" is a bitmask.
 */
void
isc__socket_cancel(isc_socket_t *sock0, isc_task_t *task, unsigned int how) {
	isc__socket_t *sock = (isc__socket_t *)sock0;

	REQUIRE(VALID_SOCKET(sock));

	/*
	 * Quick exit if there is nothing to do.  Don't even bother locking
	 * in this case.
	 */
	if (how == 0)
		return;

	/*
	 * All of these do the same thing, more or less.
	 * Each will:
	 *	o If the internal event is marked as "posted" try to
	 *	  remove it from the task's queue.  If this fails, mark it
	 *	  as canceled instead, and let the task clean it up later.
	 *	o For each I/O request for that task of that type, post
	 *	  its done event with status of "ISC_R_CANCELED".
	 *	o Reset any state needed.
	 */
	if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV)
	    && !ISC_LIST_EMPTY(sock->recv_list)) {
		isc_socketevent_t *dev;
		isc_socketevent_t *next;
		isc_task_t *current_task;

		dev = ISC_LIST_HEAD(sock->recv_list);

		while (dev != NULL) {
			current_task = dev->ev_sender;
			next = ISC_LIST_NEXT(dev, ev_link);

			if ((task == NULL) || (task == current_task)) {
				dev->result = ISC_R_CANCELED;
				send_recvdone_event(sock, &dev);
			}
			dev = next;
		}
	}

	if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND)
	    && !ISC_LIST_EMPTY(sock->send_list)) {
		isc_socketevent_t *dev;
		isc_socketevent_t *next;
		isc_task_t *current_task;

		dev = ISC_LIST_HEAD(sock->send_list);

		while (dev != NULL) {
			current_task = dev->ev_sender;
			next = ISC_LIST_NEXT(dev, ev_link);

			if ((task == NULL) || (task == current_task)) {
				dev->result = ISC_R_CANCELED;
				send_senddone_event(sock, &dev);
			}
			dev = next;
		}
	}

	/*
	 * Connecting is not a list.
	 */
	if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
	    && sock->connect_ev != NULL) {
		isc_socket_connev_t *dev;
		isc_task_t *current_task;

		INSIST(sock->connecting);
		sock->connecting = 0;

		dev = sock->connect_ev;
		current_task = dev->ev_sender;

		if ((task == NULL) || (task == current_task)) {
			sock->connect_ev = NULL;

			dev->result = ISC_R_CANCELED;
			dev->ev_sender = sock;
			isc_task_sendanddetach(&current_task,
					       ISC_EVENT_PTR(&dev));
		}
	}
}
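
/*
 * Illustrative usage sketch (not part of the original ISC code): cancelling
 * pending work on a socket before shutting it down.  Passing a NULL task
 * cancels matching requests for every task, and each cancelled request
 * completes through its normal done event with ISC_R_CANCELED.  The helper
 * names are hypothetical.
 */
static void
example_cancel_all(isc_socket_t *sock) {
	/*
	 * ISC_SOCKCANCEL_ALL sets all of the request-type bits; a NULL
	 * task matches every owner.
	 */
	isc__socket_cancel(sock, NULL, ISC_SOCKCANCEL_ALL);
}

static void
example_cancel_task_reads(isc_socket_t *sock, isc_task_t *task) {
	/* Cancel only the queued recv requests owned by 'task'. */
	isc__socket_cancel(sock, task, ISC_SOCKCANCEL_RECV);
}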

/*
 * In our assumed scenario, we can simply use a single static object.
 * XXX: this is not true if the application uses multiple threads with
 * 'multi-context' mode.  Fixing this is a future TODO item.
 */
static isc_socketwait_t swait_private;

int
isc__socketmgr_waitevents(isc_socketmgr_t *manager0, struct timeval *tvp,
			  isc_socketwait_t **swaitp)
{
	isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0;
	int n;

	REQUIRE(swaitp != NULL && *swaitp == NULL);

	if (manager == NULL)
		manager = socketmgr;
	if (manager == NULL)
		return (0);

	memmove(manager->read_fds_copy, manager->read_fds,
		manager->fd_bufsize);
	memmove(manager->write_fds_copy, manager->write_fds,
		manager->fd_bufsize);

	swait_private.readset = manager->read_fds_copy;
	swait_private.writeset = manager->write_fds_copy;
	swait_private.maxfd = manager->maxfd + 1;

	n = select(swait_private.maxfd, swait_private.readset,
		   swait_private.writeset, NULL, tvp);

	*swaitp = &swait_private;
	return (n);
}

isc_result_t
isc__socketmgr_dispatch(isc_socketmgr_t *manager0, isc_socketwait_t *swait) {
	isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0;

	REQUIRE(swait == &swait_private);

	if (manager == NULL)
		manager = socketmgr;
	if (manager == NULL)
		return (ISC_R_NOTFOUND);

	process_fds(manager, swait->maxfd, swait->readset, swait->writeset);
	return (ISC_R_SUCCESS);
}

#include "../socket_api.c"
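
/*
 * Illustrative usage sketch (not part of the original ISC code): how a
 * single-threaded application can drive this socket manager.  Each pass
 * blocks in select() via isc__socketmgr_waitevents() and then hands the
 * ready descriptors to isc__socketmgr_dispatch(), which ends up in
 * process_fds() above.  The loop also runs queued task events through
 * isc__taskmgr_dispatch(); the function name, timeout, and termination flag
 * are hypothetical.
 */
static void
example_event_loop(isc_socketmgr_t *socketmgr_arg, isc_boolean_t *done) {
	isc_socketwait_t *swait;
	struct timeval tv;
	int n;

	while (!*done) {
		/* Let the task manager run anything already queued. */
		isc__taskmgr_dispatch(NULL);

		/* Wake up at least once a second even if nothing is ready. */
		tv.tv_sec = 1;
		tv.tv_usec = 0;

		swait = NULL;
		n = isc__socketmgr_waitevents(socketmgr_arg, &tv, &swait);
		if (n > 0)
			(void)isc__socketmgr_dispatch(socketmgr_arg, swait);
	}
}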