/*
 * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
 */

#include "opt_inet.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/file.h>		/* for struct knote */
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <vm/vm_zone.h>
#include <vm/pmap.h>
#include <net/netmsg2.h>

#include <sys/thread2.h>
#include <sys/socketvar2.h>
#include <sys/spinlock2.h>

#include <machine/limits.h>

#ifdef INET
extern int tcp_sosend_agglim;
extern int tcp_sosend_async;
extern int udp_sosend_async;
extern int udp_sosend_prepend;

static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

static void filt_sordetach(struct knote *kn);
static int  filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int  filt_sowrite(struct knote *kn, long hint);
static int  filt_solisten(struct knote *kn, long hint);

static void sodiscard(struct socket *so);
static int  soclose_sync(struct socket *so, int fflag);
static void soclose_fast(struct socket *so);

static struct filterops solisten_filtops =
        { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
        { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
        { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sowdetach, filt_sowrite };
static struct filterops soexcept_filtops =
        { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread };

MALLOC_DEFINE(M_SOCKET, "socket", "socket struct");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");

static int use_soclose_fast = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soclose_fast, CTLFLAG_RW,
    &use_soclose_fast, 0, "Fast socket close");

int use_soaccept_pred_fast = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soaccept_pred_fast, CTLFLAG_RW,
    &use_soaccept_pred_fast, 0, "Fast socket accept prediction");

int use_sendfile_async = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sendfile_async, CTLFLAG_RW,
    &use_sendfile_async, 0, "sendfile uses asynchronous pru_send");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Get a socket structure, and initialize it.
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok, struct protosw *pr)
{
        struct socket *so;
        unsigned waitmask;

        waitmask = waitok ? M_WAITOK : M_NOWAIT;
        so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask);
        if (so) {
                /* XXX race condition for reentrant kernel */
                so->so_proto = pr;
                TAILQ_INIT(&so->so_aiojobq);
                TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist);
                TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist);
                lwkt_token_init(&so->so_rcv.ssb_token, "rcvtok");
                lwkt_token_init(&so->so_snd.ssb_token, "sndtok");
                spin_init(&so->so_rcvd_spin);
                netmsg_init(&so->so_rcvd_msg.base, so, &netisr_adone_rport,
                    MSGF_DROPABLE, so->so_proto->pr_usrreqs->pru_rcvd);
                so->so_rcvd_msg.nm_pru_flags |= PRUR_ASYNC;
                so->so_state = SS_NOFDREF;
                so->so_refs = 1;
        }
        return so;
}

int
socreate(int dom, struct socket **aso, int type,
    int proto, struct thread *td)
{
        struct proc *p = td->td_proc;
        struct protosw *prp;
        struct socket *so;
        struct pru_attach_info ai;
        int error;

        if (proto)
                prp = pffindproto(dom, proto, type);
        else
                prp = pffindtype(dom, type);

        if (prp == NULL || prp->pr_usrreqs->pru_attach == 0)
                return (EPROTONOSUPPORT);

        if (p->p_ucred->cr_prison && jail_socket_unixiproute_only &&
            prp->pr_domain->dom_family != PF_LOCAL &&
            prp->pr_domain->dom_family != PF_INET &&
            prp->pr_domain->dom_family != PF_INET6 &&
            prp->pr_domain->dom_family != PF_ROUTE) {
                return (EPROTONOSUPPORT);
        }

        if (prp->pr_type != type)
                return (EPROTOTYPE);
        so = soalloc(p != NULL, prp);
        if (so == NULL)
                return (ENOBUFS);

        /*
         * Callers of socreate() presumably will connect up a descriptor
         * and call soclose() if they cannot.  This represents our so_refs
         * (which should be 1) from soalloc().
         */
        soclrstate(so, SS_NOFDREF);

        /*
         * Set a default port for protocol processing.  No action will occur
         * on the socket on this port until an inpcb is attached to it and
         * is able to match incoming packets, or until the socket becomes
         * available to userland.
         *
         * We normally default the socket to the protocol thread on cpu 0.
         * If PR_SYNC_PORT is set (unix domain sockets) there is no protocol
         * thread and all pr_*()/pru_*() calls are executed synchronously.
         */
        if (prp->pr_flags & PR_SYNC_PORT)
                so->so_port = &netisr_sync_port;
        else
                so->so_port = netisr_portfn(0);

        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);
        so->so_type = type;
        so->so_cred = crhold(p->p_ucred);
        ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE];
        ai.p_ucred = p->p_ucred;
        ai.fd_rdir = p->p_fd->fd_rdir;

        /*
         * Auto-sizing of socket buffers is managed by the protocols and
         * the appropriate flags must be set in the pru_attach function.
         */
        error = so_pru_attach(so, proto, &ai);
        if (error) {
                sosetstate(so, SS_NOFDREF);
                sofree(so);     /* from soalloc */
                return error;
        }

        /*
         * NOTE: Returns referenced socket.
         */
        *aso = so;
        return (0);
}

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        int error;

        error = so_pru_bind(so, nam, td);
        return (error);
}
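
/*
 * Dispose of the resources associated with a socket structure and then
 * the structure itself.  Accounted socket buffer space is returned via
 * chgsbsize(), any installed accept filter is removed, and the cached
 * credentials and foreign address are released.  Called from sofree()
 * once the last reference has gone away.
 */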
static void
sodealloc(struct socket *so)
{
        if (so->so_rcv.ssb_hiwat)
                (void)chgsbsize(so->so_cred->cr_uidinfo,
                    &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY);
        if (so->so_snd.ssb_hiwat)
                (void)chgsbsize(so->so_cred->cr_uidinfo,
                    &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
        /* remove accept filter if present */
        if (so->so_accf != NULL)
                do_setopt_accept_filter(so, NULL);
#endif /* INET */
        crfree(so->so_cred);
        if (so->so_faddr != NULL)
                kfree(so->so_faddr, M_SONAME);
        kfree(so, M_SOCKET);
}

int
solisten(struct socket *so, int backlog, struct thread *td)
{
        int error;
#ifdef SCTP
        short oldopt, oldqlimit;
#endif /* SCTP */

        if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))
                return (EINVAL);

#ifdef SCTP
        oldopt = so->so_options;
        oldqlimit = so->so_qlimit;
#endif /* SCTP */

        lwkt_gettoken(&so->so_rcv.ssb_token);
        if (TAILQ_EMPTY(&so->so_comp))
                so->so_options |= SO_ACCEPTCONN;
        lwkt_reltoken(&so->so_rcv.ssb_token);
        if (backlog < 0 || backlog > somaxconn)
                backlog = somaxconn;
        so->so_qlimit = backlog;
        /*
         * SCTP needs to tweak both the inbound backlog parameter AND
         * the so_options (UDP model both connect's and gets inbound
         * connections .. implicitly).
         */
        error = so_pru_listen(so, td);
        if (error) {
#ifdef SCTP
                /* Restore the params */
                so->so_options = oldopt;
                so->so_qlimit = oldqlimit;
#endif /* SCTP */
                return (error);
        }
        return (0);
}

/*
 * Destroy a disconnected socket.  This routine is a NOP if entities
 * still have a reference on the socket:
 *
 *      so_pcb -        The protocol stack still has a reference
 *      SS_NOFDREF -    There is no longer a file pointer reference
 */
void
sofree(struct socket *so)
{
        struct socket *head;

        /*
         * This is a bit hackish at the moment.  We need to interlock
         * any accept queue we are on before we potentially lose the
         * last reference to avoid races against a re-reference from
         * someone operating on the queue.
         */
        while ((head = so->so_head) != NULL) {
                lwkt_getpooltoken(head);
                if (so->so_head == head)
                        break;
                lwkt_relpooltoken(head);
        }

        /*
         * Arbitrage the last free.
         */
        KKASSERT(so->so_refs > 0);
        if (atomic_fetchadd_int(&so->so_refs, -1) != 1) {
                if (head)
                        lwkt_relpooltoken(head);
                return;
        }

        KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF));
        KKASSERT((so->so_state & SS_ASSERTINPROG) == 0);

        /*
         * We're done, remove ourselves from the accept queue we are
         * on, if we are on one.
         */
        if (head != NULL) {
                if (so->so_state & SS_INCOMP) {
                        TAILQ_REMOVE(&head->so_incomp, so, so_list);
                        head->so_incqlen--;
                } else if (so->so_state & SS_COMP) {
                        /*
                         * We must not decommission a socket that's
                         * on the accept(2) queue.  If we do, then
                         * accept(2) may hang after select(2) indicated
                         * that the listening socket was ready.
                         */
                        lwkt_relpooltoken(head);
                        return;
                } else {
                        panic("sofree: not queued");
                }
                soclrstate(so, SS_INCOMP);
                so->so_head = NULL;
                lwkt_relpooltoken(head);
        }
        ssb_release(&so->so_snd, so);
        sorflush(so);
        sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int fflag)
{
        int error;

        funsetown(&so->so_sigio);
        if (!use_soclose_fast ||
            (so->so_proto->pr_flags & PR_SYNC_PORT) ||
            (so->so_options & SO_LINGER)) {
                error = soclose_sync(so, fflag);
        } else {
                soclose_fast(so);
                error = 0;
        }
        return error;
}
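
/*
 * Discard a socket.  If the socket is a listen socket, abort any
 * connections still sitting on its incomplete and completed queues,
 * then take over the file pointer's reference by setting SS_NOFDREF.
 * Panics if SS_NOFDREF is already set.
 */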
static void
sodiscard(struct socket *so)
{
        lwkt_getpooltoken(so);
        if (so->so_options & SO_ACCEPTCONN) {
                struct socket *sp;

                while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
                        TAILQ_REMOVE(&so->so_incomp, sp, so_list);
                        soclrstate(sp, SS_INCOMP);
                        sp->so_head = NULL;
                        so->so_incqlen--;
                        soaborta(sp);
                }
                while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
                        TAILQ_REMOVE(&so->so_comp, sp, so_list);
                        soclrstate(sp, SS_COMP);
                        sp->so_head = NULL;
                        so->so_qlen--;
                        soaborta(sp);
                }
        }
        lwkt_relpooltoken(so);

        if (so->so_state & SS_NOFDREF)
                panic("soclose: NOFDREF");
        sosetstate(so, SS_NOFDREF);     /* take ref */
}

static int
soclose_sync(struct socket *so, int fflag)
{
        int error = 0;

        if (so->so_pcb == NULL)
                goto discard;
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnect(so);
                        if (error)
                                goto drop;
                }
                if (so->so_options & SO_LINGER) {
                        if ((so->so_state & SS_ISDISCONNECTING) &&
                            (fflag & FNONBLOCK))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED) {
                                error = tsleep(&so->so_timeo, PCATCH,
                                    "soclos", so->so_linger * hz);
                                if (error)
                                        break;
                        }
                }
        }
drop:
        if (so->so_pcb) {
                int error2;

                error2 = so_pru_detach(so);
                if (error == 0)
                        error = error2;
        }
discard:
        sodiscard(so);
        so_pru_sync(so);        /* unpend async sending */
        sofree(so);             /* dispose of ref */

        return (error);
}

static void
soclose_sofree_async_handler(netmsg_t msg)
{
        sofree(msg->base.nm_so);
}

static void
soclose_sofree_async(struct socket *so)
{
        struct netmsg_base *base = &so->so_clomsg;

        netmsg_init(base, so, &netisr_apanic_rport, 0,
            soclose_sofree_async_handler);
        lwkt_sendmsg(so->so_port, &base->lmsg);
}

static void
soclose_disconn_async_handler(netmsg_t msg)
{
        struct socket *so = msg->base.nm_so;

        if ((so->so_state & SS_ISCONNECTED) &&
            (so->so_state & SS_ISDISCONNECTING) == 0)
                so_pru_disconnect_direct(so);

        if (so->so_pcb)
                so_pru_detach_direct(so);

        sodiscard(so);
        sofree(so);
}

static void
soclose_disconn_async(struct socket *so)
{
        struct netmsg_base *base = &so->so_clomsg;

        netmsg_init(base, so, &netisr_apanic_rport, 0,
            soclose_disconn_async_handler);
        lwkt_sendmsg(so->so_port, &base->lmsg);
}

static void
soclose_detach_async_handler(netmsg_t msg)
{
        struct socket *so = msg->base.nm_so;

        if (so->so_pcb)
                so_pru_detach_direct(so);

        sodiscard(so);
        sofree(so);
}

static void
soclose_detach_async(struct socket *so)
{
        struct netmsg_base *base = &so->so_clomsg;

        netmsg_init(base, so, &netisr_apanic_rport, 0,
            soclose_detach_async_handler);
        lwkt_sendmsg(so->so_port, &base->lmsg);
}
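
/*
 * Fast path for soclose().  Instead of sleeping in the protocol layers,
 * the disconnect/detach work and the final sofree() are forwarded to
 * the socket's protocol thread as asynchronous messages and we return
 * immediately.  Selected via the kern.ipc.soclose_fast sysctl whenever
 * no lingering close is required.
 */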
static void
soclose_fast(struct socket *so)
{
        if (so->so_pcb == NULL)
                goto discard;

        if ((so->so_state & SS_ISCONNECTED) &&
            (so->so_state & SS_ISDISCONNECTING) == 0) {
                soclose_disconn_async(so);
                return;
        }

        if (so->so_pcb) {
                soclose_detach_async(so);
                return;
        }

discard:
        sodiscard(so);
        soclose_sofree_async(so);
}

/*
 * Abort and destroy a socket.  Only one abort can be in progress
 * at any given moment.
 */
void
soabort(struct socket *so)
{
        soreference(so);
        so_pru_abort(so);
}

void
soaborta(struct socket *so)
{
        soreference(so);
        so_pru_aborta(so);
}

void
soabort_oncpu(struct socket *so)
{
        soreference(so);
        so_pru_abort_oncpu(so);
}

/*
 * so is passed in ref'd, which becomes owned by
 * the cleared SS_NOFDREF flag.
 */
void
soaccept_generic(struct socket *so)
{
        if ((so->so_state & SS_NOFDREF) == 0)
                panic("soaccept: !NOFDREF");
        soclrstate(so, SS_NOFDREF);     /* owned by lack of SS_NOFDREF */
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
        int error;

        soaccept_generic(so);
        error = so_pru_accept(so, nam);
        return (error);
}

int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        int error;

        if (so->so_options & SO_ACCEPTCONN)
                return (EOPNOTSUPP);
        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
         * This allows user to disconnect by connecting to, e.g.,
         * a null address.
         */
        if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnect(so)))) {
                error = EISCONN;
        } else {
                /*
                 * Prevent accumulated error from previous connection
                 * from biting us.
                 */
                so->so_error = 0;
                error = so_pru_connect(so, nam, td);
        }
        return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
        int error;

        error = so_pru_connect2(so1, so2);
        return (error);
}

int
sodisconnect(struct socket *so)
{
        int error;

        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto bad;
        }
        if (so->so_state & SS_ISDISCONNECTING) {
                error = EALREADY;
                goto bad;
        }
        error = so_pru_disconnect(so);
bad:
        return (error);
}

#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags,
    struct thread *td)
{
        struct mbuf **mp;
        struct mbuf *m;
        size_t resid;
        int space, len;
        int clen = 0, error, dontroute, mlen;
        int atomic = sosendallatonce(so) || top;
        int pru_flags;

        if (uio) {
                resid = uio->uio_resid;
        } else {
                resid = (size_t)top->m_pkthdr.len;
#ifdef INVARIANTS
                len = 0;
                for (m = top; m; m = m->m_next)
                        len += m->m_len;
                KKASSERT(top->m_pkthdr.len == len);
#endif
        }

        /*
         * WARNING!  resid is unsigned, space and len are signed.  space
         *           can wind up negative if the sockbuf is overcommitted.
         *
         * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
         * type sockets since that's an error.
         */
        if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
                error = EINVAL;
                goto out;
        }

        dontroute =
            (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
        if (td->td_lwp != NULL)
                td->td_lwp->lwp_ru.ru_msgsnd++;
        if (control)
                clen = control->m_len;
#define gotoerr(errcode)        { error = errcode; goto release; }

restart:
        error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
        if (error)
                goto out;

        do {
                if (so->so_state & SS_CANTSENDMORE)
                        gotoerr(EPIPE);
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
                        goto release;
                }
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        /*
                         * `sendto' and `sendmsg' are allowed on a connection-
                         * based socket if it supports implied connect.
                         * Return ENOTCONN if not connected and no address is
                         * supplied.
                         */
                        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
                            (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
                                if ((so->so_state & SS_ISCONFIRMING) == 0 &&
                                    !(resid == 0 && clen != 0))
                                        gotoerr(ENOTCONN);
                        } else if (addr == NULL)
                                gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
                                    ENOTCONN : EDESTADDRREQ);
                }
                if ((atomic && resid > so->so_snd.ssb_hiwat) ||
                    clen > so->so_snd.ssb_hiwat) {
                        gotoerr(EMSGSIZE);
                }
                space = ssb_space(&so->so_snd);
                if (flags & MSG_OOB)
                        space += 1024;
                if ((space < 0 || (size_t)space < resid + clen) && uio &&
                    (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
                        if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
                                gotoerr(EWOULDBLOCK);
                        ssb_unlock(&so->so_snd);
                        error = ssb_wait(&so->so_snd);
                        if (error)
                                goto out;
                        goto restart;
                }
                mp = &top;
                space -= clen;
                do {
                        if (uio == NULL) {
                                /*
                                 * Data is prepackaged in "top".
                                 */
                                resid = 0;
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        } else do {
                                if (resid > INT_MAX)
                                        resid = INT_MAX;
                                m = m_getl((int)resid, MB_WAIT, MT_DATA,
                                    top == NULL ? M_PKTHDR : 0, &mlen);
                                if (top == NULL) {
                                        m->m_pkthdr.len = 0;
                                        m->m_pkthdr.rcvif = NULL;
                                }
                                len = imin((int)szmin(mlen, resid), space);
                                if (resid < MINCLSIZE) {
                                        /*
                                         * For datagram protocols, leave room
                                         * for protocol headers in first mbuf.
                                         */
                                        if (atomic && top == NULL && len < mlen)
                                                MH_ALIGN(m, len);
                                }
                                space -= len;
                                error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
                                resid = uio->uio_resid;
                                m->m_len = len;
                                *mp = m;
                                top->m_pkthdr.len += len;
                                if (error)
                                        goto release;
                                mp = &m->m_next;
                                if (resid == 0) {
                                        if (flags & MSG_EOR)
                                                top->m_flags |= M_EOR;
                                        break;
                                }
                        } while (space > 0 && atomic);
                        if (dontroute)
                                so->so_options |= SO_DONTROUTE;
                        if (flags & MSG_OOB) {
                                pru_flags = PRUS_OOB;
                        } else if ((flags & MSG_EOF) &&
                            (so->so_proto->pr_flags & PR_IMPLOPCL) &&
                            (resid == 0)) {
                                /*
                                 * If the user set MSG_EOF, the protocol
                                 * understands this flag and nothing left to
                                 * send then use PRU_SEND_EOF instead of PRU_SEND.
                                 */
                                pru_flags = PRUS_EOF;
                        } else if (resid > 0 && space > 0) {
                                /* If there is more to send, set PRUS_MORETOCOME */
                                pru_flags = PRUS_MORETOCOME;
                        } else {
                                pru_flags = 0;
                        }
                        /*
                         * XXX all the SS_CANTSENDMORE checks previously
                         * done could be out of date.  We could have received
                         * a reset packet in an interrupt or maybe we slept
                         * while doing page faults in uiomove() etc.  We could
                         * probably recheck again inside the splnet() protection
                         * here, but there are probably other places that this
                         * also happens.  We must rethink this.
                         */
                        error = so_pru_send(so, pru_flags, top, addr, control, td);
                        if (dontroute)
                                so->so_options &= ~SO_DONTROUTE;
                        clen = 0;
                        control = NULL;
                        top = NULL;
                        mp = &top;
                        if (error)
                                goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        ssb_unlock(&so->so_snd);
out:
        if (top)
                m_freem(top);
        if (control)
                m_freem(control);
        return (error);
}

#ifdef INET
/*
 * A specialization of sosend() for UDP based on protocol-specific knowledge:
 *   so->so_proto->pr_flags has the PR_ATOMIC field set.  This means that
 *      sosendallatonce() returns true,
 *      the "atomic" variable is true,
 *      and sosendudp() blocks until space is available for the entire send.
 *   so->so_proto->pr_flags does not have the PR_CONNREQUIRED or
 *      PR_IMPLOPCL flags set.
 *   UDP has no out-of-band data.
 *   UDP has no control data.
 *   UDP does not support MSG_EOR.
 */
int
sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
        size_t resid;
        int error, pru_flags = 0;
        int space;

        if (td->td_lwp != NULL)
                td->td_lwp->lwp_ru.ru_msgsnd++;
        if (control)
                m_freem(control);

        KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp"));
        resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len;

restart:
        error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
        if (error)
                goto out;

        if (so->so_state & SS_CANTSENDMORE)
                gotoerr(EPIPE);
        if (so->so_error) {
                error = so->so_error;
                so->so_error = 0;
                goto release;
        }
        if (!(so->so_state & SS_ISCONNECTED) && addr == NULL)
                gotoerr(EDESTADDRREQ);
        if (resid > so->so_snd.ssb_hiwat)
                gotoerr(EMSGSIZE);
        space = ssb_space(&so->so_snd);
        if (uio && (space < 0 || (size_t)space < resid)) {
                if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
                        gotoerr(EWOULDBLOCK);
                ssb_unlock(&so->so_snd);
                error = ssb_wait(&so->so_snd);
                if (error)
                        goto out;
                goto restart;
        }

        if (uio) {
                int hdrlen = max_hdr;

                /*
                 * We try to optimize out the additional mbuf
                 * allocations in M_PREPEND() on output path, e.g.
                 * - udp_output(), when it tries to prepend protocol
                 *   headers.
                 * - Link layer output function, when it tries to
                 *   prepend link layer header.
                 *
                 * This probably will not benefit any data that will
                 * be fragmented, so this optimization is only performed
                 * when the size of data and max size of protocol+link
                 * headers fit into one mbuf cluster.
                 */
                if (uio->uio_resid > MCLBYTES - hdrlen ||
                    !udp_sosend_prepend) {
                        top = m_uiomove(uio);
                        if (top == NULL)
                                goto release;
                } else {
                        int nsize;

                        top = m_getl(uio->uio_resid + hdrlen, MB_WAIT,
                            MT_DATA, M_PKTHDR, &nsize);
                        KASSERT(nsize >= uio->uio_resid + hdrlen,
                            ("sosendudp invalid nsize %d, "
                             "resid %zu, hdrlen %d",
                             nsize, uio->uio_resid, hdrlen));

                        top->m_len = uio->uio_resid;
                        top->m_pkthdr.len = uio->uio_resid;
                        top->m_data += hdrlen;

                        error = uiomove(mtod(top, caddr_t), top->m_len, uio);
                        if (error)
                                goto out;
                }
        }

        if (flags & MSG_DONTROUTE)
                pru_flags |= PRUS_DONTROUTE;

        if (udp_sosend_async && (flags & MSG_SYNC) == 0) {
                so_pru_send_async(so, pru_flags, top, addr, NULL, td);
                error = 0;
        } else {
                error = so_pru_send(so, pru_flags, top, addr, NULL, td);
        }
        top = NULL;     /* sent or freed in lower layer */

release:
        ssb_unlock(&so->so_snd);
out:
        if (top)
                m_freem(top);
        return (error);
}
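
/*
 * A specialization of sosend() for TCP:
 * - MSG_EOR is rejected since TCP is a stream protocol.
 * - TCP carries no control data, so only an empty control mbuf is
 *   accepted (and freed).
 * - Data supplied via a uio is aggregated into chains of up to
 *   tcp_sosend_agglim mbufs before being handed to the protocol.
 * - Unless MSG_SYNC or MSG_OOB is specified the send may be passed to
 *   the protocol thread asynchronously (tcp_sosend_async), in which
 *   case no error from the protocol is reported back.
 */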
int
sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags,
    struct thread *td)
{
        struct mbuf **mp;
        struct mbuf *m;
        size_t resid;
        int space, len;
        int error, mlen;
        int allatonce;
        int pru_flags;

        if (uio) {
                KKASSERT(top == NULL);
                allatonce = 0;
                resid = uio->uio_resid;
        } else {
                allatonce = 1;
                resid = (size_t)top->m_pkthdr.len;
#ifdef INVARIANTS
                len = 0;
                for (m = top; m; m = m->m_next)
                        len += m->m_len;
                KKASSERT(top->m_pkthdr.len == len);
#endif
        }

        /*
         * WARNING!  resid is unsigned, space and len are signed.  space
         *           can wind up negative if the sockbuf is overcommitted.
         *
         * Also check to make sure that MSG_EOR isn't used on TCP
         */
        if (flags & MSG_EOR) {
                error = EINVAL;
                goto out;
        }

        if (control) {
                /* TCP doesn't do control messages (rights, creds, etc) */
                if (control->m_len) {
                        error = EINVAL;
                        goto out;
                }
                m_freem(control);       /* empty control, just free it */
                control = NULL;
        }

        if (td->td_lwp != NULL)
                td->td_lwp->lwp_ru.ru_msgsnd++;

#define gotoerr(errcode)        { error = errcode; goto release; }

restart:
        error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
        if (error)
                goto out;

        do {
                if (so->so_state & SS_CANTSENDMORE)
                        gotoerr(EPIPE);
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
                        goto release;
                }
                if ((so->so_state & SS_ISCONNECTED) == 0 &&
                    (so->so_state & SS_ISCONFIRMING) == 0)
                        gotoerr(ENOTCONN);
                if (allatonce && resid > so->so_snd.ssb_hiwat)
                        gotoerr(EMSGSIZE);

                space = ssb_space_prealloc(&so->so_snd);
                if (flags & MSG_OOB)
                        space += 1024;
                if ((space < 0 || (size_t)space < resid) && !allatonce &&
                    space < so->so_snd.ssb_lowat) {
                        if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
                                gotoerr(EWOULDBLOCK);
                        ssb_unlock(&so->so_snd);
                        error = ssb_wait(&so->so_snd);
                        if (error)
                                goto out;
                        goto restart;
                }
                mp = &top;
                do {
                        int cnt = 0, async = 0;

                        if (uio == NULL) {
                                /*
                                 * Data is prepackaged in "top".
                                 */
                                resid = 0;
                        } else do {
                                if (resid > INT_MAX)
                                        resid = INT_MAX;
                                m = m_getl((int)resid, MB_WAIT, MT_DATA,
                                    top == NULL ? M_PKTHDR : 0, &mlen);
                                if (top == NULL) {
                                        m->m_pkthdr.len = 0;
                                        m->m_pkthdr.rcvif = NULL;
                                }
                                len = imin((int)szmin(mlen, resid), space);
                                space -= len;
                                error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
                                resid = uio->uio_resid;
                                m->m_len = len;
                                *mp = m;
                                top->m_pkthdr.len += len;
                                if (error)
                                        goto release;
                                mp = &m->m_next;
                                if (resid == 0)
                                        break;
                                ++cnt;
                        } while (space > 0 && cnt < tcp_sosend_agglim);

                        if (tcp_sosend_async)
                                async = 1;

                        if (flags & MSG_OOB) {
                                pru_flags = PRUS_OOB;
                                async = 0;
                        } else if ((flags & MSG_EOF) && resid == 0) {
                                pru_flags = PRUS_EOF;
                        } else if (resid > 0 && space > 0) {
                                /* If there is more to send, set PRUS_MORETOCOME */
                                pru_flags = PRUS_MORETOCOME;
                                async = 1;
                        } else {
                                pru_flags = 0;
                        }

                        if (flags & MSG_SYNC)
                                async = 0;

                        /*
                         * XXX all the SS_CANTSENDMORE checks previously
                         * done could be out of date.  We could have received
                         * a reset packet in an interrupt or maybe we slept
                         * while doing page faults in uiomove() etc.  We could
                         * probably recheck again inside the splnet() protection
                         * here, but there are probably other places that this
                         * also happens.  We must rethink this.
                         */
                        for (m = top; m; m = m->m_next)
                                ssb_preallocstream(&so->so_snd, m);
                        if (!async) {
                                error = so_pru_send(so, pru_flags, top,
                                    NULL, NULL, td);
                        } else {
                                so_pru_send_async(so, pru_flags, top,
                                    NULL, NULL, td);
                                error = 0;
                        }

                        top = NULL;
                        mp = &top;
                        if (error)
                                goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        ssb_unlock(&so->so_snd);
out:
        if (top)
                m_freem(top);
        if (control)
                m_freem(control);
        return (error);
}
#endif

/*
 * Implement receive operations on a socket.
 *
 * We depend on the way that records are added to the signalsockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 *
 * Although the signalsockbuf is locked, new data may still be appended.
 * A token inside the ssb_lock deals with MP issues and still allows
 * the network to access the socket if we block in a uio.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct sockbuf *sio, struct mbuf **controlp, int *flagsp)
{
        struct mbuf *m, *n;
        struct mbuf *free_chain = NULL;
        int flags, len, error, offset;
        struct protosw *pr = so->so_proto;
        int moff, type = 0;
        size_t resid, orig_resid;

        if (uio)
                resid = uio->uio_resid;
        else
                resid = (size_t)(sio->sb_climit - sio->sb_cc);
        orig_resid = resid;

        if (psa)
                *psa = NULL;
        if (controlp)
                *controlp = NULL;
        if (flagsp)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;
        if (flags & MSG_OOB) {
                m = m_get(MB_WAIT, MT_DATA);
                if (m == NULL)
                        return (ENOBUFS);
                error = so_pru_rcvoob(so, m, flags & MSG_PEEK);
                if (error)
                        goto bad;
                if (sio) {
                        do {
                                sbappend(sio, m);
                                KKASSERT(resid >= (size_t)m->m_len);
                                resid -= (size_t)m->m_len;
                        } while (resid > 0 && m);
                } else {
                        do {
                                uio->uio_resid = resid;
                                error = uiomove(mtod(m, caddr_t),
                                    (int)szmin(resid, m->m_len),
                                    uio);
                                resid = uio->uio_resid;
                                m = m_free(m);
                        } while (uio->uio_resid && error == 0 && m);
                }
bad:
                if (m)
                        m_freem(m);
                return (error);
        }
        if ((so->so_state & SS_ISCONFIRMING) && resid)
                so_pru_rcvd(so, 0);

        /*
         * The token interlocks against the protocol thread while
         * ssb_lock is a blocking lock against other userland entities.
         */
        lwkt_gettoken(&so->so_rcv.ssb_token);
restart:
        error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags));
        if (error)
                goto done;

        m = so->so_rcv.ssb_mb;
        /*
         * If we have less data than requested, block awaiting more
         * (subject to any timeout) if:
         *   1. the current count is less than the low water mark, or
         *   2. MSG_WAITALL is set, and it is possible to do the entire
         *      receive operation at once if we block (resid <= hiwat).
         *   3. MSG_DONTWAIT is not set
         * If MSG_WAITALL is set but resid is larger than the receive buffer,
         * we have to do the receive in sections, and thus risk returning
         * a short count if a timeout or signal occurs after we start.
         */
        if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
            (size_t)so->so_rcv.ssb_cc < resid) &&
            (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat ||
            ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) &&
            m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
                KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1"));
                if (so->so_error) {
                        if (m)
                                goto dontblock;
                        error = so->so_error;
                        if ((flags & MSG_PEEK) == 0)
                                so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
                        if (m)
                                goto dontblock;
                        else
                                goto release;
                }
                for (; m; m = m->m_next) {
                        if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
                                m = so->so_rcv.ssb_mb;
                                goto dontblock;
                        }
                }
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                    (pr->pr_flags & PR_CONNREQUIRED)) {
                        error = ENOTCONN;
                        goto release;
                }
                if (resid == 0)
                        goto release;
                if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) {
                        error = EWOULDBLOCK;
                        goto release;
                }
                ssb_unlock(&so->so_rcv);
                error = ssb_wait(&so->so_rcv);
                if (error)
                        goto done;
                goto restart;
        }
dontblock:
        if (uio && uio->uio_td && uio->uio_td->td_proc)
                uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++;

        /*
         * note: m should be == sb_mb here.  Cache the next record while
         * cleaning up.  Note that calling m_free*() will break out of the
         * critical section.
         */
        KKASSERT(m == so->so_rcv.ssb_mb);

        /*
         * Skip any address mbufs prepending the record.
         */
        if (pr->pr_flags & PR_ADDR) {
                KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
                orig_resid = 0;
                if (psa)
                        *psa = dup_sockaddr(mtod(m, struct sockaddr *));
                if (flags & MSG_PEEK)
                        m = m->m_next;
                else
                        m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
        }

        /*
         * Skip any control mbufs prepending the record.
         */
#ifdef SCTP
        if (pr->pr_flags & PR_ADDR_OPT) {
                /*
                 * For SCTP we may be getting a
                 * whole message OR a partial delivery.
                 */
                if (m && m->m_type == MT_SONAME) {
                        orig_resid = 0;
                        if (psa)
                                *psa = dup_sockaddr(mtod(m, struct sockaddr *));
                        if (flags & MSG_PEEK)
                                m = m->m_next;
                        else
                                m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
                }
        }
#endif /* SCTP */
        while (m && m->m_type == MT_CONTROL && error == 0) {
                if (flags & MSG_PEEK) {
                        if (controlp)
                                *controlp = m_copy(m, 0, m->m_len);
                        m = m->m_next;  /* XXX race */
                } else {
                        if (controlp) {
                                n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
                                if (pr->pr_domain->dom_externalize &&
                                    mtod(m, struct cmsghdr *)->cmsg_type ==
                                    SCM_RIGHTS)
                                        error = (*pr->pr_domain->dom_externalize)(m);
                                *controlp = m;
                                m = n;
                        } else {
                                m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
                        }
                }
                if (controlp && *controlp) {
                        orig_resid = 0;
                        controlp = &(*controlp)->m_next;
                }
        }

        /*
         * flag OOB data.
         */
        if (m) {
                type = m->m_type;
                if (type == MT_OOBDATA)
                        flags |= MSG_OOB;
        }

        /*
         * Copy to the UIO or mbuf return chain (*mp).
         */
        moff = 0;
        offset = 0;
        while (m && resid > 0 && error == 0) {
                if (m->m_type == MT_OOBDATA) {
                        if (type != MT_OOBDATA)
                                break;
                } else if (type == MT_OOBDATA)
                        break;
                else
                        KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
                            ("receive 3"));
                soclrstate(so, SS_RCVATMARK);
                len = (resid > INT_MAX) ? INT_MAX : resid;
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
                if (len > m->m_len - moff)
                        len = m->m_len - moff;

                /*
                 * Copy out to the UIO or pass the mbufs back to the SIO.
                 * The SIO is dealt with when we eat the mbuf, but deal
                 * with the resid here either way.
                 */
                if (uio) {
                        uio->uio_resid = resid;
                        error = uiomove(mtod(m, caddr_t) + moff, len, uio);
                        resid = uio->uio_resid;
                        if (error)
                                goto release;
                } else {
                        resid -= (size_t)len;
                }

                /*
                 * Eat the entire mbuf or just a piece of it
                 */
                if (len == m->m_len - moff) {
                        if (m->m_flags & M_EOR)
                                flags |= MSG_EOR;
#ifdef SCTP
                        if (m->m_flags & M_NOTIFICATION)
                                flags |= MSG_NOTIFICATION;
#endif /* SCTP */
                        if (flags & MSG_PEEK) {
                                m = m->m_next;
                                moff = 0;
                        } else {
                                if (sio) {
                                        n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
                                        sbappend(sio, m);
                                        m = n;
                                } else {
                                        m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
                                }
                        }
                } else {
                        if (flags & MSG_PEEK) {
                                moff += len;
                        } else {
                                if (sio) {
                                        n = m_copym(m, 0, len, MB_WAIT);
                                        if (n)
                                                sbappend(sio, n);
                                }
                                m->m_data += len;
                                m->m_len -= len;
                                so->so_rcv.ssb_cc -= len;
                        }
                }
                if (so->so_oobmark) {
                        if ((flags & MSG_PEEK) == 0) {
                                so->so_oobmark -= len;
                                if (so->so_oobmark == 0) {
                                        sosetstate(so, SS_RCVATMARK);
                                        break;
                                }
                        } else {
                                offset += len;
                                if (offset == so->so_oobmark)
                                        break;
                        }
                }
                if (flags & MSG_EOR)
                        break;
                /*
                 * If the MSG_WAITALL flag is set (for non-atomic socket),
                 * we must not quit until resid == 0 or an error
                 * termination.  If a signal/timeout occurs, return
                 * with a short count but without error.
                 * Keep signalsockbuf locked against other readers.
                 */
                while ((flags & MSG_WAITALL) && m == NULL &&
                    resid > 0 && !sosendallatonce(so) &&
                    so->so_rcv.ssb_mb == NULL) {
                        if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                break;
                        /*
                         * The window might have closed to zero, make
                         * sure we send an ack now that we've drained
                         * the buffer or we might end up blocking until
                         * the idle takes over (5 seconds).
                         */
                        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                                so_pru_rcvd(so, flags);
                        error = ssb_wait(&so->so_rcv);
                        if (error) {
                                ssb_unlock(&so->so_rcv);
                                error = 0;
                                goto done;
                        }
                        m = so->so_rcv.ssb_mb;
                }
        }

        /*
         * If an atomic read was requested but unread data still remains
         * in the record, set MSG_TRUNC.
         */
        if (m && pr->pr_flags & PR_ATOMIC)
                flags |= MSG_TRUNC;

        /*
         * Cleanup.  If an atomic read was requested drop any unread data.
         */
        if ((flags & MSG_PEEK) == 0) {
                if (m && (pr->pr_flags & PR_ATOMIC))
                        sbdroprecord(&so->so_rcv.sb);
                if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
                        so_pru_rcvd(so, flags);
        }

        if (orig_resid == resid && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
                ssb_unlock(&so->so_rcv);
                goto restart;
        }

        if (flagsp)
                *flagsp |= flags;
release:
        ssb_unlock(&so->so_rcv);
done:
        lwkt_reltoken(&so->so_rcv.ssb_token);
        if (free_chain)
                m_freem(free_chain);
        return (error);
}
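
/*
 * A specialization of soreceive() for TCP.  TCP records carry neither
 * addresses nor control mbufs and out-of-band data is not interleaved
 * in the receive buffer, so the per-record bookkeeping done by
 * soreceive() is omitted and the protocol is notified of drained data
 * asynchronously via so_pru_rcvd_async().
 */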
int
sorecvtcp(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct sockbuf *sio, struct mbuf **controlp, int *flagsp)
{
        struct mbuf *m, *n;
        struct mbuf *free_chain = NULL;
        int flags, len, error, offset;
        struct protosw *pr = so->so_proto;
        int moff;
        size_t resid, orig_resid;

        if (uio)
                resid = uio->uio_resid;
        else
                resid = (size_t)(sio->sb_climit - sio->sb_cc);
        orig_resid = resid;

        if (psa)
                *psa = NULL;
        if (controlp)
                *controlp = NULL;
        if (flagsp)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;
        if (flags & MSG_OOB) {
                m = m_get(MB_WAIT, MT_DATA);
                if (m == NULL)
                        return (ENOBUFS);
                error = so_pru_rcvoob(so, m, flags & MSG_PEEK);
                if (error)
                        goto bad;
                if (sio) {
                        do {
                                sbappend(sio, m);
                                KKASSERT(resid >= (size_t)m->m_len);
                                resid -= (size_t)m->m_len;
                        } while (resid > 0 && m);
                } else {
                        do {
                                uio->uio_resid = resid;
                                error = uiomove(mtod(m, caddr_t),
                                    (int)szmin(resid, m->m_len),
                                    uio);
                                resid = uio->uio_resid;
                                m = m_free(m);
                        } while (uio->uio_resid && error == 0 && m);
                }
bad:
                if (m)
                        m_freem(m);
                return (error);
        }

        /*
         * The token interlocks against the protocol thread while
         * ssb_lock is a blocking lock against other userland entities.
         */
        lwkt_gettoken(&so->so_rcv.ssb_token);
restart:
        error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags));
        if (error)
                goto done;

        m = so->so_rcv.ssb_mb;
        /*
         * If we have less data than requested, block awaiting more
         * (subject to any timeout) if:
         *   1. the current count is less than the low water mark, or
         *   2. MSG_WAITALL is set, and it is possible to do the entire
         *      receive operation at once if we block (resid <= hiwat).
         *   3. MSG_DONTWAIT is not set
         * If MSG_WAITALL is set but resid is larger than the receive buffer,
         * we have to do the receive in sections, and thus risk returning
         * a short count if a timeout or signal occurs after we start.
         */
        if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
            (size_t)so->so_rcv.ssb_cc < resid) &&
            (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat ||
            ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)))) {
                KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1"));
                if (so->so_error) {
                        if (m)
                                goto dontblock;
                        error = so->so_error;
                        if ((flags & MSG_PEEK) == 0)
                                so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
                        if (m)
                                goto dontblock;
                        else
                                goto release;
                }
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                    (pr->pr_flags & PR_CONNREQUIRED)) {
                        error = ENOTCONN;
                        goto release;
                }
                if (resid == 0)
                        goto release;
                if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) {
                        error = EWOULDBLOCK;
                        goto release;
                }
                ssb_unlock(&so->so_rcv);
                error = ssb_wait(&so->so_rcv);
                if (error)
                        goto done;
                goto restart;
        }
dontblock:
        if (uio && uio->uio_td && uio->uio_td->td_proc)
                uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++;

        /*
         * note: m should be == sb_mb here.  Cache the next record while
         * cleaning up.  Note that calling m_free*() will break out of the
         * critical section.
         */
        KKASSERT(m == so->so_rcv.ssb_mb);

        /*
         * Copy to the UIO or mbuf return chain (*mp).
         */
        moff = 0;
        offset = 0;
        while (m && resid > 0 && error == 0) {
                KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
                    ("receive 3"));

                soclrstate(so, SS_RCVATMARK);
                len = (resid > INT_MAX) ? INT_MAX : resid;
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
                if (len > m->m_len - moff)
                        len = m->m_len - moff;

                /*
                 * Copy out to the UIO or pass the mbufs back to the SIO.
                 * The SIO is dealt with when we eat the mbuf, but deal
                 * with the resid here either way.
                 */
                if (uio) {
                        uio->uio_resid = resid;
                        error = uiomove(mtod(m, caddr_t) + moff, len, uio);
                        resid = uio->uio_resid;
                        if (error)
                                goto release;
                } else {
                        resid -= (size_t)len;
                }

                /*
                 * Eat the entire mbuf or just a piece of it
                 */
                if (len == m->m_len - moff) {
                        if (flags & MSG_PEEK) {
                                m = m->m_next;
                                moff = 0;
                        } else {
                                if (sio) {
                                        n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
                                        sbappend(sio, m);
                                        m = n;
                                } else {
                                        m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
                                }
                        }
                } else {
                        if (flags & MSG_PEEK) {
                                moff += len;
                        } else {
                                if (sio) {
                                        n = m_copym(m, 0, len, MB_WAIT);
                                        if (n)
                                                sbappend(sio, n);
                                }
                                m->m_data += len;
                                m->m_len -= len;
                                so->so_rcv.ssb_cc -= len;
                        }
                }
                if (so->so_oobmark) {
                        if ((flags & MSG_PEEK) == 0) {
                                so->so_oobmark -= len;
                                if (so->so_oobmark == 0) {
                                        sosetstate(so, SS_RCVATMARK);
                                        break;
                                }
                        } else {
                                offset += len;
                                if (offset == so->so_oobmark)
                                        break;
                        }
                }
                /*
                 * If the MSG_WAITALL flag is set (for non-atomic socket),
                 * we must not quit until resid == 0 or an error
                 * termination.  If a signal/timeout occurs, return
                 * with a short count but without error.
                 * Keep signalsockbuf locked against other readers.
                 */
                while ((flags & MSG_WAITALL) && m == NULL &&
                    resid > 0 && !sosendallatonce(so) &&
                    so->so_rcv.ssb_mb == NULL) {
                        if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                break;
                        /*
                         * The window might have closed to zero, make
                         * sure we send an ack now that we've drained
                         * the buffer or we might end up blocking until
                         * the idle takes over (5 seconds).
                         */
                        if (so->so_pcb)
                                so_pru_rcvd_async(so);
                        error = ssb_wait(&so->so_rcv);
                        if (error) {
                                ssb_unlock(&so->so_rcv);
                                error = 0;
                                goto done;
                        }
                        m = so->so_rcv.ssb_mb;
                }
        }

        /*
         * Cleanup.  If an atomic read was requested drop any unread data.
         */
        if ((flags & MSG_PEEK) == 0) {
                if (so->so_pcb)
                        so_pru_rcvd_async(so);
        }

        if (orig_resid == resid && orig_resid &&
            (so->so_state & SS_CANTRCVMORE) == 0) {
                ssb_unlock(&so->so_rcv);
                goto restart;
        }

        if (flagsp)
                *flagsp |= flags;
release:
        ssb_unlock(&so->so_rcv);
done:
        lwkt_reltoken(&so->so_rcv.ssb_token);
        if (free_chain)
                m_freem(free_chain);
        return (error);
}

/*
 * Shut a socket down.  Note that we do not get a frontend lock as we
 * want to be able to shut the socket down even if another thread is
 * blocked in a read(), thus waking it up.
 */
int
soshutdown(struct socket *so, int how)
{
        if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
                return (EINVAL);

        if (how != SHUT_WR) {
                /*ssb_lock(&so->so_rcv, M_WAITOK);*/
                sorflush(so);
                /*ssb_unlock(&so->so_rcv);*/
        }
        if (how != SHUT_RD)
                return (so_pru_shutdown(so));
        return (0);
}

void
sorflush(struct socket *so)
{
        struct signalsockbuf *ssb = &so->so_rcv;
        struct protosw *pr = so->so_proto;
        struct signalsockbuf asb;

        atomic_set_int(&ssb->ssb_flags, SSB_NOINTR);

        lwkt_gettoken(&ssb->ssb_token);
        socantrcvmore(so);
        asb = *ssb;

        /*
         * Can't just blow up the ssb structure here
         */
        bzero(&ssb->sb, sizeof(ssb->sb));
        ssb->ssb_timeo = 0;
        ssb->ssb_lowat = 0;
        ssb->ssb_hiwat = 0;
        ssb->ssb_mbmax = 0;
        atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK);

        if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(asb.ssb_mb);
        ssb_release(&asb, so);

        lwkt_reltoken(&ssb->ssb_token);
}

#ifdef INET
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
        struct accept_filter_arg *afap = NULL;
        struct accept_filter *afp;
        struct so_accf *af = so->so_accf;
        int error = 0;

        /* do not set/remove accept filters on non listen sockets */
        if ((so->so_options & SO_ACCEPTCONN) == 0) {
                error = EINVAL;
                goto out;
        }

        /* removing the filter */
        if (sopt == NULL) {
                if (af != NULL) {
                        if (af->so_accept_filter != NULL &&
                            af->so_accept_filter->accf_destroy != NULL) {
                                af->so_accept_filter->accf_destroy(so);
                        }
                        if (af->so_accept_filter_str != NULL) {
                                kfree(af->so_accept_filter_str, M_ACCF);
                        }
                        kfree(af, M_ACCF);
                        so->so_accf = NULL;
                }
                so->so_options &= ~SO_ACCEPTFILTER;
                return (0);
        }
        /* adding a filter */
        /* must remove previous filter first */
        if (af != NULL) {
                error = EINVAL;
                goto out;
        }
        /* don't put large objects on the kernel stack */
        afap = kmalloc(sizeof(*afap), M_TEMP, M_WAITOK);
        error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
        afap->af_name[sizeof(afap->af_name)-1] = '\0';
        afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
        if (error)
                goto out;
        afp = accept_filt_get(afap->af_name);
        if (afp == NULL) {
                error = ENOENT;
                goto out;
        }
        af = kmalloc(sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
        if (afp->accf_create != NULL) {
                if (afap->af_name[0] != '\0') {
                        int len = strlen(afap->af_name) + 1;

                        af->so_accept_filter_str = kmalloc(len, M_ACCF,
                            M_WAITOK);
                        strcpy(af->so_accept_filter_str, afap->af_name);
                }
                af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
                if (af->so_accept_filter_arg == NULL) {
                        kfree(af->so_accept_filter_str, M_ACCF);
                        kfree(af, M_ACCF);
                        so->so_accf = NULL;
                        error = EINVAL;
                        goto out;
                }
        }
        af->so_accept_filter = afp;
        so->so_accf = af;
        so->so_options |= SO_ACCEPTFILTER;
out:
        if (afap != NULL)
                kfree(afap, M_TEMP);
        return (error);
}
#endif /* INET */

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
        return soopt_to_kbuf(sopt, buf, len, minlen);
}

int
soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
        size_t valsize;

        KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
        KKASSERT(kva_p(buf));

        /*
         * If the user gives us more than we wanted, we ignore it,
         * but if we don't get the minimum length the caller
         * wants, we return EINVAL.  On success, sopt->sopt_valsize
         * is set to however much we actually retrieved.
         */
        if ((valsize = sopt->sopt_valsize) < minlen)
                return EINVAL;
        if (valsize > len)
                sopt->sopt_valsize = valsize = len;

        bcopy(sopt->sopt_val, buf, valsize);
        return 0;
}
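
/*
 * Set a socket option.  Levels other than SOL_SOCKET are passed
 * straight to the protocol via pr_ctloutput().  SOL_SOCKET options are
 * handled here and, on success, also handed on to the protocol so it
 * can take note of the change.
 */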
int
sosetopt(struct socket *so, struct sockopt *sopt)
{
        int error, optval;
        struct linger l;
        struct timeval tv;
        u_long val;
        struct signalsockbuf *sotmp;

        error = 0;
        sopt->sopt_dir = SOPT_SET;
        if (sopt->sopt_level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput) {
                        return (so_pr_ctloutput(so, sopt));
                }
                error = ENOPROTOOPT;
        } else {
                switch (sopt->sopt_name) {
#ifdef INET
                case SO_ACCEPTFILTER:
                        error = do_setopt_accept_filter(so, sopt);
                        if (error)
                                goto bad;
                        break;
#endif /* INET */
                case SO_LINGER:
                        error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
                        if (error)
                                goto bad;

                        so->so_linger = l.l_linger;
                        if (l.l_onoff)
                                so->so_options |= SO_LINGER;
                        else
                                so->so_options &= ~SO_LINGER;
                        break;

                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_DONTROUTE:
                case SO_USELOOPBACK:
                case SO_BROADCAST:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                case SO_NOSIGPIPE:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                            sizeof optval);
                        if (error)
                                goto bad;
                        if (optval)
                                so->so_options |= sopt->sopt_name;
                        else
                                so->so_options &= ~sopt->sopt_name;
                        break;

                case SO_SNDBUF:
                case SO_RCVBUF:
                case SO_SNDLOWAT:
                case SO_RCVLOWAT:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                            sizeof optval);
                        if (error)
                                goto bad;

                        /*
                         * Values < 1 make no sense for any of these
                         * options, so disallow them.
                         */
                        if (optval < 1) {
                                error = EINVAL;
                                goto bad;
                        }

                        switch (sopt->sopt_name) {
                        case SO_SNDBUF:
                        case SO_RCVBUF:
                                if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
                                    &so->so_snd : &so->so_rcv, (u_long)optval,
                                    so,
                                    &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
                                        error = ENOBUFS;
                                        goto bad;
                                }
                                sotmp = (sopt->sopt_name == SO_SNDBUF) ?
                                    &so->so_snd : &so->so_rcv;
                                atomic_clear_int(&sotmp->ssb_flags,
                                    SSB_AUTOSIZE);
                                break;

                        /*
                         * Make sure the low-water is never greater than
                         * the high-water.
                         */
                        case SO_SNDLOWAT:
                                so->so_snd.ssb_lowat =
                                    (optval > so->so_snd.ssb_hiwat) ?
                                    so->so_snd.ssb_hiwat : optval;
                                atomic_clear_int(&so->so_snd.ssb_flags,
                                    SSB_AUTOLOWAT);
                                break;
                        case SO_RCVLOWAT:
                                so->so_rcv.ssb_lowat =
                                    (optval > so->so_rcv.ssb_hiwat) ?

int
sosetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	u_long	val;
	struct signalsockbuf *sotmp;

	error = 0;
	sopt->sopt_dir = SOPT_SET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pr_ctloutput(so, sopt));
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif /* INET */
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so,
				    &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				sotmp = (sopt->sopt_name == SO_SNDBUF) ?
					&so->so_snd : &so->so_rcv;
				atomic_clear_int(&sotmp->ssb_flags,
						 SSB_AUTOSIZE);
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.ssb_lowat =
				    (optval > so->so_snd.ssb_hiwat) ?
				    so->so_snd.ssb_hiwat : optval;
				atomic_clear_int(&so->so_snd.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.ssb_lowat =
				    (optval > so->so_rcv.ssb_hiwat) ?
				    so->so_rcv.ssb_hiwat : optval;
				atomic_clear_int(&so->so_rcv.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - INT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick;
			if (val > INT_MAX) {
				error = EDOM;
				goto bad;
			}
			if (val == 0 && tv.tv_usec != 0)
				val = 1;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.ssb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.ssb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) so_pr_ctloutput(so, sopt);
		}
	}
bad:
	return (error);
}
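
/*
 * Worked example for the timeout conversion above (illustrative,
 * assuming hz = 100, so ustick = 1000000 / hz = 10000 microseconds
 * per tick): a user timeout of { tv_sec = 2, tv_usec = 500000 }
 * yields val = 2 * 100 + 500000 / 10000 = 250 ticks.  A nonzero
 * timeout that would round down to 0 ticks is bumped to 1 so it is
 * not mistaken for "no timeout".  From user space, with s an assumed
 * descriptor:
 *
 *	struct timeval tv = { 2, 500000 };
 *
 *	setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 */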

/* Helper routine for getsockopt */
int
sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
{
	soopt_from_kbuf(sopt, buf, len);
	return 0;
}

void
soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len)
{
	size_t valsize;

	if (len == 0) {
		sopt->sopt_valsize = 0;
		return;
	}

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = szmin(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != NULL) {
		bcopy(buf, sopt->sopt_val, valsize);
	}
}

int
sogetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	long	optval_l;
	struct	linger l;
	struct	timeval tv;
#ifdef INET
	struct accept_filter_arg *afap;
#endif

	error = 0;
	sopt->sopt_dir = SOPT_GET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pr_ctloutput(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			afap = kmalloc(sizeof(*afap), M_TEMP,
				       M_WAITOK | M_ZERO);
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name,
				    so->so_accf->so_accept_filter->accf_name);
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg,
					    so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			kfree(afap, M_TEMP);
			break;
#endif /* INET */

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_NOSIGPIPE:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.ssb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.ssb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.ssb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.ssb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * ustick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_SNDSPACE:
			optval_l = ssb_space(&so->so_snd);
			error = sooptcopyout(sopt, &optval_l,
					     sizeof(optval_l));
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
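
/*
 * Usage sketch (illustrative): the SO_ERROR case above implements the
 * read-and-clear step of the classic non-blocking connect(2) idiom.
 * Once the socket polls writable, user code fetches the deferred
 * error and checks whether the connect actually failed; s is an
 * assumed descriptor.
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *
 *	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len) == 0 &&
 *	    err != 0)
 *		errno = err;
 */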

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize, msize;

	m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_DATA,
		   0, &msize);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = min(msize, sopt_size);
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size > 0) {
		m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT,
			   MT_DATA, 0, &msize);
		if (m == NULL) {
			m_freem(*mp);
			return (ENOBUFS);
		}
		m->m_len = min(msize, sopt_size);
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return (0);
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);
	return 0;
}

void
soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	size_t valsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return;
	val = sopt->sopt_val;
	valsize = sopt->sopt_valsize;
	while (m != NULL && valsize >= m->m_len) {
		bcopy(val, mtod(m, char *), m->m_len);
		valsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		m = m->m_next;
	}
	/* the chain should have been allocated large enough beforehand */
	if (m != NULL)
		panic("ip6_sooptmcopyin");
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	return soopt_from_mbuf(sopt, m);
}

int
soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;
	size_t maxsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return 0;
	val = sopt->sopt_val;
	maxsize = sopt->sopt_valsize;
	while (m != NULL && maxsize >= m->m_len) {
		bcopy(mtod(m, char *), val, m->m_len);
		maxsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* the caller should supply a large enough soopt buffer */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

void
sohasoutofband(struct socket *so)
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	KNOTE(&so->so_rcv.ssb_kq.ki_note, NOTE_OOB);
}

int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct signalsockbuf *ssb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		ssb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		ssb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		ssb = &so->so_rcv;
		break;
	default:
		return (EOPNOTSUPP);
	}

	knote_insert(&ssb->ssb_kq.ki_note, kn);
	atomic_set_int(&ssb->ssb_flags, SSB_KNOTE);
	return (0);
}
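
/*
 * Usage sketch (illustrative): sokqfilter() is reached when user code
 * registers a socket with kevent(2).  Passing NOTE_LOWAT in fflags
 * makes the read filter fire only once at least `data' bytes are
 * buffered, matching the NOTE_LOWAT test in filt_soread() below.
 * s and kq are assumed descriptors.
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */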

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_rcv.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note))
		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	if (kn->kn_sfflags & NOTE_OOB) {
		if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) {
			kn->kn_fflags |= NOTE_OOB;
			return (1);
		}
		return (0);
	}
	kn->kn_data = so->so_rcv.ssb_cc;

	if (so->so_state & SS_CANTRCVMORE) {
		/*
		 * Only set NODATA if all data has been exhausted.
		 */
		if (kn->kn_data == 0)
			kn->kn_flags |= EV_NODATA;
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return ((kn->kn_data >= so->so_rcv.ssb_lowat) ||
		!TAILQ_EMPTY(&so->so_comp));
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_snd.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note))
		atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = ssb_space(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.ssb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (!TAILQ_EMPTY(&so->so_comp));
}
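
/*
 * Usage sketch (illustrative): on a listening socket the EVFILT_READ
 * branch of sokqfilter() selects filt_solisten() above, so a kevent(2)
 * wakeup reports the completed-connection queue length in kev.data.
 * lfd and kq are assumed descriptors.
 *
 *	struct kevent kev;
 *	int fd;
 *
 *	EV_SET(&kev, lfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	if (kevent(kq, NULL, 0, &kev, 1, NULL) > 0 && kev.data > 0)
 *		fd = accept(lfd, NULL, NULL);
 */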