1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 * @(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
 * $DragonFly: src/sys/kern/uipc_socket.c,v 1.55 2008/09/02 16:17:52 dillon Exp $
 */

#include "opt_inet.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/malloc.h>			/* NOTE(review): duplicate of <sys/malloc.h> above */
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <vm/vm_zone.h>
#include <vm/pmap.h>

#include <sys/thread2.h>
#include <sys/socketvar2.h>

#include <machine/limits.h>

#ifdef INET
static int	 do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

/* kqueue filter attach/detach/event handlers, defined later in this file */
static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

/*
 * kqueue filter dispatch tables.  Note that the "exception" filter shares
 * the read-side detach/event handlers (soexcept uses filt_soread).
 */
static struct filterops solisten_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ FILTEROP_ISFD, NULL, filt_sowdetach, filt_sowrite };
static struct filterops soexcept_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_soread };

MALLOC_DEFINE(M_SOCKET, "socket", "socket struct");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

126 static int somaxconn = SOMAXCONN; 127 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, 128 &somaxconn, 0, "Maximum pending socket connection queue size"); 129 130 /* 131 * Socket operation routines. 132 * These routines are called by the routines in 133 * sys_socket.c or from a system process, and 134 * implement the semantics of socket operations by 135 * switching out to the protocol specific routines. 136 */ 137 138 /* 139 * Get a socket structure, and initialize it. 140 * Note that it would probably be better to allocate socket 141 * and PCB at the same time, but I'm not convinced that all 142 * the protocols can be easily modified to do this. 143 */ 144 struct socket * 145 soalloc(int waitok) 146 { 147 struct socket *so; 148 unsigned waitmask; 149 150 waitmask = waitok ? M_WAITOK : M_NOWAIT; 151 so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask); 152 if (so) { 153 /* XXX race condition for reentrant kernel */ 154 TAILQ_INIT(&so->so_aiojobq); 155 TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist); 156 TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist); 157 lwkt_token_init(&so->so_rcv.ssb_token, 1, "rcvtok"); 158 lwkt_token_init(&so->so_snd.ssb_token, 1, "rcvtok"); 159 so->so_state = SS_NOFDREF; 160 so->so_refs = 1; 161 } 162 return so; 163 } 164 165 int 166 socreate(int dom, struct socket **aso, int type, 167 int proto, struct thread *td) 168 { 169 struct proc *p = td->td_proc; 170 struct protosw *prp; 171 struct socket *so; 172 struct pru_attach_info ai; 173 int error; 174 175 if (proto) 176 prp = pffindproto(dom, proto, type); 177 else 178 prp = pffindtype(dom, type); 179 180 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 181 return (EPROTONOSUPPORT); 182 183 if (p->p_ucred->cr_prison && jail_socket_unixiproute_only && 184 prp->pr_domain->dom_family != PF_LOCAL && 185 prp->pr_domain->dom_family != PF_INET && 186 prp->pr_domain->dom_family != PF_INET6 && 187 prp->pr_domain->dom_family != PF_ROUTE) { 188 return (EPROTONOSUPPORT); 189 } 190 191 if 
(prp->pr_type != type) 192 return (EPROTOTYPE); 193 so = soalloc(p != 0); 194 if (so == NULL) 195 return (ENOBUFS); 196 197 /* 198 * Callers of socreate() presumably will connect up a descriptor 199 * and call soclose() if they cannot. This represents our so_refs 200 * (which should be 1) from soalloc(). 201 */ 202 soclrstate(so, SS_NOFDREF); 203 204 /* 205 * Set a default port for protocol processing. No action will occur 206 * on the socket on this port until an inpcb is attached to it and 207 * is able to match incoming packets, or until the socket becomes 208 * available to userland. 209 */ 210 so->so_port = cpu0_soport(so, NULL, NULL); 211 212 TAILQ_INIT(&so->so_incomp); 213 TAILQ_INIT(&so->so_comp); 214 so->so_type = type; 215 so->so_cred = crhold(p->p_ucred); 216 so->so_proto = prp; 217 ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE]; 218 ai.p_ucred = p->p_ucred; 219 ai.fd_rdir = p->p_fd->fd_rdir; 220 221 /* 222 * Auto-sizing of socket buffers is managed by the protocols and 223 * the appropriate flags must be set in the pru_attach function. 224 */ 225 error = so_pru_attach(so, proto, &ai); 226 if (error) { 227 sosetstate(so, SS_NOFDREF); 228 sofree(so); /* from soalloc */ 229 return error; 230 } 231 232 /* 233 * NOTE: Returns referenced socket. 
234 */ 235 *aso = so; 236 return (0); 237 } 238 239 int 240 sobind(struct socket *so, struct sockaddr *nam, struct thread *td) 241 { 242 int error; 243 244 error = so_pru_bind(so, nam, td); 245 return (error); 246 } 247 248 static void 249 sodealloc(struct socket *so) 250 { 251 if (so->so_rcv.ssb_hiwat) 252 (void)chgsbsize(so->so_cred->cr_uidinfo, 253 &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY); 254 if (so->so_snd.ssb_hiwat) 255 (void)chgsbsize(so->so_cred->cr_uidinfo, 256 &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY); 257 #ifdef INET 258 /* remove accept filter if present */ 259 if (so->so_accf != NULL) 260 do_setopt_accept_filter(so, NULL); 261 #endif /* INET */ 262 crfree(so->so_cred); 263 kfree(so, M_SOCKET); 264 } 265 266 int 267 solisten(struct socket *so, int backlog, struct thread *td) 268 { 269 int error; 270 #ifdef SCTP 271 short oldopt, oldqlimit; 272 #endif /* SCTP */ 273 274 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) 275 return (EINVAL); 276 277 #ifdef SCTP 278 oldopt = so->so_options; 279 oldqlimit = so->so_qlimit; 280 #endif /* SCTP */ 281 282 lwkt_gettoken(&so->so_rcv.ssb_token); 283 if (TAILQ_EMPTY(&so->so_comp)) 284 so->so_options |= SO_ACCEPTCONN; 285 lwkt_reltoken(&so->so_rcv.ssb_token); 286 if (backlog < 0 || backlog > somaxconn) 287 backlog = somaxconn; 288 so->so_qlimit = backlog; 289 /* SCTP needs to look at tweak both the inbound backlog parameter AND 290 * the so_options (UDP model both connect's and gets inbound 291 * connections .. implicitly). 292 */ 293 error = so_pru_listen(so, td); 294 if (error) { 295 #ifdef SCTP 296 /* Restore the params */ 297 so->so_options = oldopt; 298 so->so_qlimit = oldqlimit; 299 #endif /* SCTP */ 300 return (error); 301 } 302 return (0); 303 } 304 305 /* 306 * Destroy a disconnected socket. 
This routine is a NOP if entities
 * still have a reference on the socket:
 *
 *	so_pcb -	The protocol stack still has a reference
 *	SS_NOFDREF -	There is no longer a file pointer reference
 */
void
sofree(struct socket *so)
{
	struct socket *head = so->so_head;

	/*
	 * Arbitrage the last free.
	 */
	/* Atomically drop one reference; only the thread that takes the
	 * count 1 -> 0 proceeds to tear the socket down. */
	KKASSERT(so->so_refs > 0);
	if (atomic_fetchadd_int(&so->so_refs, -1) != 1)
		return;

	KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF));

	/*
	 * We're done, clean up
	 */
	/* If embryonic, unlink from the listen socket's incomplete queue
	 * under the head's receive-side token. */
	if (head != NULL) {
		lwkt_gettoken(&head->so_rcv.ssb_token);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			lwkt_reltoken(&head->so_rcv.ssb_token);
			return;
		} else {
			panic("sofree: not queued");
		}
		soclrstate(so, SS_INCOMP);
		so->so_head = NULL;
		lwkt_reltoken(&head->so_rcv.ssb_token);
	}
	ssb_release(&so->so_snd, so);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int fflag)
{
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_pcb == NULL)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		/* SO_LINGER: wait (up to so_linger seconds, interruptibly)
		 * for the disconnect to complete unless non-blocking. */
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (fflag & FNONBLOCK))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo, PCATCH,
					       "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;

		error2 = so_pru_detach(so);
		if (error == 0)
			error = error2;	/* report the first error only */
	}
discard:
	/* If this was a listen socket, abort every queued (incomplete and
	 * completed-but-unaccepted) connection under the rcv token. */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
			soclrstate(sp, SS_INCOMP);
			sp->so_head = NULL;
			so->so_incqlen--;
			soaborta(sp);
		}
		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			soclrstate(sp, SS_COMP);
			sp->so_head = NULL;
			so->so_qlen--;
			soaborta(sp);
		}
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	sosetstate(so, SS_NOFDREF);	/* take ref */
	sofree(so);			/* dispose of ref */
	return (error);
}

/*
 * Abort and destroy a socket.  Only one abort can be in progress
 * at any given moment.
 */
/* Each abort variant takes a reference which the protocol's abort
 * path is responsible for disposing of. */
void
soabort(struct socket *so)
{
	soreference(so);
	so_pru_abort(so);
}

/* Asynchronous variant of soabort(). */
void
soaborta(struct socket *so)
{
	soreference(so);
	so_pru_aborta(so);
}

/* Abort variant run on the socket's owning cpu. */
void
soabort_oncpu(struct socket *so)
{
	soreference(so);
	so_pru_abort_oncpu(so);
}

/*
 * Accept a pending connection; the socket must still be flagged
 * SS_NOFDREF (no file descriptor attached yet).  Takes the reference
 * that the new descriptor will own and asks the protocol for the
 * peer's address (*nam).
 */
int
soaccept(struct socket *so, struct sockaddr **nam)
{
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	soreference(so);		/* create ref */
	soclrstate(so, SS_NOFDREF);	/* owned by lack of SS_NOFDREF */
	error = so_pru_accept(so, nam);
	return (error);
}

/*
 * Initiate a connection to the given address.  Not permitted on a
 * listening socket.
 */
int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so)))) {
		error = EISCONN;
	} else {
		/*
		 * Prevent accumulated error from previous connection
		 * from biting us.
		 */
		so->so_error = 0;
		error = so_pru_connect(so, nam, td);
	}
	return (error);
}

/* Connect two sockets to each other (e.g. socketpair(2)). */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	error = so_pru_connect2(so1, so2);
	return (error);
}

/*
 * Initiate a disconnect.  Fails with ENOTCONN if not connected and
 * EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(struct socket *so)
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = so_pru_disconnect(so);
bad:
	return (error);
}

/* Map MSG_DONTWAIT onto the sockbuf locking mode. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
	struct mbuf *top, struct mbuf *control, int flags,
	struct thread *td)
{
	struct mbuf **mp;
	struct mbuf *m;
	size_t resid;
	int space, len;
	int clen = 0, error, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;
	int pru_flags;

	if (uio) {
		resid = uio->uio_resid;
	} else {
		resid = (size_t)top->m_pkthdr.len;
#ifdef INVARIANTS
		/* cross-check the packet header length against the chain */
		len = 0;
		for (m = top; m; m = m->m_next)
			len += m->m_len;
		KKASSERT(top->m_pkthdr.len == len);
#endif
	}

	/*
	 * WARNING!  resid is unsigned, space and len are signed.  space
	 * 	     can wind up negative if the sockbuf is overcommitted.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td->td_lwp != NULL)
		td->td_lwp->lwp_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	gotoerr(errcode)	{ error = errcode; goto release; }

restart:
	error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

	do {
		if (so->so_state & SS_CANTSENDMORE)
			gotoerr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					gotoerr(ENOTCONN);
			} else if (addr == 0)
				gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
					ENOTCONN : EDESTADDRREQ);
		}
		if ((atomic && resid > so->so_snd.ssb_hiwat) ||
		    clen > so->so_snd.ssb_hiwat) {
			gotoerr(EMSGSIZE);
		}
		space = ssb_space(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;	/* allow OOB to overcommit slightly */
		if ((space < 0 || (size_t)space < resid + clen) && uio &&
		    (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
			if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
				gotoerr(EWOULDBLOCK);
			ssb_unlock(&so->so_snd);
			error = ssb_wait(&so->so_snd);
			if (error)
				goto out;
			goto restart;
		}
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* fill mbufs from the uio, up to 'space' */
				if (resid > INT_MAX)
					resid = INT_MAX;
				m = m_getl((int)resid, MB_WAIT, MT_DATA,
					   top == NULL ? M_PKTHDR : 0, &mlen);
				if (top == NULL) {
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = NULL;
				}
				len = imin((int)szmin(mlen, resid), space);
				if (resid < MINCLSIZE) {
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid == 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (flags & MSG_OOB) {
				pru_flags = PRUS_OOB;
			} else if ((flags & MSG_EOF) &&
				   (so->so_proto->pr_flags & PR_IMPLOPCL) &&
				   (resid == 0)) {
				/*
				 * If the user set MSG_EOF, the protocol
				 * understands this flag and nothing left to
				 * send then use PRU_SEND_EOF instead of PRU_SEND.
				 */
				pru_flags = PRUS_EOF;
			} else if (resid > 0 && space > 0) {
				/* If there is more to send, set PRUS_MORETOCOME */
				pru_flags = PRUS_MORETOCOME;
			} else {
				pru_flags = 0;
			}
			/*
			 * XXX all the SS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc.  We could
			 * probably recheck again inside the splnet() protection
			 * here, but there are probably other places that this
			 * also happens.  We must rethink this.
			 */
			error = so_pru_send(so, pru_flags, top, addr, control, td);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* ownership of top/control passed to the protocol */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	ssb_unlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * A specialization of sosend() for UDP based on protocol-specific knowledge:
 *   so->so_proto->pr_flags has the PR_ATOMIC field set.  This means that
 *	sosendallatonce() returns true,
 *	the "atomic" variable is true,
 *	and sosendudp() blocks until space is available for the entire send.
 *   so->so_proto->pr_flags does not have the PR_CONNREQUIRED or
 *	PR_IMPLOPCL flags set.
 *   UDP has no out-of-band data.
 *   UDP has no control data.
 *   UDP does not support MSG_EOR.
 */
int
sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio,
	  struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	boolean_t dontroute;		/* temporary SO_DONTROUTE setting */
	size_t resid;
	int error;
	int space;

	if (td->td_lwp != NULL)
		td->td_lwp->lwp_ru.ru_msgsnd++;
	if (control)
		m_freem(control);	/* UDP carries no control data */

	KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp"));
	resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len;

restart:
	error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

	if (so->so_state & SS_CANTSENDMORE)
		gotoerr(EPIPE);
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		goto release;
	}
	if (!(so->so_state & SS_ISCONNECTED) && addr == NULL)
		gotoerr(EDESTADDRREQ);
	if (resid > so->so_snd.ssb_hiwat)
		gotoerr(EMSGSIZE);
	space = ssb_space(&so->so_snd);
	if (uio && (space < 0 || (size_t)space < resid)) {
		if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
			gotoerr(EWOULDBLOCK);
		ssb_unlock(&so->so_snd);
		error = ssb_wait(&so->so_snd);
		if (error)
			goto out;
		goto restart;
	}

	if (uio) {
		top = m_uiomove(uio);
		/*
		 * NOTE(review): on m_uiomove() failure we fall through with
		 * error still 0, so the caller sees success although nothing
		 * was sent — looks like this should set an error (EFAULT?);
		 * verify against m_uiomove()'s failure semantics.
		 */
		if (top == NULL)
			goto release;
	}

	dontroute = (flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE);
	if (dontroute)
		so->so_options |= SO_DONTROUTE;

	error = so_pru_send(so, 0, top, addr, NULL, td);
	top = NULL;		/* sent or freed in lower layer */

	if (dontroute)
		so->so_options &= ~SO_DONTROUTE;

release:
	ssb_unlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	return (error);
}

/*
 * Implement receive operations on a socket.
 *
 * We depend on the way that records are added to the signalsockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 *
 * Although the signalsockbuf is locked, new data may still be appended.
 * A token inside the ssb_lock deals with MP issues and still allows
 * the network to access the socket if we block in a uio.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.
The uio is then used 813 * only for the count in uio_resid. 814 */ 815 int 816 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, 817 struct sockbuf *sio, struct mbuf **controlp, int *flagsp) 818 { 819 struct mbuf *m, *n; 820 struct mbuf *free_chain = NULL; 821 int flags, len, error, offset; 822 struct protosw *pr = so->so_proto; 823 int moff, type = 0; 824 size_t resid, orig_resid; 825 826 if (uio) 827 resid = uio->uio_resid; 828 else 829 resid = (size_t)(sio->sb_climit - sio->sb_cc); 830 orig_resid = resid; 831 832 if (psa) 833 *psa = NULL; 834 if (controlp) 835 *controlp = NULL; 836 if (flagsp) 837 flags = *flagsp &~ MSG_EOR; 838 else 839 flags = 0; 840 if (flags & MSG_OOB) { 841 m = m_get(MB_WAIT, MT_DATA); 842 if (m == NULL) 843 return (ENOBUFS); 844 error = so_pru_rcvoob(so, m, flags & MSG_PEEK); 845 if (error) 846 goto bad; 847 if (sio) { 848 do { 849 sbappend(sio, m); 850 KKASSERT(resid >= (size_t)m->m_len); 851 resid -= (size_t)m->m_len; 852 } while (resid > 0 && m); 853 } else { 854 do { 855 uio->uio_resid = resid; 856 error = uiomove(mtod(m, caddr_t), 857 (int)szmin(resid, m->m_len), 858 uio); 859 resid = uio->uio_resid; 860 m = m_free(m); 861 } while (uio->uio_resid && error == 0 && m); 862 } 863 bad: 864 if (m) 865 m_freem(m); 866 return (error); 867 } 868 if ((so->so_state & SS_ISCONFIRMING) && resid) 869 so_pru_rcvd(so, 0); 870 871 restart: 872 error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags)); 873 if (error) 874 goto done; 875 876 m = so->so_rcv.ssb_mb; 877 /* 878 * If we have less data than requested, block awaiting more 879 * (subject to any timeout) if: 880 * 1. the current count is less than the low water mark, or 881 * 2. MSG_WAITALL is set, and it is possible to do the entire 882 * receive operation at once if we block (resid <= hiwat). 883 * 3. 
MSG_DONTWAIT is not set 884 * If MSG_WAITALL is set but resid is larger than the receive buffer, 885 * we have to do the receive in sections, and thus risk returning 886 * a short count if a timeout or signal occurs after we start. 887 */ 888 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 889 (size_t)so->so_rcv.ssb_cc < resid) && 890 (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat || 891 ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) && 892 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 893 KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1")); 894 if (so->so_error) { 895 if (m) 896 goto dontblock; 897 error = so->so_error; 898 if ((flags & MSG_PEEK) == 0) 899 so->so_error = 0; 900 goto release; 901 } 902 if (so->so_state & SS_CANTRCVMORE) { 903 if (m) 904 goto dontblock; 905 else 906 goto release; 907 } 908 for (; m; m = m->m_next) { 909 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 910 m = so->so_rcv.ssb_mb; 911 goto dontblock; 912 } 913 } 914 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 915 (pr->pr_flags & PR_CONNREQUIRED)) { 916 error = ENOTCONN; 917 goto release; 918 } 919 if (resid == 0) 920 goto release; 921 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) { 922 error = EWOULDBLOCK; 923 goto release; 924 } 925 ssb_unlock(&so->so_rcv); 926 error = ssb_wait(&so->so_rcv); 927 if (error) 928 goto done; 929 goto restart; 930 } 931 dontblock: 932 if (uio && uio->uio_td && uio->uio_td->td_proc) 933 uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++; 934 935 /* 936 * note: m should be == sb_mb here. Cache the next record while 937 * cleaning up. Note that calling m_free*() will break out critical 938 * section. 939 */ 940 KKASSERT(m == so->so_rcv.ssb_mb); 941 942 /* 943 * Skip any address mbufs prepending the record. 
944 */ 945 if (pr->pr_flags & PR_ADDR) { 946 KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 947 orig_resid = 0; 948 if (psa) 949 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 950 if (flags & MSG_PEEK) 951 m = m->m_next; 952 else 953 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 954 } 955 956 /* 957 * Skip any control mbufs prepending the record. 958 */ 959 #ifdef SCTP 960 if (pr->pr_flags & PR_ADDR_OPT) { 961 /* 962 * For SCTP we may be getting a 963 * whole message OR a partial delivery. 964 */ 965 if (m && m->m_type == MT_SONAME) { 966 orig_resid = 0; 967 if (psa) 968 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 969 if (flags & MSG_PEEK) 970 m = m->m_next; 971 else 972 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 973 } 974 } 975 #endif /* SCTP */ 976 while (m && m->m_type == MT_CONTROL && error == 0) { 977 if (flags & MSG_PEEK) { 978 if (controlp) 979 *controlp = m_copy(m, 0, m->m_len); 980 m = m->m_next; /* XXX race */ 981 } else { 982 if (controlp) { 983 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 984 if (pr->pr_domain->dom_externalize && 985 mtod(m, struct cmsghdr *)->cmsg_type == 986 SCM_RIGHTS) 987 error = (*pr->pr_domain->dom_externalize)(m); 988 *controlp = m; 989 m = n; 990 } else { 991 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 992 } 993 } 994 if (controlp && *controlp) { 995 orig_resid = 0; 996 controlp = &(*controlp)->m_next; 997 } 998 } 999 1000 /* 1001 * flag OOB data. 1002 */ 1003 if (m) { 1004 type = m->m_type; 1005 if (type == MT_OOBDATA) 1006 flags |= MSG_OOB; 1007 } 1008 1009 /* 1010 * Copy to the UIO or mbuf return chain (*mp). 1011 */ 1012 moff = 0; 1013 offset = 0; 1014 while (m && resid > 0 && error == 0) { 1015 if (m->m_type == MT_OOBDATA) { 1016 if (type != MT_OOBDATA) 1017 break; 1018 } else if (type == MT_OOBDATA) 1019 break; 1020 else 1021 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 1022 ("receive 3")); 1023 soclrstate(so, SS_RCVATMARK); 1024 len = (resid > INT_MAX) ? 
INT_MAX : resid; 1025 if (so->so_oobmark && len > so->so_oobmark - offset) 1026 len = so->so_oobmark - offset; 1027 if (len > m->m_len - moff) 1028 len = m->m_len - moff; 1029 1030 /* 1031 * Copy out to the UIO or pass the mbufs back to the SIO. 1032 * The SIO is dealt with when we eat the mbuf, but deal 1033 * with the resid here either way. 1034 */ 1035 if (uio) { 1036 uio->uio_resid = resid; 1037 error = uiomove(mtod(m, caddr_t) + moff, len, uio); 1038 resid = uio->uio_resid; 1039 if (error) 1040 goto release; 1041 } else { 1042 resid -= (size_t)len; 1043 } 1044 1045 /* 1046 * Eat the entire mbuf or just a piece of it 1047 */ 1048 if (len == m->m_len - moff) { 1049 if (m->m_flags & M_EOR) 1050 flags |= MSG_EOR; 1051 #ifdef SCTP 1052 if (m->m_flags & M_NOTIFICATION) 1053 flags |= MSG_NOTIFICATION; 1054 #endif /* SCTP */ 1055 if (flags & MSG_PEEK) { 1056 m = m->m_next; 1057 moff = 0; 1058 } else { 1059 if (sio) { 1060 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 1061 sbappend(sio, m); 1062 m = n; 1063 } else { 1064 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1065 } 1066 } 1067 } else { 1068 if (flags & MSG_PEEK) { 1069 moff += len; 1070 } else { 1071 if (sio) { 1072 n = m_copym(m, 0, len, MB_WAIT); 1073 if (n) 1074 sbappend(sio, n); 1075 } 1076 m->m_data += len; 1077 m->m_len -= len; 1078 so->so_rcv.ssb_cc -= len; 1079 } 1080 } 1081 if (so->so_oobmark) { 1082 if ((flags & MSG_PEEK) == 0) { 1083 so->so_oobmark -= len; 1084 if (so->so_oobmark == 0) { 1085 sosetstate(so, SS_RCVATMARK); 1086 break; 1087 } 1088 } else { 1089 offset += len; 1090 if (offset == so->so_oobmark) 1091 break; 1092 } 1093 } 1094 if (flags & MSG_EOR) 1095 break; 1096 /* 1097 * If the MSG_WAITALL flag is set (for non-atomic socket), 1098 * we must not quit until resid == 0 or an error 1099 * termination. If a signal/timeout occurs, return 1100 * with a short count but without error. 1101 * Keep signalsockbuf locked against other readers. 
1102 */ 1103 while ((flags & MSG_WAITALL) && m == NULL && 1104 resid > 0 && !sosendallatonce(so) && 1105 so->so_rcv.ssb_mb == NULL) { 1106 if (so->so_error || so->so_state & SS_CANTRCVMORE) 1107 break; 1108 /* 1109 * The window might have closed to zero, make 1110 * sure we send an ack now that we've drained 1111 * the buffer or we might end up blocking until 1112 * the idle takes over (5 seconds). 1113 */ 1114 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 1115 so_pru_rcvd(so, flags); 1116 error = ssb_wait(&so->so_rcv); 1117 if (error) { 1118 ssb_unlock(&so->so_rcv); 1119 error = 0; 1120 goto done; 1121 } 1122 m = so->so_rcv.ssb_mb; 1123 } 1124 } 1125 1126 /* 1127 * If an atomic read was requested but unread data still remains 1128 * in the record, set MSG_TRUNC. 1129 */ 1130 if (m && pr->pr_flags & PR_ATOMIC) 1131 flags |= MSG_TRUNC; 1132 1133 /* 1134 * Cleanup. If an atomic read was requested drop any unread data. 1135 */ 1136 if ((flags & MSG_PEEK) == 0) { 1137 if (m && (pr->pr_flags & PR_ATOMIC)) 1138 sbdroprecord(&so->so_rcv.sb); 1139 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) 1140 so_pru_rcvd(so, flags); 1141 } 1142 1143 if (orig_resid == resid && orig_resid && 1144 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 1145 ssb_unlock(&so->so_rcv); 1146 goto restart; 1147 } 1148 1149 if (flagsp) 1150 *flagsp |= flags; 1151 release: 1152 ssb_unlock(&so->so_rcv); 1153 done: 1154 if (free_chain) 1155 m_freem(free_chain); 1156 return (error); 1157 } 1158 1159 int 1160 soshutdown(struct socket *so, int how) 1161 { 1162 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 1163 return (EINVAL); 1164 1165 if (how != SHUT_WR) { 1166 ssb_lock(&so->so_rcv, M_WAITOK); /* frontend lock */ 1167 sorflush(so); 1168 ssb_unlock(&so->so_rcv); 1169 } 1170 if (how != SHUT_RD) 1171 return (so_pru_shutdown(so)); 1172 return (0); 1173 } 1174 1175 void 1176 sorflush(struct socket *so) 1177 { 1178 struct signalsockbuf *ssb = &so->so_rcv; 1179 struct protosw 
			*pr = so->so_proto;
	struct signalsockbuf asb;

	/* Don't let signals interrupt the teardown. */
	atomic_set_int(&ssb->ssb_flags, SSB_NOINTR);

	lwkt_gettoken(&ssb->ssb_token);
	socantrcvmore(so);
	/* asb takes over ownership of the old mbuf chain and counters. */
	asb = *ssb;

	/*
	 * Can't just blow up the ssb structure here
	 */
	bzero(&ssb->sb, sizeof(ssb->sb));
	ssb->ssb_timeo = 0;
	ssb->ssb_unused01 = 0;
	ssb->ssb_lowat = 0;
	ssb->ssb_hiwat = 0;
	ssb->ssb_mbmax = 0;
	atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK);

	lwkt_reltoken(&ssb->ssb_token);

	/*
	 * Dispose of the snapshot outside the token: let the domain get
	 * rid of any in-transit rights first, then release the buffer.
	 */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.ssb_mb);
	ssb_release(&asb, so);
}

#ifdef INET
/*
 * Install or remove an accept filter on a listen socket.
 * sopt == NULL removes the current filter; otherwise the filter
 * named in the copied-in accept_filter_arg is looked up and attached.
 * Returns 0 or an errno.
 */
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				FREE(af->so_accept_filter_str, M_ACCF);
			}
			FREE(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}
	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}
	/* don't put large objects on the kernel stack */
	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	/*
	 * Force termination before the error check; afap is kernel memory,
	 * so this is safe even if the copyin failed part way.
	 */
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
	if (error)
		goto out;
	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}
	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		/*
		 * NOTE(review): on accf_create() failure this FREEs
		 * so_accept_filter_str unconditionally.  In practice the
		 * string was always allocated above (accept_filt_get()
		 * matched a non-empty name), but confirm FREE() tolerates
		 * NULL if that invariant ever changes.
		 */
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			FREE(af->so_accept_filter_str, M_ACCF);
			FREE(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		FREE(afap, M_TEMP);
	return (error);
}
#endif /* INET */

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	return soopt_to_kbuf(sopt, buf, len, minlen);
}

/*
 * Copy an option value from the sockopt into a kernel buffer of size
 * len, requiring at least minlen bytes from the caller.
 */
int
soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t	valsize;

	/* sopt_val must already be a kernel address at this layer. */
	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}


/*
 * sosetopt() - handle setsockopt(2).
 *
 * Non-SOL_SOCKET levels are passed straight to the protocol's
 * ctloutput.  SOL_SOCKET options are handled here; after a successful
 * socket-level set the protocol is also notified (result ignored).
 */
int
sosetopt(struct socket *so, struct sockopt *sopt)
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	u_long val;
	struct signalsockbuf *sotmp;

	error = 0;
	sopt->sopt_dir = SOPT_SET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pru_ctloutput(so, sopt));
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif /* INET */
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/*
		 * Simple boolean options: the option name doubles as the
		 * so_options bit to set or clear.
		 */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				/* Reservation is bounded by RLIMIT_SBSIZE. */
				if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
						&so->so_snd : &so->so_rcv, (u_long)optval,
						so,
						&curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				/* Explicit size disables auto-sizing. */
				sotmp = (sopt->sopt_name == SO_SNDBUF) ?
					&so->so_snd : &so->so_rcv;
				atomic_clear_int(&sotmp->ssb_flags,
						 SSB_AUTOSIZE);
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.ssb_lowat =
				    (optval > so->so_snd.ssb_hiwat) ?
				    so->so_snd.ssb_hiwat : optval;
				atomic_clear_int(&so->so_snd.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.ssb_lowat =
				    (optval > so->so_rcv.ssb_hiwat) ?
				    so->so_rcv.ssb_hiwat : optval;
				atomic_clear_int(&so->so_rcv.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			/* Convert the timeval to ticks, capped at SHRT_MAX. */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			/* A non-zero timeout must never round down to zero. */
			if (val == 0 && tv.tv_usec != 0)
				val = 1;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.ssb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.ssb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to observe the new setting. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) so_pru_ctloutput(so, sopt);
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
{
	soopt_from_kbuf(sopt, buf, len);
	return 0;
}

/*
 * Copy an option value from a kernel buffer back into the sockopt,
 * truncating to the caller-supplied buffer size.
 */
void
soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len)
{
	size_t	valsize;

	if (len == 0) {
		sopt->sopt_valsize = 0;
		return;
	}

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * generated ahead of time.
	 */
	valsize = szmin(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		bcopy(buf, sopt->sopt_val, valsize);
	}
}

/*
 * sogetopt() - handle getsockopt(2).
 *
 * Non-SOL_SOCKET levels are passed to the protocol's ctloutput;
 * SOL_SOCKET options are answered here from the socket state.
 */
int
sogetopt(struct socket *so, struct sockopt *sopt)
{
	int error, optval;
	struct linger l;
	struct timeval tv;
#ifdef INET
	struct accept_filter_arg *afap;
#endif

	error = 0;
	sopt->sopt_dir = SOPT_GET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pru_ctloutput(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			/* Zeroed temp copy; don't put it on the stack. */
			MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
			       M_TEMP, M_WAITOK | M_ZERO);
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg,
					       so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			FREE(afap, M_TEMP);
			break;
#endif /* INET */

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/* Boolean options map directly onto so_options bits. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all int-valued options. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.ssb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.ssb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.ssb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.ssb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo);

			/* Convert ticks back to a timeval. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * ustick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize, msize;

	/* Block for memory only when there is a thread context to sleep. */
	m = m_getl(sopt_size, sopt->sopt_td ?
	    MB_WAIT : MB_DONTWAIT, MT_DATA,
	    0, &msize);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = min(msize, sopt_size);
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	/* Chain additional mbufs until sopt_valsize bytes are covered. */
	while (sopt_size > 0) {
		m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT,
			   MT_DATA, 0, &msize);
		if (m == NULL) {
			/* Out of mbufs: release the partial chain. */
			m_freem(*mp);
			return (ENOBUFS);
		}
		m->m_len = min(msize, sopt_size);
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return (0);
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);
	return 0;
}

/*
 * Copy the sockopt value into an mbuf chain previously sized by
 * soopt_getm().  Panics if the chain is too short, which the caller
 * must have prevented.
 */
void
soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	size_t valsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return;
	val = sopt->sopt_val;
	valsize = sopt->sopt_valsize;
	while (m != NULL && valsize >= m->m_len) {
		bcopy(val, mtod(m, char *), m->m_len);
		valsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		m = m->m_next;
	}
	if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
		panic("ip6_sooptmcopyin");
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines.
 */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	return soopt_from_mbuf(sopt, m);
}

/*
 * Copy an option value out of an mbuf chain into the sockopt buffer.
 * Returns EINVAL (and frees the chain) if the caller's buffer cannot
 * hold the entire chain; on success sopt_valsize is set to the number
 * of bytes copied.
 */
int
soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;
	size_t maxsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return 0;
	val = sopt->sopt_val;
	maxsize = sopt->sopt_valsize;
	while (m != NULL && maxsize >= m->m_len) {
		bcopy(mtod(m, char *), val, m->m_len);
		maxsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* enough soopt buffer should be given from user-land */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

/*
 * Notify interested parties that out-of-band data has arrived:
 * deliver SIGURG to the owning process/group and post NOTE_OOB to
 * any receive-side knotes.
 */
void
sohasoutofband(struct socket *so)
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	KNOTE(&so->so_rcv.ssb_kq.ki_note, NOTE_OOB);
}

/*
 * kqueue(2) filter attach for sockets.  Selects the filter ops and
 * the signalsockbuf to hook based on the requested filter type;
 * listen sockets get the listen filter for EVFILT_READ.
 */
int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct signalsockbuf *ssb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		ssb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		ssb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		ssb = &so->so_rcv;
		break;
	default:
		return (EOPNOTSUPP);
	}

	knote_insert(&ssb->ssb_kq.ki_note, kn);
	/* Mark the sockbuf so wakeups know a knote is attached. */
	atomic_set_int(&ssb->ssb_flags, SSB_KNOTE);
	return (0);
}

/*
 * Detach a receive-side knote; clear SSB_KNOTE when the last one
 * goes away.
 */
static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_rcv.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note))
		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE);
}

/*
 * kqueue read filter: report pending receive data (or OOB state when
 * NOTE_OOB was requested).  kn_data is set to the byte count in the
 * receive buffer.
 */
/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	if (kn->kn_sfflags & NOTE_OOB) {
		/* Trigger only on out-of-band state, not regular data. */
		if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) {
			kn->kn_fflags |= NOTE_OOB;
			return (1);
		}
		return (0);
	}
	kn->kn_data = so->so_rcv.ssb_cc;

	/*
	 * Only set EOF if all data has been exhausted.
	 */
	if ((so->so_state & SS_CANTRCVMORE) && kn->kn_data == 0) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	/* Fire at the low-water mark, or when completed connections wait. */
	return ((kn->kn_data >= so->so_rcv.ssb_lowat) ||
		!TAILQ_EMPTY(&so->so_comp));
}

/*
 * Detach a send-side knote; clear SSB_KNOTE when the last one goes
 * away.
 */
static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_snd.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note))
		atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE);
}

/*
 * kqueue write filter: kn_data is the free space in the send buffer;
 * fires when at least the low-water mark is writable, or on EOF/error.
 * Unconnected connection-oriented sockets never report writable.
 */
/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = ssb_space(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.ssb_lowat);
}

/*
 * kqueue listen filter: kn_data is the listen queue depth; fires when
 * a completed connection is ready to be accepted.
 */
/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (! TAILQ_EMPTY(&so->so_comp));
}