/*
 * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
 * $DragonFly: src/sys/kern/uipc_socket.c,v 1.55 2008/09/02 16:17:52 dillon Exp $
 */

#include "opt_inet.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <vm/vm_zone.h>
#include <vm/pmap.h>

#include <sys/thread2.h>
#include <sys/socketvar2.h>

#include <machine/limits.h>

#ifdef INET
static int	do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ FILTEROP_ISFD, NULL, filt_sowdetach, filt_sowrite };
static struct filterops soexcept_filtops =
	{ FILTEROP_ISFD, NULL, filt_sordetach, filt_soread };

MALLOC_DEFINE(M_SOCKET, "socket", "socket struct");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");
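
/*
 * The listen backlog clamp applied in solisten() below is tunable at run
 * time: KIPC_SOMAXCONN exports this variable under the kern.ipc sysctl
 * tree.  Illustration only (standard sysctl(8) usage; the value 256 is
 * just an example, not a recommended setting):
 *
 *	sysctl kern.ipc.somaxconn=256
 */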

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Get a socket structure, and initialize it.
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok)
{
	struct socket *so;
	unsigned waitmask;

	waitmask = waitok ? M_WAITOK : M_NOWAIT;
	so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO | waitmask);
	if (so) {
		/* XXX race condition for reentrant kernel */
		TAILQ_INIT(&so->so_aiojobq);
		TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist);
		TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist);
		lwkt_token_init(&so->so_rcv.ssb_token, 1, "rcvtok");
		lwkt_token_init(&so->so_snd.ssb_token, 1, "sndtok");
		so->so_state = SS_NOFDREF;
		so->so_refs = 1;
	}
	return so;
}

int
socreate(int dom, struct socket **aso, int type,
	 int proto, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct protosw *prp;
	struct socket *so;
	struct pru_attach_info ai;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL)
		return (EPROTONOSUPPORT);

	if (p->p_ucred->cr_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_INET6 &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != NULL);
	if (so == NULL)
		return (ENOBUFS);

	/*
	 * Callers of socreate() presumably will connect up a descriptor
	 * and call soclose() if they cannot.  This represents our so_refs
	 * (which should be 1) from soalloc().
	 */
	soclrstate(so, SS_NOFDREF);

	/*
	 * Set a default port for protocol processing.  No action will occur
	 * on the socket on this port until an inpcb is attached to it and
	 * is able to match incoming packets, or until the socket becomes
	 * available to userland.
	 */
	so->so_port = cpu0_soport(so, NULL, NULL);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(p->p_ucred);
	so->so_proto = prp;
	ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE];
	ai.p_ucred = p->p_ucred;
	ai.fd_rdir = p->p_fd->fd_rdir;

	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	error = so_pru_attach(so, proto, &ai);
	if (error) {
		sosetstate(so, SS_NOFDREF);
		sofree(so);	/* from soalloc */
		return error;
	}

	/*
	 * NOTE: Returns referenced socket.
	 */
	*aso = so;
	return (0);
}
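
/*
 * Usage sketch for socreate() (illustrative only; the TCP parameters are
 * assumptions, any domain/type/protocol triple that pffindproto() or
 * pffindtype() resolves behaves the same way):
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, td);
 *	if (error)
 *		return (error);
 *	... use the socket ...
 *	soclose(so, 0);		releases the reference socreate() returned
 */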

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	error = so_pru_bind(so, nam, td);
	return (error);
}

static void
sodealloc(struct socket *so)
{
	if (so->so_rcv.ssb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.ssb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	/* remove accept filter if present */
	if (so->so_accf != NULL)
		do_setopt_accept_filter(so, NULL);
#endif /* INET */
	crfree(so->so_cred);
	kfree(so, M_SOCKET);
}

int
solisten(struct socket *so, int backlog, struct thread *td)
{
	int error;
#ifdef SCTP
	short oldopt, oldqlimit;
#endif /* SCTP */

	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))
		return (EINVAL);

#ifdef SCTP
	oldopt = so->so_options;
	oldqlimit = so->so_qlimit;
#endif /* SCTP */

	lwkt_gettoken(&so->so_rcv.ssb_token);
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	lwkt_reltoken(&so->so_rcv.ssb_token);
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	/*
	 * SCTP needs to tweak both the inbound backlog parameter AND
	 * the so_options (in the UDP model a socket both connects and
	 * accepts inbound connections implicitly).
	 */
	error = so_pru_listen(so, td);
	if (error) {
#ifdef SCTP
		/* Restore the params */
		so->so_options = oldopt;
		so->so_qlimit = oldqlimit;
#endif /* SCTP */
		return (error);
	}
	return (0);
}

/*
 * Destroy a disconnected socket.  This routine is a NOP if entities
 * still have a reference on the socket:
 *
 *	so_pcb -	The protocol stack still has a reference
 *	SS_NOFDREF -	There is no longer a file pointer reference
 */
void
sofree(struct socket *so)
{
	struct socket *head = so->so_head;

	/*
	 * Arbitrate the last free.
	 */
	KKASSERT(so->so_refs > 0);
	if (atomic_fetchadd_int(&so->so_refs, -1) != 1)
		return;

	KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF));

	/*
	 * We're done, clean up
	 */
	if (head != NULL) {
		lwkt_gettoken(&head->so_rcv.ssb_token);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			lwkt_reltoken(&head->so_rcv.ssb_token);
			return;
		} else {
			panic("sofree: not queued");
		}
		soclrstate(so, SS_INCOMP);
		so->so_head = NULL;
		lwkt_reltoken(&head->so_rcv.ssb_token);
	}
	ssb_release(&so->so_snd, so);
	sorflush(so);
	sodealloc(so);
}
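
/*
 * Reference-count sketch (illustrative, not a definitive recipe): code
 * that holds a socket across a blocking point takes a reference and
 * releases it through sofree(), which only tears the socket down on the
 * last release:
 *
 *	soreference(so);	add a reference (so_refs)
 *	... use the socket ...
 *	sofree(so);		drop it; destroys on the last reference
 *
 * soclose() below follows the same pattern: setting SS_NOFDREF stands in
 * for taking back the file descriptor's reference, and the final sofree()
 * disposes of it.
 */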

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int fflag)
{
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_pcb == NULL)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (fflag & FNONBLOCK))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo, PCATCH,
					       "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;

		error2 = so_pru_detach(so);
		if (error == 0)
			error = error2;
	}
discard:
	lwkt_gettoken(&so->so_rcv.ssb_token);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
			soclrstate(sp, SS_INCOMP);
			sp->so_head = NULL;
			so->so_incqlen--;
			soaborta(sp);
		}
		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			soclrstate(sp, SS_COMP);
			sp->so_head = NULL;
			so->so_qlen--;
			soaborta(sp);
		}
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	sosetstate(so, SS_NOFDREF);	/* take ref */
	sofree(so);			/* dispose of ref */
	return (error);
}

/*
 * Abort and destroy a socket.  Only one abort can be in progress
 * at any given moment.
 */
void
soabort(struct socket *so)
{
	soreference(so);
	so_pru_abort(so);
}

void
soaborta(struct socket *so)
{
	soreference(so);
	so_pru_aborta(so);
}

void
soabort_oncpu(struct socket *so)
{
	soreference(so);
	so_pru_abort_oncpu(so);
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	soreference(so);		/* create ref */
	soclrstate(so, SS_NOFDREF);	/* owned by lack of SS_NOFDREF */
	error = so_pru_accept(so, nam);
	return (error);
}

int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	     (error = sodisconnect(so)))) {
		error = EISCONN;
	} else {
		/*
		 * Prevent accumulated error from previous connection
		 * from biting us.
		 */
		so->so_error = 0;
		error = so_pru_connect(so, nam, td);
	}
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	error = so_pru_connect2(so1, so2);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = so_pru_disconnect(so);
bad:
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
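
/*
 * Illustration (derived from the macro above, not additional source):
 * SBLOCKWAIT() maps the caller's MSG_DONTWAIT into the wait flag handed
 * to ssb_lock(), so a non-blocking send or receive fails to acquire a
 * busy sockbuf lock instead of sleeping on it:
 *
 *	ssb_lock(&so->so_snd, SBLOCKWAIT(MSG_DONTWAIT));
 *		expands to ssb_lock(&so->so_snd, M_NOWAIT)
 *	ssb_lock(&so->so_snd, SBLOCKWAIT(0));
 *		expands to ssb_lock(&so->so_snd, M_WAITOK)
 */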

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
       struct mbuf *top, struct mbuf *control, int flags,
       struct thread *td)
{
	struct mbuf **mp;
	struct mbuf *m;
	size_t resid;
	int space, len;
	int clen = 0, error, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;
	int pru_flags;

	if (uio) {
		resid = uio->uio_resid;
	} else {
		resid = (size_t)top->m_pkthdr.len;
#ifdef INVARIANTS
		len = 0;
		for (m = top; m; m = m->m_next)
			len += m->m_len;
		KKASSERT(top->m_pkthdr.len == len);
#endif
	}

	/*
	 * WARNING!  resid is unsigned, space and len are signed.  space
	 *	     can wind up negative if the sockbuf is overcommitted.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td->td_lwp != NULL)
		td->td_lwp->lwp_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	gotoerr(errcode)	{ error = errcode; goto release; }

restart:
	error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

	do {
		if (so->so_state & SS_CANTSENDMORE)
			gotoerr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					gotoerr(ENOTCONN);
			} else if (addr == NULL) {
				gotoerr(so->so_proto->pr_flags &
					PR_CONNREQUIRED ?
					ENOTCONN : EDESTADDRREQ);
			}
		}
		if ((atomic && resid > so->so_snd.ssb_hiwat) ||
		    clen > so->so_snd.ssb_hiwat) {
			gotoerr(EMSGSIZE);
		}
		space = ssb_space(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((space < 0 || (size_t)space < resid + clen) && uio &&
		    (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
			if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
				gotoerr(EWOULDBLOCK);
			ssb_unlock(&so->so_snd);
			error = ssb_wait(&so->so_snd);
			if (error)
				goto out;
			goto restart;
		}
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (resid > INT_MAX)
					resid = INT_MAX;
				m = m_getl((int)resid, MB_WAIT, MT_DATA,
					   top == NULL ? M_PKTHDR : 0, &mlen);
				if (top == NULL) {
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = NULL;
				}
				len = imin((int)szmin(mlen, resid), space);
				if (resid < MINCLSIZE) {
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == NULL && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (size_t)len,
						uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid == 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (flags & MSG_OOB) {
				pru_flags = PRUS_OOB;
			} else if ((flags & MSG_EOF) &&
				   (so->so_proto->pr_flags & PR_IMPLOPCL) &&
				   (resid == 0)) {
				/*
				 * If the user set MSG_EOF, the protocol
				 * understands this flag and there is nothing
				 * left to send, then use PRU_SEND_EOF instead
				 * of PRU_SEND.
				 */
				pru_flags = PRUS_EOF;
			} else if (resid > 0 && space > 0) {
				/* If there is more to send, set PRUS_MORETOCOME */
				pru_flags = PRUS_MORETOCOME;
			} else {
				pru_flags = 0;
			}
			/*
			 * XXX all the SS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc.  We could
			 * probably recheck again inside the splnet() protection
			 * here, but there are probably other places that this
			 * also happens.  We must rethink this.
			 */
			error = so_pru_send(so, pru_flags, top, addr, control,
					    td);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = NULL;
			top = NULL;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	ssb_unlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
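
/*
 * Caller's-eye sketch (illustrative assumptions, not code from this
 * file): sosend() takes the data either as a uio or as a prebuilt mbuf
 * chain, never both.  A protocol handing down a ready-made packet might
 * do something like:
 *
 *	m = m_gethdr(MB_WAIT, MT_DATA);
 *	... fill in m, set m->m_pkthdr.len ...
 *	error = sosend(so, NULL, NULL, m, NULL, 0, td);
 *
 * whereas the syscall path passes the user's iovecs through a uio and
 * leaves top == NULL.  Either way the data and control mbufs are
 * consumed on return, as the header comment above notes.
 */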

/*
 * A specialization of sosend() for UDP based on protocol-specific
 * knowledge:
 *   so->so_proto->pr_flags has the PR_ATOMIC field set.  This means that
 *	sosendallatonce() returns true,
 *	the "atomic" variable is true,
 *	and sosendudp() blocks until space is available for the entire
 *	send.
 *   so->so_proto->pr_flags does not have the PR_CONNREQUIRED or
 *	PR_IMPLOPCL flags set.
 *   UDP has no out-of-band data.
 *   UDP has no control data.
 *   UDP does not support MSG_EOR.
 */
int
sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio,
	  struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	boolean_t dontroute;		/* temporary SO_DONTROUTE setting */
	size_t resid;
	int error;
	int space;

	if (td->td_lwp != NULL)
		td->td_lwp->lwp_ru.ru_msgsnd++;
	if (control)
		m_freem(control);

	KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp"));
	resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len;

restart:
	error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

	if (so->so_state & SS_CANTSENDMORE)
		gotoerr(EPIPE);
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		goto release;
	}
	if (!(so->so_state & SS_ISCONNECTED) && addr == NULL)
		gotoerr(EDESTADDRREQ);
	if (resid > so->so_snd.ssb_hiwat)
		gotoerr(EMSGSIZE);
	space = ssb_space(&so->so_snd);
	if (uio && (space < 0 || (size_t)space < resid)) {
		if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
			gotoerr(EWOULDBLOCK);
		ssb_unlock(&so->so_snd);
		error = ssb_wait(&so->so_snd);
		if (error)
			goto out;
		goto restart;
	}

	if (uio) {
		top = m_uiomove(uio);
		if (top == NULL)
			goto release;
	}

	dontroute = (flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE);
	if (dontroute)
		so->so_options |= SO_DONTROUTE;

	error = so_pru_send(so, 0, top, addr, NULL, td);
	top = NULL;		/* sent or freed in lower layer */

	if (dontroute)
		so->so_options &= ~SO_DONTROUTE;

release:
	ssb_unlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	return (error);
}
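
/*
 * Worked example of the size checks above (the buffer sizes are
 * illustrative assumptions, not defaults taken from this file): with
 * ssb_hiwat = 57344,
 *
 *	a 65000-byte datagram fails immediately with EMSGSIZE;
 *	an 8192-byte datagram with only 4096 bytes of space blocks in
 *	ssb_wait() until the whole datagram fits, because PR_ATOMIC
 *	sockets never send partial datagrams (or fails with EWOULDBLOCK
 *	under MSG_DONTWAIT/MSG_FNONBLOCKING).
 */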

/*
 * Implement receive operations on a socket.
 *
 * We depend on the way that records are added to the signalsockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 *
 * Although the signalsockbuf is locked, new data may still be appended.
 * A token inside the ssb_lock deals with MP issues and still allows
 * the network to access the socket if we block in a uio.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
	  struct sockbuf *sio, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, *n;
	struct mbuf *free_chain = NULL;
	int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	int moff, type = 0;
	size_t resid, orig_resid;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = (size_t)(sio->sb_climit - sio->sb_cc);
	orig_resid = resid;

	if (psa)
		*psa = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp & ~MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(MB_WAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = so_pru_rcvoob(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		if (sio) {
			do {
				sbappend(sio, m);
				KKASSERT(resid >= (size_t)m->m_len);
				resid -= (size_t)m->m_len;
			} while (resid > 0 && m);
		} else {
			do {
				uio->uio_resid = resid;
				error = uiomove(mtod(m, caddr_t),
						(int)szmin(resid, m->m_len),
						uio);
				resid = uio->uio_resid;
				m = m_free(m);
			} while (uio->uio_resid && error == 0 && m);
		}
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if ((so->so_state & SS_ISCONFIRMING) && resid)
		so_pru_rcvd(so, 0);

restart:
	error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		goto done;

	m = so->so_rcv.ssb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    (size_t)so->so_rcv.ssb_cc < resid) &&
	    (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat ||
	     ((flags & MSG_WAITALL) &&
	      resid <= (size_t)so->so_rcv.ssb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next) {
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.ssb_mb;
				goto dontblock;
			}
		}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (pr->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (resid == 0)
			goto release;
		if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		ssb_unlock(&so->so_rcv);
		error = ssb_wait(&so->so_rcv);
		if (error)
			goto done;
		goto restart;
	}
dontblock:
	if (uio && uio->uio_td && uio->uio_td->td_proc)
		uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++;

	/*
	 * note: m should be == sb_mb here.  Cache the next record while
	 * cleaning up.  Note that calling m_free*() will break out critical
	 * section.
	 */
	KKASSERT(m == so->so_rcv.ssb_mb);

	/*
	 * Skip any address mbufs prepending the record.
	 */
	if (pr->pr_flags & PR_ADDR) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *));
		if (flags & MSG_PEEK)
			m = m->m_next;
		else
			m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
	}

	/*
	 * Skip any control mbufs prepending the record.
	 */
#ifdef SCTP
	if (pr->pr_flags & PR_ADDR_OPT) {
		/*
		 * For SCTP we may be getting a
		 * whole message OR a partial delivery.
		 */
		if (m && m->m_type == MT_SONAME) {
			orig_resid = 0;
			if (psa)
				*psa = dup_sockaddr(mtod(m, struct sockaddr *));
			if (flags & MSG_PEEK)
				m = m->m_next;
			else
				m = sbunlinkmbuf(&so->so_rcv.sb, m,
						 &free_chain);
		}
	}
#endif /* SCTP */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;	/* XXX race */
		} else {
			if (controlp) {
				n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				m = n;
			} else {
				m = sbunlinkmbuf(&so->so_rcv.sb, m,
						 &free_chain);
			}
		}
		if (controlp && *controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/*
	 * flag OOB data.
	 */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}

	/*
	 * Copy to the UIO or mbuf return chain (*mp).
	 */
	moff = 0;
	offset = 0;
	while (m && resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
			break;
		} else {
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
				("receive 3"));
		}
		soclrstate(so, SS_RCVATMARK);
		len = (resid > INT_MAX) ? INT_MAX : resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;

		/*
		 * Copy out to the UIO or pass the mbufs back to the SIO.
		 * The SIO is dealt with when we eat the mbuf, but deal
		 * with the resid here either way.
		 */
		if (uio) {
			uio->uio_resid = resid;
			error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			resid = uio->uio_resid;
			if (error)
				goto release;
		} else {
			resid -= (size_t)len;
		}

		/*
		 * Eat the entire mbuf or just a piece of it
		 */
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
#ifdef SCTP
			if (m->m_flags & M_NOTIFICATION)
				flags |= MSG_NOTIFICATION;
#endif /* SCTP */
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				if (sio) {
					n = sbunlinkmbuf(&so->so_rcv.sb, m,
							 NULL);
					sbappend(sio, m);
					m = n;
				} else {
					m = sbunlinkmbuf(&so->so_rcv.sb, m,
							 &free_chain);
				}
			}
		} else {
			if (flags & MSG_PEEK) {
				moff += len;
			} else {
				if (sio) {
					n = m_copym(m, 0, len, MB_WAIT);
					if (n)
						sbappend(sio, n);
				}
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.ssb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					sosetstate(so, SS_RCVATMARK);
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until resid == 0 or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep signalsockbuf locked against other readers.
		 */
		while ((flags & MSG_WAITALL) && m == NULL &&
		       resid > 0 && !sosendallatonce(so) &&
		       so->so_rcv.ssb_mb == NULL) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * The window might have closed to zero, make
			 * sure we send an ack now that we've drained
			 * the buffer or we might end up blocking until
			 * the idle takes over (5 seconds).
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
				so_pru_rcvd(so, flags);
			error = ssb_wait(&so->so_rcv);
			if (error) {
				ssb_unlock(&so->so_rcv);
				error = 0;
				goto done;
			}
			m = so->so_rcv.ssb_mb;
		}
	}

	/*
	 * If an atomic read was requested but unread data still remains
	 * in the record, set MSG_TRUNC.
	 */
	if (m && pr->pr_flags & PR_ATOMIC)
		flags |= MSG_TRUNC;

	/*
	 * Cleanup.  If an atomic read was requested drop any unread data.
	 */
	if ((flags & MSG_PEEK) == 0) {
		if (m && (pr->pr_flags & PR_ATOMIC))
			sbdroprecord(&so->so_rcv.sb);
		if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
			so_pru_rcvd(so, flags);
	}

	if (orig_resid == resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		ssb_unlock(&so->so_rcv);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	ssb_unlock(&so->so_rcv);
done:
	if (free_chain)
		m_freem(free_chain);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how != SHUT_WR)
		sorflush(so);
	if (how != SHUT_RD)
		return (so_pru_shutdown(so));
	return (0);
}

void
sorflush(struct socket *so)
{
	struct signalsockbuf *ssb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct signalsockbuf asb;

	atomic_set_int(&ssb->ssb_flags, SSB_NOINTR);

	ssb_lock(ssb, M_WAITOK);
	socantrcvmore(so);
	asb = *ssb;

	/*
	 * Can't just blow up the ssb structure here
	 */
	ssb->ssb_timeo = 0;
	ssb->ssb_unused01 = 0;
	ssb->ssb_lowat = 0;
	ssb->ssb_hiwat = 0;
	ssb->ssb_mbmax = 0;
	atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK);

	ssb_unlock(ssb);

	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.ssb_mb);
	ssb_release(&asb, so);
}
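
/*
 * Behavior summary, derived from soshutdown() above and shown only as
 * an illustration: the "how" argument decomposes into two independent
 * actions,
 *
 *	soshutdown(so, SHUT_RD)		flushes the receive side only
 *	soshutdown(so, SHUT_WR)		calls so_pru_shutdown() only
 *	soshutdown(so, SHUT_RDWR)	does both
 *
 * and anything else fails with EINVAL.
 */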

#ifdef INET
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				FREE(af->so_accept_filter_str, M_ACCF);
			}
			FREE(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}

	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}
	/* don't put large objects on the kernel stack */
	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP,
	       M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	if (error)
		goto out;
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}
	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF,
			       M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			FREE(af->so_accept_filter_str, M_ACCF);
			FREE(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		FREE(afap, M_TEMP);
	return (error);
}
#endif /* INET */

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	return soopt_to_kbuf(sopt, buf, len, minlen);
}

int
soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t valsize;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
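
/*
 * Usage sketch (a hypothetical handler, not part of this file): a
 * protocol's pr_ctloutput() typically pulls a fixed-size option value
 * into a kernel buffer with sooptcopyin() before acting on it:
 *
 *	int optval;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
 *	if (error == 0)
 *		... apply optval ...
 *
 * Passing len == minlen, as sosetopt() does below, insists on an
 * exactly-sized value (larger values are truncated to len).
 */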

int
sosetopt(struct socket *so, struct sockopt *sopt)
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	u_long val;
	struct signalsockbuf *sotmp;

	error = 0;
	sopt->sopt_dir = SOPT_SET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pru_ctloutput(so, sopt));
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif /* INET */
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
						&so->so_snd : &so->so_rcv,
						(u_long)optval, so,
						&curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				sotmp = (sopt->sopt_name == SO_SNDBUF) ?
					&so->so_snd : &so->so_rcv;
				atomic_clear_int(&sotmp->ssb_flags,
						 SSB_AUTOSIZE);
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.ssb_lowat =
				    (optval > so->so_snd.ssb_hiwat) ?
				    so->so_snd.ssb_hiwat : optval;
				atomic_clear_int(&so->so_snd.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.ssb_lowat =
				    (optval > so->so_rcv.ssb_hiwat) ?
				    so->so_rcv.ssb_hiwat : optval;
				atomic_clear_int(&so->so_rcv.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			if (val == 0 && tv.tv_usec != 0)
				val = 1;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.ssb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.ssb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) so_pru_ctloutput(so, sopt);
		}
	}
bad:
	return (error);
}
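
/*
 * Worked example for the SO_SNDTIMEO/SO_RCVTIMEO conversion above (the
 * numbers are illustrative; hz is configuration-dependent): with
 * hz = 100, ustick works out to 10000 microseconds per tick, so a
 * timeout of { tv_sec = 2, tv_usec = 500000 } becomes
 *
 *	val = 2 * 100 + 500000 / 10000 = 250 ticks
 *
 * Anything exceeding SHRT_MAX ticks is rejected with EDOM, and a
 * nonzero sub-tick timeout is rounded up to 1 tick rather than 0.
 */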

/* Helper routine for getsockopt */
int
sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
{
	soopt_from_kbuf(sopt, buf, len);
	return 0;
}

void
soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len)
{
	size_t valsize;

	if (len == 0) {
		sopt->sopt_valsize = 0;
		return;
	}

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = szmin(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != NULL) {
		bcopy(buf, sopt->sopt_val, valsize);
	}
}

int
sogetopt(struct socket *so, struct sockopt *sopt)
{
	int error, optval;
	struct linger l;
	struct timeval tv;
#ifdef INET
	struct accept_filter_arg *afap;
#endif

	error = 0;
	sopt->sopt_dir = SOPT_GET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pru_ctloutput(so, sopt));
		} else {
			return (ENOPROTOOPT);
		}
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
			       M_TEMP, M_WAITOK | M_ZERO);
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name,
				       so->so_accf->so_accept_filter->accf_name);
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg,
					       so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			FREE(afap, M_TEMP);
			break;
#endif /* INET */

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.ssb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.ssb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.ssb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.ssb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * ustick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize, msize;

	m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_DATA,
		   0, &msize);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = min(msize, sopt_size);
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size > 0) {
		m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT,
			   MT_DATA, 0, &msize);
		if (m == NULL) {
			m_freem(*mp);
			return (ENOBUFS);
		}
		m->m_len = min(msize, sopt_size);
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return (0);
}
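
/*
 * Pipeline sketch (illustrative, after the pattern of the legacy
 * callers these XXX routines serve): an option value is staged through
 * an mbuf chain sized by soopt_getm() and filled by soopt_mcopyin(),
 * then handed to the (__FreeBSD__ < 3) style handler:
 *
 *	struct mbuf *m = NULL;
 *
 *	error = soopt_getm(sopt, &m);
 *	if (error == 0) {
 *		soopt_mcopyin(sopt, m);
 *		... pass m to the legacy handler ...
 *	}
 *
 * soopt_mcopyout() below performs the reverse trip for getsockopt-style
 * answers.
 */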

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);
	return 0;
}

void
soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	size_t valsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return;
	val = sopt->sopt_val;
	valsize = sopt->sopt_valsize;
	while (m != NULL && valsize >= m->m_len) {
		bcopy(val, mtod(m, char *), m->m_len);
		valsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		m = m->m_next;
	}
	/* chain should have been allocated large enough, see ip6_sooptmcopyin() */
	if (m != NULL)
		panic("ip6_sooptmcopyin");
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	return soopt_from_mbuf(sopt, m);
}

int
soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;
	size_t maxsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return 0;
	val = sopt->sopt_val;
	maxsize = sopt->sopt_valsize;
	while (m != NULL && maxsize >= m->m_len) {
		bcopy(mtod(m, char *), val, m->m_len);
		maxsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* the sockopt buffer supplied from userland was too small */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

void
sohasoutofband(struct socket *so)
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	KNOTE(&so->so_rcv.ssb_kq.ki_note, NOTE_OOB);
}

int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct signalsockbuf *ssb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		ssb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		ssb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		ssb = &so->so_rcv;
		break;
	default:
		return (EOPNOTSUPP);
	}

	knote_insert(&ssb->ssb_kq.ki_note, kn);
	atomic_set_int(&ssb->ssb_flags, SSB_KNOTE);
	return (0);
}
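
/*
 * Userland view (standard kevent(2) usage, shown for orientation; it is
 * not code from this file): the filters registered above are what a
 * process exercises with, e.g.,
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * For a listening socket EVFILT_READ is routed to filt_solisten()
 * (readable == pending connections); otherwise filt_soread() applies
 * the NOTE_LOWAT threshold requested above.
 */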

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_rcv.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note))
		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	if (kn->kn_sfflags & NOTE_OOB) {
		if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) {
			kn->kn_fflags |= NOTE_OOB;
			return (1);
		}
		return (0);
	}
	kn->kn_data = so->so_rcv.ssb_cc;

	/*
	 * Only set EOF if all data has been exhausted.
	 */
	if ((so->so_state & SS_CANTRCVMORE) && kn->kn_data == 0) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return ((kn->kn_data >= so->so_rcv.ssb_lowat) ||
		!TAILQ_EMPTY(&so->so_comp));
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_snd.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note))
		atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = ssb_space(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.ssb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (!TAILQ_EMPTY(&so->so_comp));
}