1 /* $OpenBSD: uipc_socket.c,v 1.72 2008/08/07 17:43:37 reyk Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
*
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

/*
 * kqueue filter callbacks for sockets.  The definitions are at the end
 * of this file; they are declared here so the filterops tables below
 * can reference them.
 */
void 	filt_sordetach(struct knote *kn);
int 	filt_soread(struct knote *kn, long hint);
void 	filt_sowdetach(struct knote *kn);
int 	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

/*
 * Filter tables that soo_kqfilter() installs into kn->kn_fop.
 * Listening sockets and ordinary readers share filt_sordetach and
 * differ only in the event routine.
 * NOTE(review): the positional initialisers presumably map to
 * (isfd, attach, detach, event) -- confirm against struct filterops
 * in <sys/event.h>.
 */
struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


/*
 * Default floor for the listen backlog; may be overridden at build time.
 */
#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/*
 * Run-time backlog limits.  solisten() first caps the requested backlog
 * at somaxconn and then raises it to at least sominconn.
 */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

/* Pool that socreate() allocates sockets from and sofree() returns them to. */
struct pool socket_pool;

/*
 * Initialise socket_pool for objects of sizeof(struct socket), using
 * the wait-channel name "sockpl".
 * NOTE(review): presumably called once during boot; the caller is not
 * visible in this file.
 */
void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
87 */ 88 /*ARGSUSED*/ 89 int 90 socreate(int dom, struct socket **aso, int type, int proto) 91 { 92 struct proc *p = curproc; /* XXX */ 93 struct protosw *prp; 94 struct socket *so; 95 int error, s; 96 97 if (proto) 98 prp = pffindproto(dom, proto, type); 99 else 100 prp = pffindtype(dom, type); 101 if (prp == NULL || prp->pr_usrreq == 0) 102 return (EPROTONOSUPPORT); 103 if (prp->pr_type != type) 104 return (EPROTOTYPE); 105 s = splsoftnet(); 106 so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); 107 TAILQ_INIT(&so->so_q0); 108 TAILQ_INIT(&so->so_q); 109 so->so_type = type; 110 if (p->p_ucred->cr_uid == 0) 111 so->so_state = SS_PRIV; 112 so->so_ruid = p->p_cred->p_ruid; 113 so->so_euid = p->p_ucred->cr_uid; 114 so->so_rgid = p->p_cred->p_rgid; 115 so->so_egid = p->p_ucred->cr_gid; 116 so->so_cpid = p->p_pid; 117 so->so_proto = prp; 118 error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, 119 (struct mbuf *)(long)proto, NULL, p); 120 if (error) { 121 so->so_state |= SS_NOFDREF; 122 sofree(so); 123 splx(s); 124 return (error); 125 } 126 #ifdef COMPAT_SUNOS 127 { 128 extern struct emul emul_sunos; 129 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 130 so->so_options |= SO_BROADCAST; 131 } 132 #endif 133 splx(s); 134 *aso = so; 135 return (0); 136 } 137 138 int 139 sobind(struct socket *so, struct mbuf *nam, struct proc *p) 140 { 141 int s = splsoftnet(); 142 int error; 143 144 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); 145 splx(s); 146 return (error); 147 } 148 149 int 150 solisten(struct socket *so, int backlog) 151 { 152 int s = splsoftnet(), error; 153 154 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, 155 curproc); 156 if (error) { 157 splx(s); 158 return (error); 159 } 160 if (TAILQ_FIRST(&so->so_q) == NULL) 161 so->so_options |= SO_ACCEPTCONN; 162 if (backlog < 0 || backlog > somaxconn) 163 backlog = somaxconn; 164 if (backlog < sominconn) 165 backlog = sominconn; 166 so->so_qlimit = backlog; 167 splx(s); 168 
return (0); 169 } 170 171 /* 172 * Must be called at splsoftnet() 173 */ 174 175 void 176 sofree(struct socket *so) 177 { 178 splassert(IPL_SOFTNET); 179 180 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 181 return; 182 if (so->so_head) { 183 /* 184 * We must not decommission a socket that's on the accept(2) 185 * queue. If we do, then accept(2) may hang after select(2) 186 * indicated that the listening socket was ready. 187 */ 188 if (!soqremque(so, 0)) 189 return; 190 } 191 sbrelease(&so->so_snd); 192 sorflush(so); 193 pool_put(&socket_pool, so); 194 } 195 196 /* 197 * Close a socket on last file table reference removal. 198 * Initiate disconnect if connected. 199 * Free socket when disconnect complete. 200 */ 201 int 202 soclose(struct socket *so) 203 { 204 struct socket *so2; 205 int s = splsoftnet(); /* conservative */ 206 int error = 0; 207 208 if (so->so_options & SO_ACCEPTCONN) { 209 while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { 210 (void) soqremque(so2, 0); 211 (void) soabort(so2); 212 } 213 while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) { 214 (void) soqremque(so2, 1); 215 (void) soabort(so2); 216 } 217 } 218 if (so->so_pcb == 0) 219 goto discard; 220 if (so->so_state & SS_ISCONNECTED) { 221 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 222 error = sodisconnect(so); 223 if (error) 224 goto drop; 225 } 226 if (so->so_options & SO_LINGER) { 227 if ((so->so_state & SS_ISDISCONNECTING) && 228 (so->so_state & SS_NBIO)) 229 goto drop; 230 while (so->so_state & SS_ISCONNECTED) { 231 error = tsleep(&so->so_timeo, 232 PSOCK | PCATCH, netcls, 233 so->so_linger * hz); 234 if (error) 235 break; 236 } 237 } 238 } 239 drop: 240 if (so->so_pcb) { 241 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL, 242 NULL, NULL, curproc); 243 if (error == 0) 244 error = error2; 245 } 246 discard: 247 if (so->so_state & SS_NOFDREF) 248 panic("soclose: NOFDREF"); 249 so->so_state |= SS_NOFDREF; 250 sofree(so); 251 splx(s); 252 return (error); 253 } 254 255 /* 
256 * Must be called at splsoftnet. 257 */ 258 int 259 soabort(struct socket *so) 260 { 261 splassert(IPL_SOFTNET); 262 263 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL, 264 curproc); 265 } 266 267 int 268 soaccept(struct socket *so, struct mbuf *nam) 269 { 270 int s = splsoftnet(); 271 int error = 0; 272 273 if ((so->so_state & SS_NOFDREF) == 0) 274 panic("soaccept: !NOFDREF"); 275 so->so_state &= ~SS_NOFDREF; 276 if ((so->so_state & SS_ISDISCONNECTED) == 0 || 277 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) 278 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL, 279 nam, NULL, curproc); 280 else 281 error = ECONNABORTED; 282 splx(s); 283 return (error); 284 } 285 286 int 287 soconnect(struct socket *so, struct mbuf *nam) 288 { 289 int s; 290 int error; 291 292 if (so->so_options & SO_ACCEPTCONN) 293 return (EOPNOTSUPP); 294 s = splsoftnet(); 295 /* 296 * If protocol is connection-based, can only connect once. 297 * Otherwise, if connected, try to disconnect first. 298 * This allows user to disconnect by connecting to, e.g., 299 * a null address. 
300 */ 301 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 302 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 303 (error = sodisconnect(so)))) 304 error = EISCONN; 305 else 306 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 307 NULL, nam, NULL, curproc); 308 splx(s); 309 return (error); 310 } 311 312 int 313 soconnect2(struct socket *so1, struct socket *so2) 314 { 315 int s = splsoftnet(); 316 int error; 317 318 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, 319 (struct mbuf *)so2, NULL, curproc); 320 splx(s); 321 return (error); 322 } 323 324 int 325 sodisconnect(struct socket *so) 326 { 327 int s = splsoftnet(); 328 int error; 329 330 if ((so->so_state & SS_ISCONNECTED) == 0) { 331 error = ENOTCONN; 332 goto bad; 333 } 334 if (so->so_state & SS_ISDISCONNECTING) { 335 error = EALREADY; 336 goto bad; 337 } 338 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, 339 NULL, curproc); 340 bad: 341 splx(s); 342 return (error); 343 } 344 345 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 346 /* 347 * Send on a socket. 348 * If send must go all at once and message is larger than 349 * send buffering, then hard error. 350 * Lock against other senders. 351 * If must go all at once and not enough room now, then 352 * inform user that this would block and do nothing. 353 * Otherwise, if nonblocking, send as much as possible. 354 * The data to be sent is described by "uio" if nonzero, 355 * otherwise by the mbuf chain "top" (which must be null 356 * if uio is not). Data provided in mbuf chain must be small 357 * enough to send all at once. 358 * 359 * Returns nonzero on error, timeout or signal; callers 360 * must check for short counts if EINTR/ERESTART are returned. 361 * Data and control buffers are freed on return. 
362 */ 363 int 364 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, 365 struct mbuf *control, int flags) 366 { 367 struct mbuf **mp; 368 struct mbuf *m; 369 long space, len, mlen, clen = 0; 370 quad_t resid; 371 int error, s, dontroute; 372 int atomic = sosendallatonce(so) || top; 373 374 if (uio) 375 resid = uio->uio_resid; 376 else 377 resid = top->m_pkthdr.len; 378 /* 379 * In theory resid should be unsigned (since uio->uio_resid is). 380 * However, space must be signed, as it might be less than 0 381 * if we over-committed, and we must use a signed comparison 382 * of space and resid. On the other hand, a negative resid 383 * causes us to loop sending 0-length segments to the protocol. 384 * MSG_EOR on a SOCK_STREAM socket is also invalid. 385 */ 386 if (resid < 0 || 387 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 388 error = EINVAL; 389 goto out; 390 } 391 dontroute = 392 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 393 (so->so_proto->pr_flags & PR_ATOMIC); 394 if (uio && uio->uio_procp) 395 uio->uio_procp->p_stats->p_ru.ru_msgsnd++; 396 if (control) 397 clen = control->m_len; 398 #define snderr(errno) { error = errno; splx(s); goto release; } 399 400 restart: 401 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 402 goto out; 403 so->so_state |= SS_ISSENDING; 404 do { 405 s = splsoftnet(); 406 if (so->so_state & SS_CANTSENDMORE) 407 snderr(EPIPE); 408 if (so->so_error) { 409 error = so->so_error; 410 so->so_error = 0; 411 splx(s); 412 goto release; 413 } 414 if ((so->so_state & SS_ISCONNECTED) == 0) { 415 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 416 if ((so->so_state & SS_ISCONFIRMING) == 0 && 417 !(resid == 0 && clen != 0)) 418 snderr(ENOTCONN); 419 } else if (addr == 0) 420 snderr(EDESTADDRREQ); 421 } 422 space = sbspace(&so->so_snd); 423 if (flags & MSG_OOB) 424 space += 1024; 425 if ((atomic && resid > so->so_snd.sb_hiwat) || 426 clen > so->so_snd.sb_hiwat) 427 snderr(EMSGSIZE); 
428 if (space < resid + clen && 429 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 430 if (so->so_state & SS_NBIO) 431 snderr(EWOULDBLOCK); 432 sbunlock(&so->so_snd); 433 error = sbwait(&so->so_snd); 434 so->so_state &= ~SS_ISSENDING; 435 splx(s); 436 if (error) 437 goto out; 438 goto restart; 439 } 440 splx(s); 441 mp = ⊤ 442 space -= clen; 443 do { 444 if (uio == NULL) { 445 /* 446 * Data is prepackaged in "top". 447 */ 448 resid = 0; 449 if (flags & MSG_EOR) 450 top->m_flags |= M_EOR; 451 } else do { 452 if (top == 0) { 453 MGETHDR(m, M_WAIT, MT_DATA); 454 mlen = MHLEN; 455 m->m_pkthdr.len = 0; 456 m->m_pkthdr.rcvif = (struct ifnet *)0; 457 } else { 458 MGET(m, M_WAIT, MT_DATA); 459 mlen = MLEN; 460 } 461 if (resid >= MINCLSIZE && space >= MCLBYTES) { 462 MCLGET(m, M_NOWAIT); 463 if ((m->m_flags & M_EXT) == 0) 464 goto nopages; 465 mlen = MCLBYTES; 466 if (atomic && top == 0) { 467 len = lmin(MCLBYTES - max_hdr, resid); 468 m->m_data += max_hdr; 469 } else 470 len = lmin(MCLBYTES, resid); 471 space -= len; 472 } else { 473 nopages: 474 len = lmin(lmin(mlen, resid), space); 475 space -= len; 476 /* 477 * For datagram protocols, leave room 478 * for protocol headers in first mbuf. 479 */ 480 if (atomic && top == 0 && len < mlen) 481 MH_ALIGN(m, len); 482 } 483 error = uiomove(mtod(m, caddr_t), (int)len, 484 uio); 485 resid = uio->uio_resid; 486 m->m_len = len; 487 *mp = m; 488 top->m_pkthdr.len += len; 489 if (error) 490 goto release; 491 mp = &m->m_next; 492 if (resid <= 0) { 493 if (flags & MSG_EOR) 494 top->m_flags |= M_EOR; 495 break; 496 } 497 } while (space > 0 && atomic); 498 if (dontroute) 499 so->so_options |= SO_DONTROUTE; 500 s = splsoftnet(); /* XXX */ 501 if (resid <= 0) 502 so->so_state &= ~SS_ISSENDING; 503 error = (*so->so_proto->pr_usrreq)(so, 504 (flags & MSG_OOB) ? 
PRU_SENDOOB : PRU_SEND, 505 top, addr, control, curproc); 506 splx(s); 507 if (dontroute) 508 so->so_options &= ~SO_DONTROUTE; 509 clen = 0; 510 control = 0; 511 top = 0; 512 mp = ⊤ 513 if (error) 514 goto release; 515 } while (resid && space > 0); 516 } while (resid); 517 518 release: 519 so->so_state &= ~SS_ISSENDING; 520 sbunlock(&so->so_snd); 521 out: 522 if (top) 523 m_freem(top); 524 if (control) 525 m_freem(control); 526 return (error); 527 } 528 529 /* 530 * Implement receive operations on a socket. 531 * We depend on the way that records are added to the sockbuf 532 * by sbappend*. In particular, each record (mbufs linked through m_next) 533 * must begin with an address if the protocol so specifies, 534 * followed by an optional mbuf or mbufs containing ancillary data, 535 * and then zero or more mbufs of data. 536 * In order to avoid blocking network interrupts for the entire time here, 537 * we splx() while doing the actual copy to user space. 538 * Although the sockbuf is locked, new data may still be appended, 539 * and thus we must maintain consistency of the sockbuf during that time. 540 * 541 * The caller may receive the data as a single mbuf chain by supplying 542 * an mbuf **mp0 for use in returning the chain. The uio is then used 543 * only for the count in uio_resid. 
*/
/*
 * Arguments, as used by the code below:
 *	paddr	 if non-NULL, receives the record's address mbuf (only
 *		 for PR_ADDR protocols); cleared on entry.
 *	uio	 destination for the data; uio_resid bounds the amount.
 *	mp0	 if non-NULL, data mbufs are handed back through it
 *		 instead of being copied out with uiomove().
 *	controlp if non-NULL, receives control mbufs; cleared on entry.
 *	flagsp	 if non-NULL, supplies MSG_* flags (MSG_EOR is masked off
 *		 on input); the resulting flags are OR'ed back in on the
 *		 path that falls through to "release".
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* Zeroed once an address or control mbuf has been consumed. */
	size_t orig_resid = uio->uio_resid;
	/* First uiomove() failure; reported only after the sockbuf is tidy. */
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		/*
		 * Out-of-band data is fetched from the protocol into a
		 * private mbuf and copied out; the receive buffer is not
		 * touched on this path.
		 */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, curproc);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		/* Queued data is delivered before a pending error or EOF. */
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or an end-of-record mark means: do not wait. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		/* Give up the lock while sleeping, then re-evaluate. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			/* Peeking: hand out a copy and leave the buffer alone. */
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Unlink the address mbuf and give it away. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* error is 0 here (from sblock); it can only change in this loop. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
				   error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
					pr->pr_domain->dom_dispose(m);
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			/* Chain any further control mbufs after this one. */
			controlp = &(*controlp)->m_next;
		}
	}

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	/*
	 * moff is the read offset within the current mbuf and offset the
	 * running total consumed; both only advance when peeking.
	 */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in a single receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		/* Take no more than: the request, up to the mark, this mbuf. */
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			splx(s);
			uio_error =
				uiomove(mtod(m, caddr_t) + moff, (int)len,
					uio);
			s = splsoftnet();
			/*
			 * On failure, account for the data as consumed
			 * anyway so the loop keeps advancing; the error
			 * is reported at the end.
			 */
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* The rest of this mbuf was taken. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			/* Only part of this mbuf was taken. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Interrupted: short count, no error. */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Data left over in an atomic record is dropped and flagged. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	/*
	 * Nothing at all was delivered although something was asked for,
	 * and no record ended or EOF was seen: go round again.
	 */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down one or both directions of a socket.
 * SHUT_RD flushes the receive side only; SHUT_WR asks the protocol to
 * shut down; SHUT_RDWR does both.  Any other value yields EINVAL.
 */
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;

	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			return (0);
		/* FALLTHROUGH */
	case SHUT_WR:
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
	default:
		return (EINVAL);
	}
}

/*
 * Discard everything in the receive buffer and mark the socket as
 * unable to receive more.  The live sockbuf is copied aside and zeroed
 * under splnet; the copy is then disposed of and released afterwards.
 */
void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero(sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	/* Restore the knote list so registered kqueue filters survive. */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option.  Levels other than SOL_SOCKET are passed to the
 * protocol's pr_ctloutput (ENOPROTOOPT if it has none).  The option
 * value arrives in the mbuf m0, which is consumed: it is either freed
 * at "bad" or handed on to pr_ctloutput.
 */
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		/* Privilege check first; SO_BINDANY requires suser(). */
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		/*
		 * Boolean options: the option name doubles as its bit in
		 * so_options.  SO_LINGER lands here too; the leading int
		 * of its value then acts as the on/off switch.
		 */
		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			/* Zero or negative requests are treated as 1. */
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (sbcheckreserve(cnt, so->so_snd.sb_hiwat) ||
				    sbreserve(&so->so_snd, cnt) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_RCVBUF:
				if (sbcheckreserve(cnt, so->so_rcv.sb_hiwat) ||
				    sbreserve(&so->so_rcv, cnt) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/* Low-water marks are capped at the high-water mark. */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* The tick count has to fit in a short. */
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			/* A non-zero sub-tick timeout rounds up to one tick. */
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Let the protocol see the option too; its result is ignored. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Fetch a socket option.  Levels other than SOL_SOCKET are passed to the
 * protocol's pr_ctloutput (ENOPROTOOPT if it has none).  For SOL_SOCKET
 * a fresh MT_SOOPTS mbuf holding the value is returned through *mp;
 * on ENOPROTOOPT that mbuf is freed and *mp is left untouched.
 */
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		/* Default result size; larger results override it below. */
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		/* Boolean options report their raw bit, not 0/1. */
		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the pending error also clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert the stored tick count back to a timeval. */
			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Out-of-band data has arrived: send SIGURG to the socket's owner
 * (so_pgid) and wake anything selecting on the receive buffer.
 */
void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Attach a knote to a socket.  EVFILT_READ selects the listen or the
 * read filter depending on SO_ACCEPTCONN, and EVFILT_WRITE the write
 * filter; any other filter returns 1.  The fp argument is not
 * consulted; the socket is taken from kn->kn_fp.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * Detach a knote from the receive buffer; SB_KNOTE is cleared once the
 * note list becomes empty.
 */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Read filter.  kn_data is set to so_rcv.sb_cc.  The event fires on
 * EOF (flagged EV_EOF, with so_error in kn_fflags), on a pending
 * error, or when kn_data reaches the threshold: kn_sdata if
 * NOTE_LOWAT was requested, otherwise the buffer's low-water mark.
 */
/*ARGSUSED*/
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * Detach a knote from the send buffer; SB_KNOTE is cleared once the
 * note list becomes empty.
 */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Write filter.  kn_data is set to the free space in the send buffer.
 * The event fires when the socket can send no more (flagged EV_EOF),
 * on a pending error, or when the space reaches the threshold; it
 * never fires on an unconnected socket of a PR_CONNREQUIRED protocol.
 */
/*ARGSUSED*/
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*
 * Listen filter: reports so_qlen in kn_data and fires whenever it is
 * non-zero.
 */
/*ARGSUSED*/
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}