1 /* $OpenBSD: uipc_socket.c,v 1.79 2009/10/31 12:00:08 fgsch Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <net/route.h>
#include <sys/pool.h>

/* kqueue(2) filter routines for sockets; definitions at the end of file. */
void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/* Clamp values for the listen(2) backlog; see solisten() below. */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

/* Pool from which all struct socket allocations are drawn. */
struct pool socket_pool;

/*
 * Initialize the socket allocation pool.  Called once at boot.
 */
void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
/*
 * Create a new socket of the given domain/type/protocol and attach it
 * to its protocol via PRU_ATTACH.  On success *aso holds the new socket.
 * Returns 0 or an errno (EPROTONOSUPPORT, EPROTOTYPE, or the protocol's
 * attach error).
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	/* A nonzero proto selects an exact protocol; 0 matches by type. */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	/* Record privilege and credentials of the creating process. */
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_cred->p_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_pid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL, p);
	if (error) {
		/*
		 * Mark NOFDREF so sofree() will actually release the
		 * half-constructed socket.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}

/*
 * Bind a local address to a socket (bind(2)); delegates to PRU_BIND.
 */
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	splx(s);
	return (error);
}

/*
 * Mark a socket as accepting connections (listen(2)).  The backlog is
 * clamped into the [sominconn, somaxconn] range.
 */
int
solisten(struct socket *so, int backlog)
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only flip to accepting if no connections are queued yet. */
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Must be called at splsoftnet()
 */

/*
 * Release a socket if it has no PCB and no remaining file reference.
 * A socket still on a listening socket's accept queue is left alone
 * unless it can be removed from the incomplete (so_q0) queue.
 */
void
sofree(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* Abort any connections still parked on the accept queues. */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket: don't wait out the linger. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Sleep until disconnected, signal, or timeout. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		/* Preserve the first error seen. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet.
 */
int
soabort(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

/*
 * Accept a queued connection (accept(2)): clear SS_NOFDREF now that a
 * file descriptor references this socket, then let the protocol fill in
 * the peer's name.  Returns ECONNABORTED if the peer already vanished
 * and the protocol aborts on accept-disconnect.
 */
int
soaccept(struct socket *so, struct mbuf *nam)
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address in "nam" (connect(2)).
 */
int
soconnect(struct socket *so, struct mbuf *nam)
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (used by socketpair(2)).
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket.  Fails with ENOTCONN if not
 * connected, EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(struct socket *so)
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
bad:
	splx(s);
	return (error);
}

/* MSG_DONTWAIT turns the sockbuf lock acquisition non-blocking. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	struct mbuf **mp;
	struct mbuf *m;
	long space, len, mlen, clen = 0;
	quad_t resid;
	int error, s, dontroute;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned (since uio->uio_resid is).
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 * MSG_EOR on a SOCK_STREAM socket is also invalid.
	 */
	if (resid < 0 ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (uio && uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit while holding both the sockbuf lock and raised spl. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Allow a zero-data send with control
				 * information even when not connected.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* OOB data gets a little extra headroom in the buffer. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			/* Drop the lock and wait for space to open up. */
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* First mbuf of the chain gets a pkthdr. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_NOWAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			if (resid <= 0)
				so->so_state &= ~SS_ISSENDING;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/*
			 * Chain and control were consumed by the protocol;
			 * reset so they are not freed again at "out".
			 */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	size_t orig_resid = uio->uio_resid;
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	/* Out-of-band data is fetched directly from the protocol. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, curproc);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB data means we can deliver now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Hand the name mbuf itself to the caller. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Convert in-kernel rights to descriptors. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m,
					    controllen);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
					pr->pr_domain->dom_dispose(m);
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in one receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			splx(s);
			uio_error =
			    uiomove(mtod(m, caddr_t) + moff, (int)len,
			    uio);
			s = splsoftnet();
			/*
			 * On a copyout fault, keep consuming the sockbuf
			 * but account as if only this chunk was taken;
			 * uio_error is reported at the end.
			 */
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this mbuf entirely. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			/* Partial mbuf: trim the consumed prefix. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see comment above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols drop whatever is left of a partial record. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	/* Nothing was transferred and nothing is pending: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection (shutdown(2)):
 * SHUT_RD flushes the receive side, SHUT_WR asks the protocol to
 * finish sending, SHUT_RDWR does both.
 */
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;

	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			return (0);
		/* FALLTHROUGH */
	case SHUT_WR:
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
	default:
		return (EINVAL);
	}
}

/*
 * Flush and release everything in a socket's receive buffer,
 * disposing of any in-transit file descriptors (SCM_RIGHTS).
 */
void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	int s;
	struct sockbuf asb;

	/* Make the sblock uninterruptible; it cannot fail with M_WAITOK. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the sockbuf, then clear it in place. */
	asb = *sb;
	bzero(sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Handle setsockopt(2).  SOL_SOCKET options are processed here (and
 * also passed on to the protocol, which then owns m0); other levels go
 * straight to the protocol's ctloutput.  m0 is consumed in all cases.
 */
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		/* These options are restricted to the superuser. */
		switch (optname) {
		case SO_BINDANY:
		case SO_RDOMAIN:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			/* Boolean options map directly to so_options bits. */
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (sbcheckreserve(cnt, so->so_snd.sb_hiwat) ||
				    sbreserve(&so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_RCVBUF:
				if (sbcheckreserve(cnt, so->so_rcv.sb_hiwat) ||
				    sbreserve(&so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/* Low-water marks are clamped to the high-water. */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			u_short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Reject timeouts that overflow the u_short tick count. */
			if (tv->tv_sec > (USHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			/* A tiny nonzero timeout still rounds up to one tick. */
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Handle getsockopt(2).  SOL_SOCKET options are answered here from the
 * socket itself; other levels are forwarded to the protocol's ctloutput.
 * On success a freshly allocated mbuf with the value is returned in *mp
 * (ownership passes to the caller).
 */
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		/* Boolean options read straight from so_options bits. */
		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process/group that out-of-band data has arrived
 * (SIGURG) and wake up any readers selecting/polling on the socket.
 */
void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Attach a kqueue(2) knote to a socket: readers hang off the receive
 * buffer (listening sockets use the listen filter), writers off the
 * send buffer.  Returns nonzero for an unsupported filter.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * Detach a read knote; clear SB_KNOTE when the last one goes away.
 */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * Read filter: fires when buffered data reaches the low-water mark
 * (or NOTE_LOWAT threshold), on EOF, or on a pending socket error.
 */
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * Detach a write knote; clear SB_KNOTE when the last one goes away.
 */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * Write filter: fires when send-buffer space reaches the low-water mark
 * (or NOTE_LOWAT threshold), on EOF, or on a pending error.  A socket
 * whose protocol requires a connection is not writable until connected.
 */
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
/*
 * Listen filter: fires when at least one completed connection is
 * waiting to be accepted; kn_data reports the queue length.
 */
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}