1 /* $OpenBSD: uipc_socket.c,v 1.51 2003/07/21 22:44:50 tedu Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

/* Forward declarations for the kqueue filter routines defined below. */
void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

/*
 * Filter ops tables registered by soo_kqfilter().  The listen and read
 * filters share filt_sordetach() since both hang off so_rcv's klist.
 * NOTE(review): the leading 1 is presumably the f_isfd member — confirm
 * against <sys/event.h>.
 */
struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/* Upper and lower clamps applied to the listen(2) backlog in solisten(). */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

/* Pool from which all struct socket allocations are drawn. */
struct pool socket_pool;

/*
 * soinit: one-time setup of the socket allocation pool ("sockpl").
 */
void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
/*
 * socreate: allocate and initialize a new socket of the given domain,
 * type and (optional) protocol, then attach it to its protocol via
 * PRU_ATTACH.  On success *aso holds the new socket; on failure the
 * socket is torn down with sofree() and an errno is returned
 * (EPROTONOSUPPORT / EPROTOTYPE / whatever PRU_ATTACH reports).
 */
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	/* proto == 0 means "pick the default protocol for this type". */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	bzero(so, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	/* Sockets created by root are marked privileged. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	/* Record creator's real/effective ids for later accounting checks. */
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_cred->p_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL);
	if (error) {
		/*
		 * sofree() only reclaims sockets marked SS_NOFDREF, so
		 * set it before freeing the half-constructed socket.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}

/*
 * sobind: bind the socket to the address in nam by handing PRU_BIND
 * to the protocol.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * solisten: put the socket into listening state (PRU_LISTEN) and set
 * the accept-queue limit, clamped to [sominconn, somaxconn].
 * SO_ACCEPTCONN is only set while the accept queue is still empty.
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Must be called at splsoftnet()
 */

/*
 * sofree: release a socket that has no pcb and no file descriptor
 * reference.  A no-op if either reference still exists, or if the
 * socket sits on a listener's accept queue and cannot be removed.
 */
void
sofree(struct socket *so)
{
	splassert(IPL_SOFTNET);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* A listener first aborts every connection still queued on it. */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking linger: don't wait for the drain. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Sleep until disconnected, signalled, or timed out. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Preserve the first error; PRU_DETACH's only if none yet. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet.
 */
int
soabort(struct socket *so)
{
	splassert(IPL_SOFTNET);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL);
}

/*
 * soaccept: complete an accept(2) on a queued connection; fills nam
 * with the peer's address via PRU_ACCEPT.  The socket must still carry
 * SS_NOFDREF (set by the listener) — it is cleared here as the fd takes
 * ownership.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

/*
 * soconnect: initiate a connection to the address in nam (PRU_CONNECT).
 * Listening sockets cannot connect (EOPNOTSUPP).
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * soconnect2: connect two sockets to each other (socketpair(2) style)
 * via PRU_CONNECT2 on so1's protocol.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL);
	splx(s);
	return (error);
}

/*
 * sodisconnect: start disconnecting a connected socket (PRU_DISCONNECT).
 * Fails with ENOTCONN if never connected, EALREADY if a disconnect is
 * already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL);
bad:
	splx(s);
	return (error);
}

/* Sleep-or-fail mode for sblock(): MSG_DONTWAIT must never block. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	struct mbuf *m;
	long space, len, mlen, clen = 0;
	quad_t resid;
	int error, s, dontroute;
	/* "atomic": the whole message must be handed down in one piece. */
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned (since uio->uio_resid is).
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 * MSG_EOR on a SOCK_STREAM socket is also invalid.
	 */
	if (resid < 0 ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Bail out of the locked section: record error, drop spl, jump to release. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	/* Serialize against other senders on this socket. */
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			/* Report and consume the pending async error. */
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Control-only sends are allowed while
				 * unconnected (resid == 0 && clen != 0).
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* OOB data may overcommit the buffer slightly. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room: either fail (non-blocking) or wait. */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* First mbuf of the chain gets a pkthdr. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				/* Use a cluster when there's enough to fill one. */
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						/* Leave room for link/proto headers. */
						len = lmin(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			/* Hand the assembled chain to the protocol. */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Protocol now owns top/control; don't free them below. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* orig_resid != 0 at the end means nothing was consumed; retry. */
	size_t orig_resid = uio->uio_resid;
	/* uiomove() errors are deferred so sockbuf bookkeeping completes. */
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	/* Out-of-band data takes a separate, simpler path. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL);

restart:
	/* Serialize against other readers on this socket. */
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark makes partial data usable. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	/* Records begin with the sender's address for PR_ADDR protocols. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Hand the mbuf itself to the caller. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any control (ancillary) mbufs preceding the data. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Convert passed fds into this process. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	/* Main copy loop: one iteration per mbuf (or partial mbuf). */
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in one call. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0 && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			/* Drop spl across the potentially faulting copyout. */
			splx(s);
			uio_error =
				uiomove(mtod(m, caddr_t) + moff, (int)len,
				    uio);
			s = splsoftnet();
			if (uio_error)
				/* Keep accounting as if the copy succeeded. */
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			/* Partial mbuf consumed. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/*
				 * Deliberately 0, not error: return the
				 * short count per the comment above.
				 */
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols: any unread remainder of the record is dropped. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		/* Tell the protocol we consumed data (e.g. to open a window). */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL);
	}
	/* Nothing consumed and no EOR/EOF seen: go back and wait again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * soshutdown: shut down part of a full-duplex connection.  "how" is the
 * shutdown(2) argument; incrementing it maps it onto the FREAD/FWRITE
 * bits (read side is flushed locally, write side goes to the protocol
 * via PRU_SHUTDOWN).
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & ~(FREAD|FWRITE))
		return (EINVAL);
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN,
		    NULL, NULL, NULL);
	return (0);
}

/*
 * sorflush: flush the receive buffer.  Marks the socket unable to
 * receive, snapshots the sockbuf, zeroes it (preserving registered
 * knotes), disposes of any passed file descriptors held in the old
 * buffer, and releases the old buffer's mbufs.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* SB_NOINTR: the lock acquisition below must not be interruptible. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero(sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	/* Dispose of in-flight SCM_RIGHTS fds before freeing the mbufs. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * sosetopt: handle setsockopt(2).  Non-SOL_SOCKET levels are passed
 * straight to the protocol's pr_ctloutput; socket-level options are
 * applied here and then also shown to the protocol.  m0 is consumed
 * (freed here or by the protocol).
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... SO_LINGER's on/off bit is set below */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			/* Boolean options: the optname doubles as the bit. */
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			/* Clamp non-positive requests to 1. */
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    cnt) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/* Low-water marks may not exceed the high-water mark. */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Timeout is stored in ticks in a short; reject overflow. */
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			/* Round a tiny-but-nonzero timeout up to one tick. */
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Let the protocol see socket-level options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * sogetopt: handle getsockopt(2).  Non-SOL_SOCKET levels go to the
 * protocol's pr_ctloutput; socket-level values are returned in a
 * freshly allocated mbuf via *mp (caller frees).
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);	/* default size; cases may override */

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		/* Boolean options: report the option bit from so_options. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the tick count back into a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * sohasoutofband: notify interested parties that out-of-band data has
 * arrived — deliver SIGURG to the registered process/group and wake up
 * anyone selecting on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * soo_kqfilter: attach a knote to a socket.  EVFILT_READ attaches to
 * the receive buffer (listening sockets get the listen filter instead),
 * EVFILT_WRITE to the send buffer.  Returns nonzero for unsupported
 * filters.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * filt_sordetach: detach a read/listen knote; clear SB_KNOTE once the
 * receive buffer's klist is empty.
 */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * filt_soread: read filter — report the amount of buffered receive
 * data; fire on EOF (SS_CANTRCVMORE), pending error, or data at/above
 * the low-water mark (or NOTE_LOWAT threshold).
 */
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * filt_sowdetach: detach a write knote; clear SB_KNOTE once the send
 * buffer's klist is empty.
 */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * filt_sowrite: write filter — report free space in the send buffer;
 * fire on EOF (SS_CANTSENDMORE), pending error, or space at/above the
 * low-water mark, but not while an unconnected connection-required
 * socket cannot send.
 */
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
/*
 * filt_solisten: listen filter — fire when completed connections are
 * waiting on the accept queue; kn_data is the queue length.
 */
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}