1 /* $OpenBSD: uipc_socket.c,v 1.35 2001/06/22 14:14:09 deraadt Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>

/* kqueue filter hooks for sockets (attach is done in soo_kqfilter below). */
void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

/*
 * Filter tables: { f_isfd, f_attach, f_detach, f_event }.
 * Listening sockets share the read-side detach routine, since both
 * hang their knotes off so_rcv.sb_sel.si_note (see soo_kqfilter).
 */
struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/*
 * Bounds applied to the listen(2) backlog in solisten():
 * requested backlog is clamped to [sominconn, somaxconn].
 */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
*/
/*
 * Create a new socket of the given domain/type/protocol and attach it
 * to its protocol via PRU_ATTACH.  On success *aso holds the new socket.
 * Returns EPROTONOSUPPORT/EPROTOTYPE for an unknown or mismatched
 * protocol, or the protocol's attach error.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/* An explicit protocol wins; otherwise pick by (domain, type). */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	so->so_type = type;
	/* Remember superuser creation and creator ids for later checks. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	/* proto number is smuggled through the mbuf pointer argument. */
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL);
	if (error) {
		/*
		 * Attach failed: mark no file descriptor reference so
		 * sofree() will actually reclaim the socket.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	*aso = so;
	return (0);
}

/*
 * Bind a name (address) to the socket; thin wrapper around PRU_BIND.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * Mark the socket as accepting connections (PRU_LISTEN) and set the
 * accept-queue limit, clamped to [sominconn, somaxconn].
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only set SO_ACCEPTCONN if nothing is on the accept queue yet. */
	if (so->so_q == 0)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket's buffers and free it, but only when it has neither
 * a protocol control block nor an outstanding file descriptor reference.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* Abort any connections still sitting on the accept queues. */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = so->so_q0) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = so->so_q) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait for the disconnect to complete,
			 * unless non-blocking; sleep is bounded by
			 * so_linger seconds and interruptible by signals.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach even on error; report the first failure seen. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
*/
int
soabort(so)
	struct socket *so;
{

	/* Forcibly tear down an in-progress connection via the protocol. */
	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL);
}

/*
 * Accept a connection: clear SS_NOFDREF (the fd now references us) and
 * ask the protocol to fill in the peer's name, unless the connection
 * was already aborted out from under us.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address in "nam" (PRU_CONNECT).
 * Not valid on listening sockets.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (used by socketpair(2) and the
 * like); so2 is passed to the protocol through the mbuf argument.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket (PRU_DISCONNECT).
 * ENOTCONN if not connected; EALREADY if a disconnect is in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL);
bad:
	splx(s);
	return (error);
}

/* sblock() mode: don't sleep for the sockbuf lock if MSG_DONTWAIT. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
*/
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len;
	register quad_t resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned (since uio->uio_resid is).
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 * MSG_EOR on a SOCK_STREAM socket is also invalid.
	 */
	if (resid < 0 ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Bail out of the locked region with the given errno. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	/* Serialize against other senders on this socket. */
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Zero-length sends with control data are
				 * allowed while confirming a connection.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Not enough room: block (or EWOULDBLOCK) and retry. */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* Copy user data into a fresh mbuf chain. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = min(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = min(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = min(MCLBYTES, resid);
#endif
					space -= len;
				} else {
nopages:
					len = min(min(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Protocol consumed top/control; don't free them. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
*/
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	size_t orig_resid = uio->uio_resid;
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		/* Out-of-band data is fetched from the protocol directly. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record terminator or OOB data must be delivered now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* Record starts with the sender's address (MT_SONAME). */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any ancillary data (MT_CONTROL mbufs). */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Turn SCM_RIGHTS into real descriptors. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: one mbuf (or part of one) per iteration. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0 && uio_error == 0) {
			resid = uio->uio_resid;
			splx(s);
			uio_error =
				uiomove(mtod(m, caddr_t) + moff, (int)len,
				    uio);
			s = splsoftnet();
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see comment above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: drop the rest, flag it. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL);
	}
	/* Nothing consumed and more may come: try the whole thing again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection; "how" is 0 (read),
 * 1 (write) or 2 (both), mapped onto FREAD/FWRITE by the how++ below.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & ~(FREAD|FWRITE))
		return (EINVAL);
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL);
	return (0);
}

/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * then dispose of everything queued (giving the domain a chance to
 * reclaim passed-access rights first).  Works on a snapshot (asb) so
 * the live sockbuf can be cleared before the data is released.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		/* Preserve any registered knotes across the bzero. */
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags =
SB_KNOTE; 894 } 895 splx(s); 896 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 897 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 898 sbrelease(&asb); 899 } 900 901 int 902 sosetopt(so, level, optname, m0) 903 register struct socket *so; 904 int level, optname; 905 struct mbuf *m0; 906 { 907 int error = 0; 908 register struct mbuf *m = m0; 909 910 if (level != SOL_SOCKET) { 911 if (so->so_proto && so->so_proto->pr_ctloutput) 912 return ((*so->so_proto->pr_ctloutput) 913 (PRCO_SETOPT, so, level, optname, &m0)); 914 error = ENOPROTOOPT; 915 } else { 916 switch (optname) { 917 918 case SO_LINGER: 919 if (m == NULL || m->m_len != sizeof (struct linger)) { 920 error = EINVAL; 921 goto bad; 922 } 923 so->so_linger = mtod(m, struct linger *)->l_linger; 924 /* fall thru... */ 925 926 case SO_DEBUG: 927 case SO_KEEPALIVE: 928 case SO_DONTROUTE: 929 case SO_USELOOPBACK: 930 case SO_BROADCAST: 931 case SO_REUSEADDR: 932 case SO_REUSEPORT: 933 case SO_OOBINLINE: 934 if (m == NULL || m->m_len < sizeof (int)) { 935 error = EINVAL; 936 goto bad; 937 } 938 if (*mtod(m, int *)) 939 so->so_options |= optname; 940 else 941 so->so_options &= ~optname; 942 break; 943 944 case SO_SNDBUF: 945 case SO_RCVBUF: 946 case SO_SNDLOWAT: 947 case SO_RCVLOWAT: 948 { 949 u_long cnt; 950 951 if (m == NULL || m->m_len < sizeof (int)) { 952 error = EINVAL; 953 goto bad; 954 } 955 cnt = *mtod(m, int *); 956 if ((long)cnt <= 0) 957 cnt = 1; 958 switch (optname) { 959 960 case SO_SNDBUF: 961 case SO_RCVBUF: 962 if (sbreserve(optname == SO_SNDBUF ? 963 &so->so_snd : &so->so_rcv, 964 cnt) == 0) { 965 error = ENOBUFS; 966 goto bad; 967 } 968 break; 969 970 case SO_SNDLOWAT: 971 so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ? 972 so->so_snd.sb_hiwat : cnt; 973 break; 974 case SO_RCVLOWAT: 975 so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ? 
976 so->so_rcv.sb_hiwat : cnt; 977 break; 978 } 979 break; 980 } 981 982 case SO_SNDTIMEO: 983 case SO_RCVTIMEO: 984 { 985 struct timeval *tv; 986 short val; 987 988 if (m == NULL || m->m_len < sizeof (*tv)) { 989 error = EINVAL; 990 goto bad; 991 } 992 tv = mtod(m, struct timeval *); 993 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 994 error = EDOM; 995 goto bad; 996 } 997 val = tv->tv_sec * hz + tv->tv_usec / tick; 998 999 switch (optname) { 1000 1001 case SO_SNDTIMEO: 1002 so->so_snd.sb_timeo = val; 1003 break; 1004 case SO_RCVTIMEO: 1005 so->so_rcv.sb_timeo = val; 1006 break; 1007 } 1008 break; 1009 } 1010 1011 default: 1012 error = ENOPROTOOPT; 1013 break; 1014 } 1015 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1016 (void) ((*so->so_proto->pr_ctloutput) 1017 (PRCO_SETOPT, so, level, optname, &m0)); 1018 m = NULL; /* freed by protocol */ 1019 } 1020 } 1021 bad: 1022 if (m) 1023 (void) m_free(m); 1024 return (error); 1025 } 1026 1027 int 1028 sogetopt(so, level, optname, mp) 1029 register struct socket *so; 1030 int level, optname; 1031 struct mbuf **mp; 1032 { 1033 register struct mbuf *m; 1034 1035 if (level != SOL_SOCKET) { 1036 if (so->so_proto && so->so_proto->pr_ctloutput) { 1037 return ((*so->so_proto->pr_ctloutput) 1038 (PRCO_GETOPT, so, level, optname, mp)); 1039 } else 1040 return (ENOPROTOOPT); 1041 } else { 1042 m = m_get(M_WAIT, MT_SOOPTS); 1043 m->m_len = sizeof (int); 1044 1045 switch (optname) { 1046 1047 case SO_LINGER: 1048 m->m_len = sizeof (struct linger); 1049 mtod(m, struct linger *)->l_onoff = 1050 so->so_options & SO_LINGER; 1051 mtod(m, struct linger *)->l_linger = so->so_linger; 1052 break; 1053 1054 case SO_USELOOPBACK: 1055 case SO_DONTROUTE: 1056 case SO_DEBUG: 1057 case SO_KEEPALIVE: 1058 case SO_REUSEADDR: 1059 case SO_REUSEPORT: 1060 case SO_BROADCAST: 1061 case SO_OOBINLINE: 1062 *mtod(m, int *) = so->so_options & optname; 1063 break; 1064 1065 case SO_TYPE: 1066 *mtod(m, int *) = so->so_type; 1067 
break;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
				(val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process/group that out-of-band data has arrived
 * (SIGURG) and wake up any select(2)/poll(2) sleepers on the receive side.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * kqueue attach routine for sockets: pick the filter matching the
 * requested event (listening sockets get the listen filter for reads)
 * and hook the knote onto the corresponding sockbuf's note list.
 * Returns nonzero for unsupported filter types.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * Detach a read-side (or listen) knote; clear SB_KNOTE when the
 * last one goes away.
 */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Read filter: ready when buffered data reaches the low-water mark
 * (or the NOTE_LOWAT threshold), on EOF, or on a pending error.
 */
/*ARGSUSED*/
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * Detach a write-side knote; clear SB_KNOTE when the last one goes away.
 */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Write filter: ready when send buffer space reaches the low-water
 * mark (or NOTE_LOWAT threshold); never ready while an unconnected
 * connection-oriented socket has nowhere to send.
 */
/*ARGSUSED*/
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*
 * Listen filter: ready when there are completed connections waiting
 * to be accepted; kn_data reports the queue length.
 */
/*ARGSUSED*/
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}