1 /* $OpenBSD: uipc_socket.c,v 1.41 2002/02/05 22:04:43 nordin Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

/* kqueue filter routines for sockets; bodies are at the end of this file. */
void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

/*
 * kqueue filter dispatch tables.  Listening sockets share the read-side
 * detach routine but report readiness from the accept queue instead of
 * the receive buffer.
 */
struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/*
 * listen(2) backlog clamp: solisten() forces the backlog into the range
 * [sominconn, somaxconn].
 */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

/* Pool from which every struct socket is allocated (see socreate/sofree). */
struct pool socket_pool;

/*
 * One-time socket layer initialization: set up the socket allocation pool.
 */
void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Create a socket in domain "dom" of the given type/protocol and return
 * it via *aso.  The protocol switch entry is looked up by protocol number
 * when one is given, otherwise by socket type.  The socket is zeroed,
 * given its owner's uids, and attached to the protocol with PRU_ATTACH.
 * Returns 0 or an errno.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	/* sockets created by the superuser are marked privileged */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL);
	if (error) {
		/*
		 * Attach failed: mark the socket as having no file
		 * descriptor reference so sofree() will reclaim it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}

/*
 * Bind an address to a socket: hand PRU_BIND to the protocol.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * Prepare a socket for accepting connections (listen(2)).  The protocol
 * is notified with PRU_LISTEN first; on success the backlog is clamped
 * to the range [sominconn, somaxconn].
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL);
	if (error) {
		splx(s);
		return (error);
	}
	/* only set SO_ACCEPTCONN while the accept queue is still empty */
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket's buffers and return it to the pool, but only once it
 * has neither a protocol control block nor a file descriptor reference.
 *
 * Must be called at splsoftnet()
 */

void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		/* abort every connection still parked on the accept queues */
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait (up to so_linger seconds) for the
			 * disconnect to complete, unless the socket is
			 * non-blocking.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* detach from the protocol; keep the first error we saw */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Forcibly tear a (pending) connection down via PRU_ABORT.
 *
 * Must be called at splsoftnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL);
}

/*
 * Accept a connection: clear SS_NOFDREF (the socket now has a file
 * descriptor) and ask the protocol for the peer's address via PRU_ACCEPT,
 * unless the connection was already aborted.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address in "nam" via PRU_CONNECT.
 * Not valid on listening sockets.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (socketpair(2)) via PRU_CONNECT2.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket via PRU_DISCONNECT.
 * Fails with ENOTCONN if not connected, EALREADY if a disconnect is
 * already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL);
bad:
	splx(s);
	return (error);
}

/* sblock() wait mode: don't sleep for the lock if MSG_DONTWAIT was given */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
378 */ 379 int 380 sosend(so, addr, uio, top, control, flags) 381 register struct socket *so; 382 struct mbuf *addr; 383 struct uio *uio; 384 struct mbuf *top; 385 struct mbuf *control; 386 int flags; 387 { 388 struct proc *p = curproc; /* XXX */ 389 struct mbuf **mp; 390 struct mbuf *m; 391 long space, len, mlen, clen = 0; 392 quad_t resid; 393 int error, s, dontroute; 394 int atomic = sosendallatonce(so) || top; 395 396 if (uio) 397 resid = uio->uio_resid; 398 else 399 resid = top->m_pkthdr.len; 400 /* 401 * In theory resid should be unsigned (since uio->uio_resid is). 402 * However, space must be signed, as it might be less than 0 403 * if we over-committed, and we must use a signed comparison 404 * of space and resid. On the other hand, a negative resid 405 * causes us to loop sending 0-length segments to the protocol. 406 * MSG_EOR on a SOCK_STREAM socket is also invalid. 407 */ 408 if (resid < 0 || 409 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 410 error = EINVAL; 411 goto out; 412 } 413 dontroute = 414 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 415 (so->so_proto->pr_flags & PR_ATOMIC); 416 p->p_stats->p_ru.ru_msgsnd++; 417 if (control) 418 clen = control->m_len; 419 #define snderr(errno) { error = errno; splx(s); goto release; } 420 421 restart: 422 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 423 goto out; 424 do { 425 s = splsoftnet(); 426 if (so->so_state & SS_CANTSENDMORE) 427 snderr(EPIPE); 428 if (so->so_error) { 429 error = so->so_error; 430 so->so_error = 0; 431 splx(s); 432 goto release; 433 } 434 if ((so->so_state & SS_ISCONNECTED) == 0) { 435 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 436 if ((so->so_state & SS_ISCONFIRMING) == 0 && 437 !(resid == 0 && clen != 0)) 438 snderr(ENOTCONN); 439 } else if (addr == 0) 440 snderr(EDESTADDRREQ); 441 } 442 space = sbspace(&so->so_snd); 443 if (flags & MSG_OOB) 444 space += 1024; 445 if ((atomic && resid > so->so_snd.sb_hiwat) || 446 clen > 
so->so_snd.sb_hiwat) 447 snderr(EMSGSIZE); 448 if (space < resid + clen && uio && 449 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 450 if (so->so_state & SS_NBIO) 451 snderr(EWOULDBLOCK); 452 sbunlock(&so->so_snd); 453 error = sbwait(&so->so_snd); 454 splx(s); 455 if (error) 456 goto out; 457 goto restart; 458 } 459 splx(s); 460 mp = ⊤ 461 space -= clen; 462 do { 463 if (uio == NULL) { 464 /* 465 * Data is prepackaged in "top". 466 */ 467 resid = 0; 468 if (flags & MSG_EOR) 469 top->m_flags |= M_EOR; 470 } else do { 471 if (top == 0) { 472 MGETHDR(m, M_WAIT, MT_DATA); 473 mlen = MHLEN; 474 m->m_pkthdr.len = 0; 475 m->m_pkthdr.rcvif = (struct ifnet *)0; 476 } else { 477 MGET(m, M_WAIT, MT_DATA); 478 mlen = MLEN; 479 } 480 if (resid >= MINCLSIZE && space >= MCLBYTES) { 481 MCLGET(m, M_WAIT); 482 if ((m->m_flags & M_EXT) == 0) 483 goto nopages; 484 mlen = MCLBYTES; 485 if (atomic && top == 0) { 486 len = lmin(MCLBYTES - max_hdr, resid); 487 m->m_data += max_hdr; 488 } else 489 len = lmin(MCLBYTES, resid); 490 space -= len; 491 } else { 492 nopages: 493 len = lmin(lmin(mlen, resid), space); 494 space -= len; 495 /* 496 * For datagram protocols, leave room 497 * for protocol headers in first mbuf. 498 */ 499 if (atomic && top == 0 && len < mlen) 500 MH_ALIGN(m, len); 501 } 502 error = uiomove(mtod(m, caddr_t), (int)len, 503 uio); 504 resid = uio->uio_resid; 505 m->m_len = len; 506 *mp = m; 507 top->m_pkthdr.len += len; 508 if (error) 509 goto release; 510 mp = &m->m_next; 511 if (resid <= 0) { 512 if (flags & MSG_EOR) 513 top->m_flags |= M_EOR; 514 break; 515 } 516 } while (space > 0 && atomic); 517 if (dontroute) 518 so->so_options |= SO_DONTROUTE; 519 s = splsoftnet(); /* XXX */ 520 error = (*so->so_proto->pr_usrreq)(so, 521 (flags & MSG_OOB) ? 
PRU_SENDOOB : PRU_SEND, 522 top, addr, control); 523 splx(s); 524 if (dontroute) 525 so->so_options &= ~SO_DONTROUTE; 526 clen = 0; 527 control = 0; 528 top = 0; 529 mp = ⊤ 530 if (error) 531 goto release; 532 } while (resid && space > 0); 533 } while (resid); 534 535 release: 536 sbunlock(&so->so_snd); 537 out: 538 if (top) 539 m_freem(top); 540 if (control) 541 m_freem(control); 542 return (error); 543 } 544 545 /* 546 * Implement receive operations on a socket. 547 * We depend on the way that records are added to the sockbuf 548 * by sbappend*. In particular, each record (mbufs linked through m_next) 549 * must begin with an address if the protocol so specifies, 550 * followed by an optional mbuf or mbufs containing ancillary data, 551 * and then zero or more mbufs of data. 552 * In order to avoid blocking network interrupts for the entire time here, 553 * we splx() while doing the actual copy to user space. 554 * Although the sockbuf is locked, new data may still be appended, 555 * and thus we must maintain consistency of the sockbuf during that time. 556 * 557 * The caller may receive the data as a single mbuf chain by supplying 558 * an mbuf **mp0 for use in returning the chain. The uio is then used 559 * only for the count in uio_resid. 
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	size_t orig_resid = uio->uio_resid;
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		/* out-of-band data bypasses the receive buffer entirely */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or the end of a record still satisfies a read */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* first mbuf of the record carries the sender's address */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* then any control (ancillary data) mbufs */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* convert in-kernel file refs for SCM_RIGHTS */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	moff = 0;
	offset = 0;
	/* main copy loop: walk the data mbufs of the record */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0 && uio_error == 0) {
			resid = uio->uio_resid;
			splx(s);
			uio_error =
				uiomove(mtod(m, caddr_t) + moff, (int)len,
				    uio);
			s = splsoftnet();
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* consumed this whole mbuf */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* partial mbuf: trim the bytes we consumed */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* deliberate: short count, no error (see above) */
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* atomic protocols: unread remainder of the record is dropped */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* nothing was transferred; try the whole thing again */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * shutdown(2): "how" arrives as 0..2 and is incremented so it maps onto
 * the FREAD|FWRITE bit pairs tested below.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & ~(FREAD|FWRITE))
		return (EINVAL);
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL);
	return (0);
}

/*
 * Flush and release everything in a socket's receive buffer.  The buffer
 * is snapshotted into "asb", zeroed in place, and the snapshot's mbufs
 * disposed of (letting the domain reclaim passed-rights first).
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		/* preserve registered knotes across the wipe */
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * setsockopt(2) backend.  Socket-level options are handled here;
 * everything else is forwarded to the protocol's ctloutput.  The option
 * mbuf m0 is consumed (freed here or by the protocol).
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			/* boolean options share the so_options bit vector */
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			/* reject non-positive sizes by clamping to 1 */
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    cnt) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/* low-water marks may not exceed the high-water mark */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* timeouts are stored in clock ticks in a short */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			/* let the protocol see socket-level options too */
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * getsockopt(2) backend.  Socket-level options are answered from the
 * socket itself in a freshly allocated mbuf returned via *mp; other
 * levels are forwarded to the protocol's ctloutput.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* reading SO_ERROR clears the pending error */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* convert the stored tick count back to a timeval */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
				(val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Out-of-band data has arrived: post SIGURG to the socket's process
 * (group) and wake anyone selecting on the receive side.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Attach a knote to a socket: pick the filterops matching the requested
 * filter (listening sockets get solisten_filtops) and register the knote
 * on the corresponding sockbuf's klist.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * Detach a read-filter knote; drop SB_KNOTE once the klist is empty.
 */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Read filter: ready when buffered data reaches the (possibly
 * NOTE_LOWAT-overridden) low-water mark, at EOF, or on pending error.
 */
/*ARGSUSED*/
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * Detach a write-filter knote; drop SB_KNOTE once the klist is empty.
 */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*
 * Write filter: ready when send-buffer space reaches the (possibly
 * NOTE_LOWAT-overridden) low-water mark, at EOF, or on pending error;
 * never ready on an unconnected connection-oriented socket.
 */
/*ARGSUSED*/
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*
 * Listen filter: ready when at least one completed connection is queued.
 */
/*ARGSUSED*/
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}