1 /* $OpenBSD: uipc_socket.c,v 1.76 2009/03/15 19:40:41 miod Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/file.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/domain.h> 42 #include <sys/kernel.h> 43 #include <sys/event.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/signalvar.h> 48 #include <sys/resourcevar.h> 49 #include <sys/pool.h> 50 51 void filt_sordetach(struct knote *kn); 52 int filt_soread(struct knote *kn, long hint); 53 void filt_sowdetach(struct knote *kn); 54 int filt_sowrite(struct knote *kn, long hint); 55 int filt_solisten(struct knote *kn, long hint); 56 57 struct filterops solisten_filtops = 58 { 1, NULL, filt_sordetach, filt_solisten }; 59 struct filterops soread_filtops = 60 { 1, NULL, filt_sordetach, filt_soread }; 61 struct filterops sowrite_filtops = 62 { 1, NULL, filt_sowdetach, filt_sowrite }; 63 64 65 #ifndef SOMINCONN 66 #define SOMINCONN 80 67 #endif /* SOMINCONN */ 68 69 int somaxconn = SOMAXCONN; 70 int sominconn = SOMINCONN; 71 72 struct pool socket_pool; 73 74 void 75 soinit(void) 76 { 77 78 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); 79 } 80 81 /* 82 * Socket operation routines. 83 * These routines are called by the routines in 84 * sys_socket.c or from a system process, and 85 * implement the semantics of socket operations by 86 * switching out to the protocol specific routines. 87 */ 88 /*ARGSUSED*/ 89 int 90 socreate(int dom, struct socket **aso, int type, int proto) 91 { 92 struct proc *p = curproc; /* XXX */ 93 struct protosw *prp; 94 struct socket *so; 95 int error, s; 96 97 if (proto) 98 prp = pffindproto(dom, proto, type); 99 else 100 prp = pffindtype(dom, type); 101 if (prp == NULL || prp->pr_usrreq == 0) 102 return (EPROTONOSUPPORT); 103 if (prp->pr_type != type) 104 return (EPROTOTYPE); 105 s = splsoftnet(); 106 so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); 107 TAILQ_INIT(&so->so_q0); 108 TAILQ_INIT(&so->so_q); 109 so->so_type = type; 110 if (p->p_ucred->cr_uid == 0) 111 so->so_state = SS_PRIV; 112 so->so_ruid = p->p_cred->p_ruid; 113 so->so_euid = p->p_ucred->cr_uid; 114 so->so_rgid = p->p_cred->p_rgid; 115 so->so_egid = p->p_ucred->cr_gid; 116 so->so_cpid = p->p_pid; 117 so->so_proto = prp; 118 error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, 119 (struct mbuf *)(long)proto, NULL, p); 120 if (error) { 121 so->so_state |= SS_NOFDREF; 122 sofree(so); 123 splx(s); 124 return (error); 125 } 126 #ifdef COMPAT_SUNOS 127 { 128 extern struct emul emul_sunos; 129 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 130 so->so_options |= SO_BROADCAST; 131 } 132 #endif 133 splx(s); 134 *aso = so; 135 return (0); 136 } 137 138 int 139 sobind(struct socket *so, struct mbuf *nam, struct proc *p) 140 { 141 int s = splsoftnet(); 142 int error; 143 144 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); 145 splx(s); 146 return (error); 147 } 148 149 int 150 solisten(struct socket *so, int backlog) 151 { 152 int s = splsoftnet(), error; 153 154 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, 155 curproc); 156 if (error) { 157 splx(s); 158 return (error); 159 } 160 if (TAILQ_FIRST(&so->so_q) == NULL) 161 so->so_options |= SO_ACCEPTCONN; 162 if (backlog < 0 || backlog > somaxconn) 163 backlog = somaxconn; 164 if (backlog < sominconn) 165 backlog = sominconn; 166 so->so_qlimit = backlog; 167 splx(s); 168 return (0); 169 } 170 171 /* 172 * Must be called at splsoftnet() 173 */ 174 175 void 176 sofree(struct socket *so) 177 { 178 splsoftassert(IPL_SOFTNET); 179 180 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 181 return; 182 if (so->so_head) { 183 /* 184 * We must not decommission a socket that's on the accept(2) 185 * queue. If we do, then accept(2) may hang after select(2) 186 * indicated that the listening socket was ready. 187 */ 188 if (!soqremque(so, 0)) 189 return; 190 } 191 sbrelease(&so->so_snd); 192 sorflush(so); 193 pool_put(&socket_pool, so); 194 } 195 196 /* 197 * Close a socket on last file table reference removal. 198 * Initiate disconnect if connected. 199 * Free socket when disconnect complete. 200 */ 201 int 202 soclose(struct socket *so) 203 { 204 struct socket *so2; 205 int s = splsoftnet(); /* conservative */ 206 int error = 0; 207 208 if (so->so_options & SO_ACCEPTCONN) { 209 while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { 210 (void) soqremque(so2, 0); 211 (void) soabort(so2); 212 } 213 while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) { 214 (void) soqremque(so2, 1); 215 (void) soabort(so2); 216 } 217 } 218 if (so->so_pcb == 0) 219 goto discard; 220 if (so->so_state & SS_ISCONNECTED) { 221 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 222 error = sodisconnect(so); 223 if (error) 224 goto drop; 225 } 226 if (so->so_options & SO_LINGER) { 227 if ((so->so_state & SS_ISDISCONNECTING) && 228 (so->so_state & SS_NBIO)) 229 goto drop; 230 while (so->so_state & SS_ISCONNECTED) { 231 error = tsleep(&so->so_timeo, 232 PSOCK | PCATCH, netcls, 233 so->so_linger * hz); 234 if (error) 235 break; 236 } 237 } 238 } 239 drop: 240 if (so->so_pcb) { 241 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL, 242 NULL, NULL, curproc); 243 if (error == 0) 244 error = error2; 245 } 246 discard: 247 if (so->so_state & SS_NOFDREF) 248 panic("soclose: NOFDREF"); 249 so->so_state |= SS_NOFDREF; 250 sofree(so); 251 splx(s); 252 return (error); 253 } 254 255 /* 256 * Must be called at splsoftnet. 257 */ 258 int 259 soabort(struct socket *so) 260 { 261 splsoftassert(IPL_SOFTNET); 262 263 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL, 264 curproc); 265 } 266 267 int 268 soaccept(struct socket *so, struct mbuf *nam) 269 { 270 int s = splsoftnet(); 271 int error = 0; 272 273 if ((so->so_state & SS_NOFDREF) == 0) 274 panic("soaccept: !NOFDREF"); 275 so->so_state &= ~SS_NOFDREF; 276 if ((so->so_state & SS_ISDISCONNECTED) == 0 || 277 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) 278 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL, 279 nam, NULL, curproc); 280 else 281 error = ECONNABORTED; 282 splx(s); 283 return (error); 284 } 285 286 int 287 soconnect(struct socket *so, struct mbuf *nam) 288 { 289 int s; 290 int error; 291 292 if (so->so_options & SO_ACCEPTCONN) 293 return (EOPNOTSUPP); 294 s = splsoftnet(); 295 /* 296 * If protocol is connection-based, can only connect once. 297 * Otherwise, if connected, try to disconnect first. 298 * This allows user to disconnect by connecting to, e.g., 299 * a null address. 300 */ 301 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 302 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 303 (error = sodisconnect(so)))) 304 error = EISCONN; 305 else 306 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 307 NULL, nam, NULL, curproc); 308 splx(s); 309 return (error); 310 } 311 312 int 313 soconnect2(struct socket *so1, struct socket *so2) 314 { 315 int s = splsoftnet(); 316 int error; 317 318 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, 319 (struct mbuf *)so2, NULL, curproc); 320 splx(s); 321 return (error); 322 } 323 324 int 325 sodisconnect(struct socket *so) 326 { 327 int s = splsoftnet(); 328 int error; 329 330 if ((so->so_state & SS_ISCONNECTED) == 0) { 331 error = ENOTCONN; 332 goto bad; 333 } 334 if (so->so_state & SS_ISDISCONNECTING) { 335 error = EALREADY; 336 goto bad; 337 } 338 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, 339 NULL, curproc); 340 bad: 341 splx(s); 342 return (error); 343 } 344 345 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 346 /* 347 * Send on a socket. 348 * If send must go all at once and message is larger than 349 * send buffering, then hard error. 350 * Lock against other senders. 351 * If must go all at once and not enough room now, then 352 * inform user that this would block and do nothing. 353 * Otherwise, if nonblocking, send as much as possible. 354 * The data to be sent is described by "uio" if nonzero, 355 * otherwise by the mbuf chain "top" (which must be null 356 * if uio is not). Data provided in mbuf chain must be small 357 * enough to send all at once. 358 * 359 * Returns nonzero on error, timeout or signal; callers 360 * must check for short counts if EINTR/ERESTART are returned. 361 * Data and control buffers are freed on return. 362 */ 363 int 364 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, 365 struct mbuf *control, int flags) 366 { 367 struct mbuf **mp; 368 struct mbuf *m; 369 long space, len, mlen, clen = 0; 370 quad_t resid; 371 int error, s, dontroute; 372 int atomic = sosendallatonce(so) || top; 373 374 if (uio) 375 resid = uio->uio_resid; 376 else 377 resid = top->m_pkthdr.len; 378 /* 379 * In theory resid should be unsigned (since uio->uio_resid is). 380 * However, space must be signed, as it might be less than 0 381 * if we over-committed, and we must use a signed comparison 382 * of space and resid. On the other hand, a negative resid 383 * causes us to loop sending 0-length segments to the protocol. 384 * MSG_EOR on a SOCK_STREAM socket is also invalid. 385 */ 386 if (resid < 0 || 387 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 388 error = EINVAL; 389 goto out; 390 } 391 dontroute = 392 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 393 (so->so_proto->pr_flags & PR_ATOMIC); 394 if (uio && uio->uio_procp) 395 uio->uio_procp->p_stats->p_ru.ru_msgsnd++; 396 if (control) 397 clen = control->m_len; 398 #define snderr(errno) { error = errno; splx(s); goto release; } 399 400 restart: 401 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 402 goto out; 403 so->so_state |= SS_ISSENDING; 404 do { 405 s = splsoftnet(); 406 if (so->so_state & SS_CANTSENDMORE) 407 snderr(EPIPE); 408 if (so->so_error) { 409 error = so->so_error; 410 so->so_error = 0; 411 splx(s); 412 goto release; 413 } 414 if ((so->so_state & SS_ISCONNECTED) == 0) { 415 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 416 if ((so->so_state & SS_ISCONFIRMING) == 0 && 417 !(resid == 0 && clen != 0)) 418 snderr(ENOTCONN); 419 } else if (addr == 0) 420 snderr(EDESTADDRREQ); 421 } 422 space = sbspace(&so->so_snd); 423 if (flags & MSG_OOB) 424 space += 1024; 425 if ((atomic && resid > so->so_snd.sb_hiwat) || 426 clen > so->so_snd.sb_hiwat) 427 snderr(EMSGSIZE); 428 if (space < resid + clen && 429 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 430 if (so->so_state & SS_NBIO) 431 snderr(EWOULDBLOCK); 432 sbunlock(&so->so_snd); 433 error = sbwait(&so->so_snd); 434 so->so_state &= ~SS_ISSENDING; 435 splx(s); 436 if (error) 437 goto out; 438 goto restart; 439 } 440 splx(s); 441 mp = ⊤ 442 space -= clen; 443 do { 444 if (uio == NULL) { 445 /* 446 * Data is prepackaged in "top". 447 */ 448 resid = 0; 449 if (flags & MSG_EOR) 450 top->m_flags |= M_EOR; 451 } else do { 452 if (top == 0) { 453 MGETHDR(m, M_WAIT, MT_DATA); 454 mlen = MHLEN; 455 m->m_pkthdr.len = 0; 456 m->m_pkthdr.rcvif = (struct ifnet *)0; 457 } else { 458 MGET(m, M_WAIT, MT_DATA); 459 mlen = MLEN; 460 } 461 if (resid >= MINCLSIZE && space >= MCLBYTES) { 462 MCLGET(m, M_NOWAIT); 463 if ((m->m_flags & M_EXT) == 0) 464 goto nopages; 465 mlen = MCLBYTES; 466 if (atomic && top == 0) { 467 len = lmin(MCLBYTES - max_hdr, resid); 468 m->m_data += max_hdr; 469 } else 470 len = lmin(MCLBYTES, resid); 471 space -= len; 472 } else { 473 nopages: 474 len = lmin(lmin(mlen, resid), space); 475 space -= len; 476 /* 477 * For datagram protocols, leave room 478 * for protocol headers in first mbuf. 479 */ 480 if (atomic && top == 0 && len < mlen) 481 MH_ALIGN(m, len); 482 } 483 error = uiomove(mtod(m, caddr_t), (int)len, 484 uio); 485 resid = uio->uio_resid; 486 m->m_len = len; 487 *mp = m; 488 top->m_pkthdr.len += len; 489 if (error) 490 goto release; 491 mp = &m->m_next; 492 if (resid <= 0) { 493 if (flags & MSG_EOR) 494 top->m_flags |= M_EOR; 495 break; 496 } 497 } while (space > 0 && atomic); 498 if (dontroute) 499 so->so_options |= SO_DONTROUTE; 500 s = splsoftnet(); /* XXX */ 501 if (resid <= 0) 502 so->so_state &= ~SS_ISSENDING; 503 error = (*so->so_proto->pr_usrreq)(so, 504 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, 505 top, addr, control, curproc); 506 splx(s); 507 if (dontroute) 508 so->so_options &= ~SO_DONTROUTE; 509 clen = 0; 510 control = 0; 511 top = 0; 512 mp = ⊤ 513 if (error) 514 goto release; 515 } while (resid && space > 0); 516 } while (resid); 517 518 release: 519 so->so_state &= ~SS_ISSENDING; 520 sbunlock(&so->so_snd); 521 out: 522 if (top) 523 m_freem(top); 524 if (control) 525 m_freem(control); 526 return (error); 527 } 528 529 /* 530 * Implement receive operations on a socket. 531 * We depend on the way that records are added to the sockbuf 532 * by sbappend*. In particular, each record (mbufs linked through m_next) 533 * must begin with an address if the protocol so specifies, 534 * followed by an optional mbuf or mbufs containing ancillary data, 535 * and then zero or more mbufs of data. 536 * In order to avoid blocking network interrupts for the entire time here, 537 * we splx() while doing the actual copy to user space. 538 * Although the sockbuf is locked, new data may still be appended, 539 * and thus we must maintain consistency of the sockbuf during that time. 540 * 541 * The caller may receive the data as a single mbuf chain by supplying 542 * an mbuf **mp0 for use in returning the chain. The uio is then used 543 * only for the count in uio_resid. 544 */ 545 int 546 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, 547 struct mbuf **mp0, struct mbuf **controlp, int *flagsp, 548 socklen_t controllen) 549 { 550 struct mbuf *m, **mp; 551 int flags, len, error, s, offset; 552 struct protosw *pr = so->so_proto; 553 struct mbuf *nextrecord; 554 int moff, type = 0; 555 size_t orig_resid = uio->uio_resid; 556 int uio_error = 0; 557 int resid; 558 559 mp = mp0; 560 if (paddr) 561 *paddr = 0; 562 if (controlp) 563 *controlp = 0; 564 if (flagsp) 565 flags = *flagsp &~ MSG_EOR; 566 else 567 flags = 0; 568 if (so->so_state & SS_NBIO) 569 flags |= MSG_DONTWAIT; 570 if (flags & MSG_OOB) { 571 m = m_get(M_WAIT, MT_DATA); 572 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 573 (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc); 574 if (error) 575 goto bad; 576 do { 577 error = uiomove(mtod(m, caddr_t), 578 (int) min(uio->uio_resid, m->m_len), uio); 579 m = m_free(m); 580 } while (uio->uio_resid && error == 0 && m); 581 bad: 582 if (m) 583 m_freem(m); 584 return (error); 585 } 586 if (mp) 587 *mp = NULL; 588 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 589 (*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, curproc); 590 591 restart: 592 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 593 return (error); 594 s = splsoftnet(); 595 596 m = so->so_rcv.sb_mb; 597 /* 598 * If we have less data than requested, block awaiting more 599 * (subject to any timeout) if: 600 * 1. the current count is less than the low water mark, 601 * 2. MSG_WAITALL is set, and it is possible to do the entire 602 * receive operation at once if we block (resid <= hiwat), or 603 * 3. MSG_DONTWAIT is not set. 604 * If MSG_WAITALL is set but resid is larger than the receive buffer, 605 * we have to do the receive in sections, and thus risk returning 606 * a short count if a timeout or signal occurs after we start. 607 */ 608 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 609 so->so_rcv.sb_cc < uio->uio_resid) && 610 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 611 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 612 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { 613 #ifdef DIAGNOSTIC 614 if (m == NULL && so->so_rcv.sb_cc) 615 panic("receive 1"); 616 #endif 617 if (so->so_error) { 618 if (m) 619 goto dontblock; 620 error = so->so_error; 621 if ((flags & MSG_PEEK) == 0) 622 so->so_error = 0; 623 goto release; 624 } 625 if (so->so_state & SS_CANTRCVMORE) { 626 if (m) 627 goto dontblock; 628 else 629 goto release; 630 } 631 for (; m; m = m->m_next) 632 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 633 m = so->so_rcv.sb_mb; 634 goto dontblock; 635 } 636 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 637 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 638 error = ENOTCONN; 639 goto release; 640 } 641 if (uio->uio_resid == 0 && controlp == NULL) 642 goto release; 643 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 644 error = EWOULDBLOCK; 645 goto release; 646 } 647 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); 648 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); 649 sbunlock(&so->so_rcv); 650 error = sbwait(&so->so_rcv); 651 splx(s); 652 if (error) 653 return (error); 654 goto restart; 655 } 656 dontblock: 657 /* 658 * On entry here, m points to the first record of the socket buffer. 659 * While we process the initial mbufs containing address and control 660 * info, we save a copy of m->m_nextpkt into nextrecord. 661 */ 662 if (uio->uio_procp) 663 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 664 KASSERT(m == so->so_rcv.sb_mb); 665 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); 666 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); 667 nextrecord = m->m_nextpkt; 668 if (pr->pr_flags & PR_ADDR) { 669 #ifdef DIAGNOSTIC 670 if (m->m_type != MT_SONAME) 671 panic("receive 1a"); 672 #endif 673 orig_resid = 0; 674 if (flags & MSG_PEEK) { 675 if (paddr) 676 *paddr = m_copy(m, 0, m->m_len); 677 m = m->m_next; 678 } else { 679 sbfree(&so->so_rcv, m); 680 if (paddr) { 681 *paddr = m; 682 so->so_rcv.sb_mb = m->m_next; 683 m->m_next = 0; 684 m = so->so_rcv.sb_mb; 685 } else { 686 MFREE(m, so->so_rcv.sb_mb); 687 m = so->so_rcv.sb_mb; 688 } 689 } 690 } 691 while (m && m->m_type == MT_CONTROL && error == 0) { 692 if (flags & MSG_PEEK) { 693 if (controlp) 694 *controlp = m_copy(m, 0, m->m_len); 695 m = m->m_next; 696 } else { 697 sbfree(&so->so_rcv, m); 698 if (controlp) { 699 if (pr->pr_domain->dom_externalize && 700 mtod(m, struct cmsghdr *)->cmsg_type == 701 SCM_RIGHTS) 702 error = (*pr->pr_domain->dom_externalize)(m, 703 controllen); 704 *controlp = m; 705 so->so_rcv.sb_mb = m->m_next; 706 m->m_next = 0; 707 m = so->so_rcv.sb_mb; 708 } else { 709 /* 710 * Dispose of any SCM_RIGHTS message that went 711 * through the read path rather than recv. 712 */ 713 if (pr->pr_domain->dom_dispose && 714 mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) 715 pr->pr_domain->dom_dispose(m); 716 MFREE(m, so->so_rcv.sb_mb); 717 m = so->so_rcv.sb_mb; 718 } 719 } 720 if (controlp) { 721 orig_resid = 0; 722 controlp = &(*controlp)->m_next; 723 } 724 } 725 726 /* 727 * If m is non-NULL, we have some data to read. From now on, 728 * make sure to keep sb_lastrecord consistent when working on 729 * the last packet on the chain (nextrecord == NULL) and we 730 * change m->m_nextpkt. 731 */ 732 if (m) { 733 if ((flags & MSG_PEEK) == 0) { 734 m->m_nextpkt = nextrecord; 735 /* 736 * If nextrecord == NULL (this is a single chain), 737 * then sb_lastrecord may not be valid here if m 738 * was changed earlier. 739 */ 740 if (nextrecord == NULL) { 741 KASSERT(so->so_rcv.sb_mb == m); 742 so->so_rcv.sb_lastrecord = m; 743 } 744 } 745 type = m->m_type; 746 if (type == MT_OOBDATA) 747 flags |= MSG_OOB; 748 if (m->m_flags & M_BCAST) 749 flags |= MSG_BCAST; 750 if (m->m_flags & M_MCAST) 751 flags |= MSG_MCAST; 752 } else { 753 if ((flags & MSG_PEEK) == 0) { 754 KASSERT(so->so_rcv.sb_mb == m); 755 so->so_rcv.sb_mb = nextrecord; 756 SB_EMPTY_FIXUP(&so->so_rcv); 757 } 758 } 759 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); 760 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); 761 762 moff = 0; 763 offset = 0; 764 while (m && uio->uio_resid > 0 && error == 0) { 765 if (m->m_type == MT_OOBDATA) { 766 if (type != MT_OOBDATA) 767 break; 768 } else if (type == MT_OOBDATA) 769 break; 770 #ifdef DIAGNOSTIC 771 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 772 panic("receive 3"); 773 #endif 774 so->so_state &= ~SS_RCVATMARK; 775 len = uio->uio_resid; 776 if (so->so_oobmark && len > so->so_oobmark - offset) 777 len = so->so_oobmark - offset; 778 if (len > m->m_len - moff) 779 len = m->m_len - moff; 780 /* 781 * If mp is set, just pass back the mbufs. 782 * Otherwise copy them out via the uio, then free. 783 * Sockbuf must be consistent here (points to current mbuf, 784 * it points to next record) when we drop priority; 785 * we must note any additions to the sockbuf when we 786 * block interrupts again. 787 */ 788 if (mp == NULL && uio_error == 0) { 789 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); 790 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); 791 resid = uio->uio_resid; 792 splx(s); 793 uio_error = 794 uiomove(mtod(m, caddr_t) + moff, (int)len, 795 uio); 796 s = splsoftnet(); 797 if (uio_error) 798 uio->uio_resid = resid - len; 799 } else 800 uio->uio_resid -= len; 801 if (len == m->m_len - moff) { 802 if (m->m_flags & M_EOR) 803 flags |= MSG_EOR; 804 if (flags & MSG_PEEK) { 805 m = m->m_next; 806 moff = 0; 807 } else { 808 nextrecord = m->m_nextpkt; 809 sbfree(&so->so_rcv, m); 810 if (mp) { 811 *mp = m; 812 mp = &m->m_next; 813 so->so_rcv.sb_mb = m = m->m_next; 814 *mp = NULL; 815 } else { 816 MFREE(m, so->so_rcv.sb_mb); 817 m = so->so_rcv.sb_mb; 818 } 819 /* 820 * If m != NULL, we also know that 821 * so->so_rcv.sb_mb != NULL. 822 */ 823 KASSERT(so->so_rcv.sb_mb == m); 824 if (m) { 825 m->m_nextpkt = nextrecord; 826 if (nextrecord == NULL) 827 so->so_rcv.sb_lastrecord = m; 828 } else { 829 so->so_rcv.sb_mb = nextrecord; 830 SB_EMPTY_FIXUP(&so->so_rcv); 831 } 832 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); 833 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); 834 } 835 } else { 836 if (flags & MSG_PEEK) 837 moff += len; 838 else { 839 if (mp) 840 *mp = m_copym(m, 0, len, M_WAIT); 841 m->m_data += len; 842 m->m_len -= len; 843 so->so_rcv.sb_cc -= len; 844 so->so_rcv.sb_datacc -= len; 845 } 846 } 847 if (so->so_oobmark) { 848 if ((flags & MSG_PEEK) == 0) { 849 so->so_oobmark -= len; 850 if (so->so_oobmark == 0) { 851 so->so_state |= SS_RCVATMARK; 852 break; 853 } 854 } else { 855 offset += len; 856 if (offset == so->so_oobmark) 857 break; 858 } 859 } 860 if (flags & MSG_EOR) 861 break; 862 /* 863 * If the MSG_WAITALL flag is set (for non-atomic socket), 864 * we must not quit until "uio->uio_resid == 0" or an error 865 * termination. If a signal/timeout occurs, return 866 * with a short count but without error. 867 * Keep sockbuf locked against other readers. 868 */ 869 while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && 870 !sosendallatonce(so) && !nextrecord) { 871 if (so->so_error || so->so_state & SS_CANTRCVMORE) 872 break; 873 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); 874 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); 875 error = sbwait(&so->so_rcv); 876 if (error) { 877 sbunlock(&so->so_rcv); 878 splx(s); 879 return (0); 880 } 881 if ((m = so->so_rcv.sb_mb) != NULL) 882 nextrecord = m->m_nextpkt; 883 } 884 } 885 886 if (m && pr->pr_flags & PR_ATOMIC) { 887 flags |= MSG_TRUNC; 888 if ((flags & MSG_PEEK) == 0) 889 (void) sbdroprecord(&so->so_rcv); 890 } 891 if ((flags & MSG_PEEK) == 0) { 892 if (m == NULL) { 893 /* 894 * First part is an inline SB_EMPTY_FIXUP(). Second 895 * part makes sure sb_lastrecord is up-to-date if 896 * there is still data in the socket buffer. 897 */ 898 so->so_rcv.sb_mb = nextrecord; 899 if (so->so_rcv.sb_mb == NULL) { 900 so->so_rcv.sb_mbtail = NULL; 901 so->so_rcv.sb_lastrecord = NULL; 902 } else if (nextrecord->m_nextpkt == NULL) 903 so->so_rcv.sb_lastrecord = nextrecord; 904 } 905 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); 906 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); 907 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 908 (*pr->pr_usrreq)(so, PRU_RCVD, NULL, 909 (struct mbuf *)(long)flags, NULL, curproc); 910 } 911 if (orig_resid == uio->uio_resid && orig_resid && 912 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 913 sbunlock(&so->so_rcv); 914 splx(s); 915 goto restart; 916 } 917 918 if (uio_error) 919 error = uio_error; 920 921 if (flagsp) 922 *flagsp |= flags; 923 release: 924 sbunlock(&so->so_rcv); 925 splx(s); 926 return (error); 927 } 928 929 int 930 soshutdown(struct socket *so, int how) 931 { 932 struct protosw *pr = so->so_proto; 933 934 switch (how) { 935 case SHUT_RD: 936 case SHUT_RDWR: 937 sorflush(so); 938 if (how == SHUT_RD) 939 return (0); 940 /* FALLTHROUGH */ 941 case SHUT_WR: 942 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL, 943 curproc); 944 default: 945 return (EINVAL); 946 } 947 } 948 949 void 950 sorflush(struct socket *so) 951 { 952 struct sockbuf *sb = &so->so_rcv; 953 struct protosw *pr = so->so_proto; 954 int s; 955 struct sockbuf asb; 956 957 sb->sb_flags |= SB_NOINTR; 958 (void) sblock(sb, M_WAITOK); 959 s = splnet(); 960 socantrcvmore(so); 961 sbunlock(sb); 962 asb = *sb; 963 bzero(sb, sizeof (*sb)); 964 /* XXX - the bzero stumps all over so_rcv */ 965 if (asb.sb_flags & SB_KNOTE) { 966 sb->sb_sel.si_note = asb.sb_sel.si_note; 967 sb->sb_flags = SB_KNOTE; 968 } 969 splx(s); 970 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 971 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 972 sbrelease(&asb); 973 } 974 975 int 976 sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) 977 { 978 int error = 0; 979 struct mbuf *m = m0; 980 981 if (level != SOL_SOCKET) { 982 if (so->so_proto && so->so_proto->pr_ctloutput) 983 return ((*so->so_proto->pr_ctloutput) 984 (PRCO_SETOPT, so, level, optname, &m0)); 985 error = ENOPROTOOPT; 986 } else { 987 switch (optname) { 988 case SO_BINDANY: 989 if ((error = suser(curproc, 0)) != 0) /* XXX */ 990 goto bad; 991 break; 992 } 993 994 switch (optname) { 995 996 case SO_LINGER: 997 if (m == NULL || m->m_len != sizeof (struct linger) || 998 mtod(m, struct linger *)->l_linger < 0 || 999 mtod(m, struct linger *)->l_linger > SHRT_MAX) { 1000 error = EINVAL; 1001 goto bad; 1002 } 1003 so->so_linger = mtod(m, struct linger *)->l_linger; 1004 /* FALLTHROUGH */ 1005 1006 case SO_BINDANY: 1007 case SO_DEBUG: 1008 case SO_KEEPALIVE: 1009 case SO_DONTROUTE: 1010 case SO_USELOOPBACK: 1011 case SO_BROADCAST: 1012 case SO_REUSEADDR: 1013 case SO_REUSEPORT: 1014 case SO_OOBINLINE: 1015 case SO_JUMBO: 1016 case SO_TIMESTAMP: 1017 if (m == NULL || m->m_len < sizeof (int)) { 1018 error = EINVAL; 1019 goto bad; 1020 } 1021 if (*mtod(m, int *)) 1022 so->so_options |= optname; 1023 else 1024 so->so_options &= ~optname; 1025 break; 1026 1027 case SO_SNDBUF: 1028 case SO_RCVBUF: 1029 case SO_SNDLOWAT: 1030 case SO_RCVLOWAT: 1031 { 1032 u_long cnt; 1033 1034 if (m == NULL || m->m_len < sizeof (int)) { 1035 error = EINVAL; 1036 goto bad; 1037 } 1038 cnt = *mtod(m, int *); 1039 if ((long)cnt <= 0) 1040 cnt = 1; 1041 switch (optname) { 1042 1043 case SO_SNDBUF: 1044 if (sbcheckreserve(cnt, so->so_snd.sb_hiwat) || 1045 sbreserve(&so->so_snd, cnt)) { 1046 error = ENOBUFS; 1047 goto bad; 1048 } 1049 break; 1050 1051 case SO_RCVBUF: 1052 if (sbcheckreserve(cnt, so->so_rcv.sb_hiwat) || 1053 sbreserve(&so->so_rcv, cnt)) { 1054 error = ENOBUFS; 1055 goto bad; 1056 } 1057 break; 1058 1059 case SO_SNDLOWAT: 1060 so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ? 1061 so->so_snd.sb_hiwat : cnt; 1062 break; 1063 case SO_RCVLOWAT: 1064 so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ? 1065 so->so_rcv.sb_hiwat : cnt; 1066 break; 1067 } 1068 break; 1069 } 1070 1071 case SO_SNDTIMEO: 1072 case SO_RCVTIMEO: 1073 { 1074 struct timeval *tv; 1075 u_short val; 1076 1077 if (m == NULL || m->m_len < sizeof (*tv)) { 1078 error = EINVAL; 1079 goto bad; 1080 } 1081 tv = mtod(m, struct timeval *); 1082 if (tv->tv_sec > (USHRT_MAX - tv->tv_usec / tick) / hz) { 1083 error = EDOM; 1084 goto bad; 1085 } 1086 val = tv->tv_sec * hz + tv->tv_usec / tick; 1087 if (val == 0 && tv->tv_usec != 0) 1088 val = 1; 1089 1090 switch (optname) { 1091 1092 case SO_SNDTIMEO: 1093 so->so_snd.sb_timeo = val; 1094 break; 1095 case SO_RCVTIMEO: 1096 so->so_rcv.sb_timeo = val; 1097 break; 1098 } 1099 break; 1100 } 1101 1102 default: 1103 error = ENOPROTOOPT; 1104 break; 1105 } 1106 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1107 (void) ((*so->so_proto->pr_ctloutput) 1108 (PRCO_SETOPT, so, level, optname, &m0)); 1109 m = NULL; /* freed by protocol */ 1110 } 1111 } 1112 bad: 1113 if (m) 1114 (void) m_free(m); 1115 return (error); 1116 } 1117 1118 int 1119 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) 1120 { 1121 struct mbuf *m; 1122 1123 if (level != SOL_SOCKET) { 1124 if (so->so_proto && so->so_proto->pr_ctloutput) { 1125 return ((*so->so_proto->pr_ctloutput) 1126 (PRCO_GETOPT, so, level, optname, mp)); 1127 } else 1128 return (ENOPROTOOPT); 1129 } else { 1130 m = m_get(M_WAIT, MT_SOOPTS); 1131 m->m_len = sizeof (int); 1132 1133 switch (optname) { 1134 1135 case SO_LINGER: 1136 m->m_len = sizeof (struct linger); 1137 mtod(m, struct linger *)->l_onoff = 1138 so->so_options & SO_LINGER; 1139 mtod(m, struct linger *)->l_linger = so->so_linger; 1140 break; 1141 1142 case SO_BINDANY: 1143 case SO_USELOOPBACK: 1144 case SO_DONTROUTE: 1145 case SO_DEBUG: 1146 case SO_KEEPALIVE: 1147 case SO_REUSEADDR: 1148 case SO_REUSEPORT: 1149 case SO_BROADCAST: 1150 case SO_OOBINLINE: 1151 case SO_JUMBO: 1152 case SO_TIMESTAMP: 1153 *mtod(m, int *) = so->so_options & optname; 1154 break; 1155 1156 case SO_TYPE: 1157 *mtod(m, int *) = so->so_type; 1158 break; 1159 1160 case SO_ERROR: 1161 *mtod(m, int *) = so->so_error; 1162 so->so_error = 0; 1163 break; 1164 1165 case SO_SNDBUF: 1166 *mtod(m, int *) = so->so_snd.sb_hiwat; 1167 break; 1168 1169 case SO_RCVBUF: 1170 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1171 break; 1172 1173 case SO_SNDLOWAT: 1174 *mtod(m, int *) = so->so_snd.sb_lowat; 1175 break; 1176 1177 case SO_RCVLOWAT: 1178 *mtod(m, int *) = so->so_rcv.sb_lowat; 1179 break; 1180 1181 case SO_SNDTIMEO: 1182 case SO_RCVTIMEO: 1183 { 1184 int val = (optname == SO_SNDTIMEO ? 1185 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1186 1187 m->m_len = sizeof(struct timeval); 1188 mtod(m, struct timeval *)->tv_sec = val / hz; 1189 mtod(m, struct timeval *)->tv_usec = 1190 (val % hz) * tick; 1191 break; 1192 } 1193 1194 default: 1195 (void)m_free(m); 1196 return (ENOPROTOOPT); 1197 } 1198 *mp = m; 1199 return (0); 1200 } 1201 } 1202 1203 void 1204 sohasoutofband(struct socket *so) 1205 { 1206 csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid); 1207 selwakeup(&so->so_rcv.sb_sel); 1208 } 1209 1210 int 1211 soo_kqfilter(struct file *fp, struct knote *kn) 1212 { 1213 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1214 struct sockbuf *sb; 1215 int s; 1216 1217 switch (kn->kn_filter) { 1218 case EVFILT_READ: 1219 if (so->so_options & SO_ACCEPTCONN) 1220 kn->kn_fop = &solisten_filtops; 1221 else 1222 kn->kn_fop = &soread_filtops; 1223 sb = &so->so_rcv; 1224 break; 1225 case EVFILT_WRITE: 1226 kn->kn_fop = &sowrite_filtops; 1227 sb = &so->so_snd; 1228 break; 1229 default: 1230 return (1); 1231 } 1232 1233 s = splnet(); 1234 SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); 1235 sb->sb_flags |= SB_KNOTE; 1236 splx(s); 1237 return (0); 1238 } 1239 1240 void 1241 filt_sordetach(struct knote *kn) 1242 { 1243 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1244 int s = splnet(); 1245 1246 SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); 1247 if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) 1248 so->so_rcv.sb_flags &= ~SB_KNOTE; 1249 splx(s); 1250 } 1251 1252 /*ARGSUSED*/ 1253 int 1254 filt_soread(struct knote *kn, long hint) 1255 { 1256 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1257 1258 kn->kn_data = so->so_rcv.sb_cc; 1259 if (so->so_state & SS_CANTRCVMORE) { 1260 kn->kn_flags |= EV_EOF; 1261 kn->kn_fflags = so->so_error; 1262 return (1); 1263 } 1264 if (so->so_error) /* temporary udp error */ 1265 return (1); 1266 if (kn->kn_sfflags & NOTE_LOWAT) 1267 return (kn->kn_data >= kn->kn_sdata); 1268 return (kn->kn_data >= so->so_rcv.sb_lowat); 1269 } 1270 1271 void 1272 filt_sowdetach(struct knote *kn) 1273 { 1274 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1275 int s = splnet(); 1276 1277 SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); 1278 if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) 1279 so->so_snd.sb_flags &= ~SB_KNOTE; 1280 splx(s); 1281 } 1282 1283 /*ARGSUSED*/ 1284 int 1285 filt_sowrite(struct knote *kn, long hint) 1286 { 1287 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1288 1289 kn->kn_data = sbspace(&so->so_snd); 1290 if (so->so_state & SS_CANTSENDMORE) { 1291 kn->kn_flags |= EV_EOF; 1292 kn->kn_fflags = so->so_error; 1293 return (1); 1294 } 1295 if (so->so_error) /* temporary udp error */ 1296 return (1); 1297 if (((so->so_state & SS_ISCONNECTED) == 0) && 1298 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 1299 return (0); 1300 if (kn->kn_sfflags & NOTE_LOWAT) 1301 return (kn->kn_data >= kn->kn_sdata); 1302 return (kn->kn_data >= so->so_snd.sb_lowat); 1303 } 1304 1305 /*ARGSUSED*/ 1306 int 1307 filt_solisten(struct knote *kn, long hint) 1308 { 1309 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1310 1311 kn->kn_data = so->so_qlen; 1312 return (so->so_qlen != 0); 1313 } 1314