1 /* $NetBSD: uipc_socket.c,v 1.61 2002/01/03 01:16:02 mrg Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.61 2002/01/03 01:16:02 mrg Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

/* Pool from which all struct socket allocations are drawn; set up in soinit(). */
struct pool	socket_pool;

extern int	somaxconn;		/* patchable (XXX sysctl) */
int	somaxconn = SOMAXCONN;

/*
 * Initialize the socket subsystem: create the pool used to
 * allocate socket structures.  Called once during kernel startup.
 */
void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", 0, NULL, NULL, M_SOCKET);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
/*
 * Create a new socket of the given domain, type and protocol:
 * look up the protocol switch entry, allocate and zero a socket
 * from socket_pool, then ask the protocol to attach (PRU_ATTACH).
 * On success *aso receives the new socket; on failure the socket
 * is freed and an errno is returned.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc	*p;
	struct protosw	*prp;
	struct socket	*so;
	int		error, s;

	p = curproc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/* Mark unreferenced so sofree() will actually reclaim it. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

/*
 * Bind a name to a socket: the request is handed straight to the
 * protocol (PRU_BIND); nam holds the local address as an mbuf.
 */
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

/*
 * Put the socket into listening state (PRU_LISTEN) and set the
 * pending-connection queue limit, clamped to [0, somaxconn].
 */
int
solisten(struct socket *so, int backlog)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_q.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}

/*
 * Reclaim a socket that has no protocol control block and no remaining
 * file descriptor reference: release its buffers and return it to the
 * pool.  A no-op if either condition does not hold.
 */
void
sofree(struct socket *so)
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket	*so2;
	int		s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		/* Abort every connection still queued on this listener. */
		while ((so2 = so->so_q0.tqh_first) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = so->so_q.tqh_first) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking linger: don't wait for the drain. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		/* Detach from the protocol; keep the first error seen. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

/*
 * Complete an accept(2): clear SS_NOFDREF and ask the protocol for the
 * peer's address (PRU_ACCEPT), returned via nam.  If the connection was
 * aborted and the protocol so requests, return ECONNABORTED instead.
 */
int
soaccept(struct socket *so, struct mbuf *nam)
{
	int	s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address held in nam (PRU_CONNECT).
 * Listening sockets cannot connect.
 */
int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc	*p;
	int		s, error;

	p = curproc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (PRU_CONNECT2),
 * e.g. for socketpair(2).
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int	s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

/*
 * Initiate a disconnect (PRU_DISCONNECT) on a connected socket.
 * Fails with ENOTCONN if not connected, EALREADY if one is in progress.
 */
int
sodisconnect(struct socket *so)
{
	int	s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	return (error);
}

/* MSG_DONTWAIT callers must not sleep while acquiring the sockbuf lock. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc	*p;
	struct mbuf	**mp, *m;
	long		space, len, resid, clen, mlen;
	int		error, s, dontroute, atomic;

	p = curproc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit used only inside the splsoftnet sections below. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
 nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			/* The protocol now owns top and control. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf	*m, **mp;
	int		flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw	*pr;
	struct mbuf	*nextrecord;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data is fetched directly from the protocol. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A pending OOB mark or record end must be delivered now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* Record starts with the sender's address for these protocols. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then zero or more control (ancillary data) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data within one receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance the data pointer in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Deliberate: short count, no error (see above). */
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: drop the rest of the record. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred; try the whole thing again. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * shutdown(2) backend: disable receives and/or sends on the socket
 * according to `how' (SHUT_RD, SHUT_WR, SHUT_RDWR).
 */
int
soshutdown(struct socket *so, int how)
{
	struct protosw	*pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

/*
 * Flush and dispose of the socket's receive buffer, marking the socket
 * unable to receive more.  A snapshot of the sockbuf is taken so any
 * in-flight rights (e.g. passed descriptors) can be disposed of after
 * the buffer is zeroed.
 */
void
sorflush(struct socket *so)
{
	struct sockbuf	*sb, asb;
	struct protosw	*pr;
	int		s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * setsockopt(2) backend.  SOL_SOCKET-level options are handled here;
 * anything else is passed to the protocol's ctloutput routine.
 * The option mbuf m0 is consumed (freed here or by the protocol).
 */
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int		error;
	struct mbuf	*m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options map directly onto so_options bits. */
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Timeouts are stored in clock ticks in a short. */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			/* Give the protocol a look at the option too. */
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * getsockopt(2) backend.  SOL_SOCKET-level option values are returned
 * in a freshly allocated mbuf via *mp; other levels are passed to the
 * protocol's ctloutput routine.
 */
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf	*m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process or process group that urgent (out-of-band)
 * data has arrived: deliver SIGURG and wake up any select(2)ers on the
 * receive buffer.
 */
void
sohasoutofband(struct socket *so)
{
	struct proc	*p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}