1 /* $NetBSD: uipc_socket.c,v 1.59 2001/11/12 15:25:32 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.59 2001/11/12 15:25:32 lukem Exp $"); 40 41 #include "opt_compat_sunos.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/domain.h> 50 #include <sys/kernel.h> 51 #include <sys/protosw.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/signalvar.h> 55 #include <sys/resourcevar.h> 56 #include <sys/pool.h> 57 58 struct pool socket_pool; 59 60 extern int somaxconn; /* patchable (XXX sysctl) */ 61 int somaxconn = SOMAXCONN; 62 63 void 64 soinit(void) 65 { 66 67 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, 68 "sockpl", 0, NULL, NULL, M_SOCKET); 69 } 70 71 /* 72 * Socket operation routines. 73 * These routines are called by the routines in 74 * sys_socket.c or from a system process, and 75 * implement the semantics of socket operations by 76 * switching out to the protocol specific routines. 77 */ 78 /*ARGSUSED*/ 79 int 80 socreate(int dom, struct socket **aso, int type, int proto) 81 { 82 struct proc *p; 83 struct protosw *prp; 84 struct socket *so; 85 int error, s; 86 87 p = curproc; /* XXX */ 88 if (proto) 89 prp = pffindproto(dom, proto, type); 90 else 91 prp = pffindtype(dom, type); 92 if (prp == 0 || prp->pr_usrreq == 0) 93 return (EPROTONOSUPPORT); 94 if (prp->pr_type != type) 95 return (EPROTOTYPE); 96 s = splsoftnet(); 97 so = pool_get(&socket_pool, PR_WAITOK); 98 memset((caddr_t)so, 0, sizeof(*so)); 99 TAILQ_INIT(&so->so_q0); 100 TAILQ_INIT(&so->so_q); 101 so->so_type = type; 102 so->so_proto = prp; 103 so->so_send = sosend; 104 so->so_receive = soreceive; 105 if (p != 0) 106 so->so_uid = p->p_ucred->cr_uid; 107 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, 108 (struct mbuf *)(long)proto, (struct mbuf *)0, p); 109 if (error) { 110 so->so_state |= SS_NOFDREF; 111 sofree(so); 112 splx(s); 113 return (error); 114 } 115 #ifdef COMPAT_SUNOS 116 { 117 extern struct emul emul_sunos; 118 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 119 so->so_options |= SO_BROADCAST; 120 } 121 #endif 122 splx(s); 123 *aso = so; 124 return (0); 125 } 126 127 int 128 sobind(struct socket *so, struct mbuf *nam, struct proc *p) 129 { 130 int s, error; 131 132 s = splsoftnet(); 133 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, 134 nam, (struct mbuf *)0, p); 135 splx(s); 136 return (error); 137 } 138 139 int 140 solisten(struct socket *so, int backlog) 141 { 142 int s, error; 143 144 s = splsoftnet(); 145 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, 146 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 147 if (error) { 148 splx(s); 149 return (error); 150 } 151 if (so->so_q.tqh_first == NULL) 152 so->so_options |= SO_ACCEPTCONN; 153 if (backlog < 0) 154 backlog = 0; 155 so->so_qlimit = min(backlog, somaxconn); 156 splx(s); 157 return (0); 158 } 159 160 void 161 sofree(struct socket *so) 162 { 163 164 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 165 return; 166 if (so->so_head) { 167 /* 168 * We must not decommission a socket that's on the accept(2) 169 * queue. If we do, then accept(2) may hang after select(2) 170 * indicated that the listening socket was ready. 171 */ 172 if (!soqremque(so, 0)) 173 return; 174 } 175 sbrelease(&so->so_snd); 176 sorflush(so); 177 pool_put(&socket_pool, so); 178 } 179 180 /* 181 * Close a socket on last file table reference removal. 182 * Initiate disconnect if connected. 183 * Free socket when disconnect complete. 184 */ 185 int 186 soclose(struct socket *so) 187 { 188 struct socket *so2; 189 int s, error; 190 191 error = 0; 192 s = splsoftnet(); /* conservative */ 193 if (so->so_options & SO_ACCEPTCONN) { 194 while ((so2 = so->so_q0.tqh_first) != 0) { 195 (void) soqremque(so2, 0); 196 (void) soabort(so2); 197 } 198 while ((so2 = so->so_q.tqh_first) != 0) { 199 (void) soqremque(so2, 1); 200 (void) soabort(so2); 201 } 202 } 203 if (so->so_pcb == 0) 204 goto discard; 205 if (so->so_state & SS_ISCONNECTED) { 206 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 207 error = sodisconnect(so); 208 if (error) 209 goto drop; 210 } 211 if (so->so_options & SO_LINGER) { 212 if ((so->so_state & SS_ISDISCONNECTING) && 213 (so->so_state & SS_NBIO)) 214 goto drop; 215 while (so->so_state & SS_ISCONNECTED) { 216 error = tsleep((caddr_t)&so->so_timeo, 217 PSOCK | PCATCH, netcls, 218 so->so_linger * hz); 219 if (error) 220 break; 221 } 222 } 223 } 224 drop: 225 if (so->so_pcb) { 226 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 227 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 228 (struct proc *)0); 229 if (error == 0) 230 error = error2; 231 } 232 discard: 233 if (so->so_state & SS_NOFDREF) 234 panic("soclose: NOFDREF"); 235 so->so_state |= SS_NOFDREF; 236 sofree(so); 237 splx(s); 238 return (error); 239 } 240 241 /* 242 * Must be called at splsoftnet... 243 */ 244 int 245 soabort(struct socket *so) 246 { 247 248 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, 249 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 250 } 251 252 int 253 soaccept(struct socket *so, struct mbuf *nam) 254 { 255 int s, error; 256 257 error = 0; 258 s = splsoftnet(); 259 if ((so->so_state & SS_NOFDREF) == 0) 260 panic("soaccept: !NOFDREF"); 261 so->so_state &= ~SS_NOFDREF; 262 if ((so->so_state & SS_ISDISCONNECTED) == 0 || 263 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) 264 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 265 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); 266 else 267 error = ECONNABORTED; 268 269 splx(s); 270 return (error); 271 } 272 273 int 274 soconnect(struct socket *so, struct mbuf *nam) 275 { 276 struct proc *p; 277 int s, error; 278 279 p = curproc; /* XXX */ 280 if (so->so_options & SO_ACCEPTCONN) 281 return (EOPNOTSUPP); 282 s = splsoftnet(); 283 /* 284 * If protocol is connection-based, can only connect once. 285 * Otherwise, if connected, try to disconnect first. 286 * This allows user to disconnect by connecting to, e.g., 287 * a null address. 288 */ 289 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 290 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 291 (error = sodisconnect(so)))) 292 error = EISCONN; 293 else 294 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 295 (struct mbuf *)0, nam, (struct mbuf *)0, p); 296 splx(s); 297 return (error); 298 } 299 300 int 301 soconnect2(struct socket *so1, struct socket *so2) 302 { 303 int s, error; 304 305 s = splsoftnet(); 306 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 307 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, 308 (struct proc *)0); 309 splx(s); 310 return (error); 311 } 312 313 int 314 sodisconnect(struct socket *so) 315 { 316 int s, error; 317 318 s = splsoftnet(); 319 if ((so->so_state & SS_ISCONNECTED) == 0) { 320 error = ENOTCONN; 321 goto bad; 322 } 323 if (so->so_state & SS_ISDISCONNECTING) { 324 error = EALREADY; 325 goto bad; 326 } 327 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 328 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 329 (struct proc *)0); 330 bad: 331 splx(s); 332 return (error); 333 } 334 335 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 336 /* 337 * Send on a socket. 338 * If send must go all at once and message is larger than 339 * send buffering, then hard error. 340 * Lock against other senders. 341 * If must go all at once and not enough room now, then 342 * inform user that this would block and do nothing. 343 * Otherwise, if nonblocking, send as much as possible. 344 * The data to be sent is described by "uio" if nonzero, 345 * otherwise by the mbuf chain "top" (which must be null 346 * if uio is not). Data provided in mbuf chain must be small 347 * enough to send all at once. 348 * 349 * Returns nonzero on error, timeout or signal; callers 350 * must check for short counts if EINTR/ERESTART are returned. 351 * Data and control buffers are freed on return. 352 */ 353 int 354 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, 355 struct mbuf *control, int flags) 356 { 357 struct proc *p; 358 struct mbuf **mp, *m; 359 long space, len, resid, clen, mlen; 360 int error, s, dontroute, atomic; 361 362 p = curproc; /* XXX */ 363 clen = 0; 364 atomic = sosendallatonce(so) || top; 365 if (uio) 366 resid = uio->uio_resid; 367 else 368 resid = top->m_pkthdr.len; 369 /* 370 * In theory resid should be unsigned. 371 * However, space must be signed, as it might be less than 0 372 * if we over-committed, and we must use a signed comparison 373 * of space and resid. On the other hand, a negative resid 374 * causes us to loop sending 0-length segments to the protocol. 375 */ 376 if (resid < 0) { 377 error = EINVAL; 378 goto out; 379 } 380 dontroute = 381 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 382 (so->so_proto->pr_flags & PR_ATOMIC); 383 p->p_stats->p_ru.ru_msgsnd++; 384 if (control) 385 clen = control->m_len; 386 #define snderr(errno) { error = errno; splx(s); goto release; } 387 388 restart: 389 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 390 goto out; 391 do { 392 s = splsoftnet(); 393 if (so->so_state & SS_CANTSENDMORE) 394 snderr(EPIPE); 395 if (so->so_error) { 396 error = so->so_error; 397 so->so_error = 0; 398 splx(s); 399 goto release; 400 } 401 if ((so->so_state & SS_ISCONNECTED) == 0) { 402 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 403 if ((so->so_state & SS_ISCONFIRMING) == 0 && 404 !(resid == 0 && clen != 0)) 405 snderr(ENOTCONN); 406 } else if (addr == 0) 407 snderr(EDESTADDRREQ); 408 } 409 space = sbspace(&so->so_snd); 410 if (flags & MSG_OOB) 411 space += 1024; 412 if ((atomic && resid > so->so_snd.sb_hiwat) || 413 clen > so->so_snd.sb_hiwat) 414 snderr(EMSGSIZE); 415 if (space < resid + clen && uio && 416 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 417 if (so->so_state & SS_NBIO) 418 snderr(EWOULDBLOCK); 419 sbunlock(&so->so_snd); 420 error = sbwait(&so->so_snd); 421 splx(s); 422 if (error) 423 goto out; 424 goto restart; 425 } 426 splx(s); 427 mp = ⊤ 428 space -= clen; 429 do { 430 if (uio == NULL) { 431 /* 432 * Data is prepackaged in "top". 433 */ 434 resid = 0; 435 if (flags & MSG_EOR) 436 top->m_flags |= M_EOR; 437 } else do { 438 if (top == 0) { 439 MGETHDR(m, M_WAIT, MT_DATA); 440 mlen = MHLEN; 441 m->m_pkthdr.len = 0; 442 m->m_pkthdr.rcvif = (struct ifnet *)0; 443 } else { 444 MGET(m, M_WAIT, MT_DATA); 445 mlen = MLEN; 446 } 447 if (resid >= MINCLSIZE && space >= MCLBYTES) { 448 MCLGET(m, M_WAIT); 449 if ((m->m_flags & M_EXT) == 0) 450 goto nopages; 451 mlen = MCLBYTES; 452 #ifdef MAPPED_MBUFS 453 len = lmin(MCLBYTES, resid); 454 #else 455 if (atomic && top == 0) { 456 len = lmin(MCLBYTES - max_hdr, 457 resid); 458 m->m_data += max_hdr; 459 } else 460 len = lmin(MCLBYTES, resid); 461 #endif 462 space -= len; 463 } else { 464 nopages: 465 len = lmin(lmin(mlen, resid), space); 466 space -= len; 467 /* 468 * For datagram protocols, leave room 469 * for protocol headers in first mbuf. 470 */ 471 if (atomic && top == 0 && len < mlen) 472 MH_ALIGN(m, len); 473 } 474 error = uiomove(mtod(m, caddr_t), (int)len, 475 uio); 476 resid = uio->uio_resid; 477 m->m_len = len; 478 *mp = m; 479 top->m_pkthdr.len += len; 480 if (error) 481 goto release; 482 mp = &m->m_next; 483 if (resid <= 0) { 484 if (flags & MSG_EOR) 485 top->m_flags |= M_EOR; 486 break; 487 } 488 } while (space > 0 && atomic); 489 490 s = splsoftnet(); 491 492 if (so->so_state & SS_CANTSENDMORE) 493 snderr(EPIPE); 494 495 if (dontroute) 496 so->so_options |= SO_DONTROUTE; 497 if (resid > 0) 498 so->so_state |= SS_MORETOCOME; 499 error = (*so->so_proto->pr_usrreq)(so, 500 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, 501 top, addr, control, p); 502 if (dontroute) 503 so->so_options &= ~SO_DONTROUTE; 504 if (resid > 0) 505 so->so_state &= ~SS_MORETOCOME; 506 splx(s); 507 508 clen = 0; 509 control = 0; 510 top = 0; 511 mp = ⊤ 512 if (error) 513 goto release; 514 } while (resid && space > 0); 515 } while (resid); 516 517 release: 518 sbunlock(&so->so_snd); 519 out: 520 if (top) 521 m_freem(top); 522 if (control) 523 m_freem(control); 524 return (error); 525 } 526 527 /* 528 * Implement receive operations on a socket. 529 * We depend on the way that records are added to the sockbuf 530 * by sbappend*. In particular, each record (mbufs linked through m_next) 531 * must begin with an address if the protocol so specifies, 532 * followed by an optional mbuf or mbufs containing ancillary data, 533 * and then zero or more mbufs of data. 534 * In order to avoid blocking network interrupts for the entire time here, 535 * we splx() while doing the actual copy to user space. 536 * Although the sockbuf is locked, new data may still be appended, 537 * and thus we must maintain consistency of the sockbuf during that time. 538 * 539 * The caller may receive the data as a single mbuf chain by supplying 540 * an mbuf **mp0 for use in returning the chain. The uio is then used 541 * only for the count in uio_resid. 542 */ 543 int 544 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, 545 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 546 { 547 struct mbuf *m, **mp; 548 int flags, len, error, s, offset, moff, type, orig_resid; 549 struct protosw *pr; 550 struct mbuf *nextrecord; 551 552 pr = so->so_proto; 553 mp = mp0; 554 type = 0; 555 orig_resid = uio->uio_resid; 556 if (paddr) 557 *paddr = 0; 558 if (controlp) 559 *controlp = 0; 560 if (flagsp) 561 flags = *flagsp &~ MSG_EOR; 562 else 563 flags = 0; 564 if (flags & MSG_OOB) { 565 m = m_get(M_WAIT, MT_DATA); 566 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 567 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, 568 (struct proc *)0); 569 if (error) 570 goto bad; 571 do { 572 error = uiomove(mtod(m, caddr_t), 573 (int) min(uio->uio_resid, m->m_len), uio); 574 m = m_free(m); 575 } while (uio->uio_resid && error == 0 && m); 576 bad: 577 if (m) 578 m_freem(m); 579 return (error); 580 } 581 if (mp) 582 *mp = (struct mbuf *)0; 583 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 584 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 585 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 586 587 restart: 588 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 589 return (error); 590 s = splsoftnet(); 591 592 m = so->so_rcv.sb_mb; 593 /* 594 * If we have less data than requested, block awaiting more 595 * (subject to any timeout) if: 596 * 1. the current count is less than the low water mark, 597 * 2. MSG_WAITALL is set, and it is possible to do the entire 598 * receive operation at once if we block (resid <= hiwat), or 599 * 3. MSG_DONTWAIT is not set. 600 * If MSG_WAITALL is set but resid is larger than the receive buffer, 601 * we have to do the receive in sections, and thus risk returning 602 * a short count if a timeout or signal occurs after we start. 603 */ 604 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 605 so->so_rcv.sb_cc < uio->uio_resid) && 606 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 607 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 608 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 609 #ifdef DIAGNOSTIC 610 if (m == 0 && so->so_rcv.sb_cc) 611 panic("receive 1"); 612 #endif 613 if (so->so_error) { 614 if (m) 615 goto dontblock; 616 error = so->so_error; 617 if ((flags & MSG_PEEK) == 0) 618 so->so_error = 0; 619 goto release; 620 } 621 if (so->so_state & SS_CANTRCVMORE) { 622 if (m) 623 goto dontblock; 624 else 625 goto release; 626 } 627 for (; m; m = m->m_next) 628 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 629 m = so->so_rcv.sb_mb; 630 goto dontblock; 631 } 632 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 633 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 634 error = ENOTCONN; 635 goto release; 636 } 637 if (uio->uio_resid == 0) 638 goto release; 639 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 640 error = EWOULDBLOCK; 641 goto release; 642 } 643 sbunlock(&so->so_rcv); 644 error = sbwait(&so->so_rcv); 645 splx(s); 646 if (error) 647 return (error); 648 goto restart; 649 } 650 dontblock: 651 #ifdef notyet /* XXXX */ 652 if (uio->uio_procp) 653 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 654 #endif 655 nextrecord = m->m_nextpkt; 656 if (pr->pr_flags & PR_ADDR) { 657 #ifdef DIAGNOSTIC 658 if (m->m_type != MT_SONAME) 659 panic("receive 1a"); 660 #endif 661 orig_resid = 0; 662 if (flags & MSG_PEEK) { 663 if (paddr) 664 *paddr = m_copy(m, 0, m->m_len); 665 m = m->m_next; 666 } else { 667 sbfree(&so->so_rcv, m); 668 if (paddr) { 669 *paddr = m; 670 so->so_rcv.sb_mb = m->m_next; 671 m->m_next = 0; 672 m = so->so_rcv.sb_mb; 673 } else { 674 MFREE(m, so->so_rcv.sb_mb); 675 m = so->so_rcv.sb_mb; 676 } 677 } 678 } 679 while (m && m->m_type == MT_CONTROL && error == 0) { 680 if (flags & MSG_PEEK) { 681 if (controlp) 682 *controlp = m_copy(m, 0, m->m_len); 683 m = m->m_next; 684 } else { 685 sbfree(&so->so_rcv, m); 686 if (controlp) { 687 if (pr->pr_domain->dom_externalize && 688 mtod(m, struct cmsghdr *)->cmsg_type == 689 SCM_RIGHTS) 690 error = (*pr->pr_domain->dom_externalize)(m); 691 *controlp = m; 692 so->so_rcv.sb_mb = m->m_next; 693 m->m_next = 0; 694 m = so->so_rcv.sb_mb; 695 } else { 696 MFREE(m, so->so_rcv.sb_mb); 697 m = so->so_rcv.sb_mb; 698 } 699 } 700 if (controlp) { 701 orig_resid = 0; 702 controlp = &(*controlp)->m_next; 703 } 704 } 705 if (m) { 706 if ((flags & MSG_PEEK) == 0) 707 m->m_nextpkt = nextrecord; 708 type = m->m_type; 709 if (type == MT_OOBDATA) 710 flags |= MSG_OOB; 711 } 712 moff = 0; 713 offset = 0; 714 while (m && uio->uio_resid > 0 && error == 0) { 715 if (m->m_type == MT_OOBDATA) { 716 if (type != MT_OOBDATA) 717 break; 718 } else if (type == MT_OOBDATA) 719 break; 720 #ifdef DIAGNOSTIC 721 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 722 panic("receive 3"); 723 #endif 724 so->so_state &= ~SS_RCVATMARK; 725 len = uio->uio_resid; 726 if (so->so_oobmark && len > so->so_oobmark - offset) 727 len = so->so_oobmark - offset; 728 if (len > m->m_len - moff) 729 len = m->m_len - moff; 730 /* 731 * If mp is set, just pass back the mbufs. 732 * Otherwise copy them out via the uio, then free. 733 * Sockbuf must be consistent here (points to current mbuf, 734 * it points to next record) when we drop priority; 735 * we must note any additions to the sockbuf when we 736 * block interrupts again. 737 */ 738 if (mp == 0) { 739 splx(s); 740 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 741 s = splsoftnet(); 742 if (error) 743 goto release; 744 } else 745 uio->uio_resid -= len; 746 if (len == m->m_len - moff) { 747 if (m->m_flags & M_EOR) 748 flags |= MSG_EOR; 749 if (flags & MSG_PEEK) { 750 m = m->m_next; 751 moff = 0; 752 } else { 753 nextrecord = m->m_nextpkt; 754 sbfree(&so->so_rcv, m); 755 if (mp) { 756 *mp = m; 757 mp = &m->m_next; 758 so->so_rcv.sb_mb = m = m->m_next; 759 *mp = (struct mbuf *)0; 760 } else { 761 MFREE(m, so->so_rcv.sb_mb); 762 m = so->so_rcv.sb_mb; 763 } 764 if (m) 765 m->m_nextpkt = nextrecord; 766 } 767 } else { 768 if (flags & MSG_PEEK) 769 moff += len; 770 else { 771 if (mp) 772 *mp = m_copym(m, 0, len, M_WAIT); 773 m->m_data += len; 774 m->m_len -= len; 775 so->so_rcv.sb_cc -= len; 776 } 777 } 778 if (so->so_oobmark) { 779 if ((flags & MSG_PEEK) == 0) { 780 so->so_oobmark -= len; 781 if (so->so_oobmark == 0) { 782 so->so_state |= SS_RCVATMARK; 783 break; 784 } 785 } else { 786 offset += len; 787 if (offset == so->so_oobmark) 788 break; 789 } 790 } 791 if (flags & MSG_EOR) 792 break; 793 /* 794 * If the MSG_WAITALL flag is set (for non-atomic socket), 795 * we must not quit until "uio->uio_resid == 0" or an error 796 * termination. If a signal/timeout occurs, return 797 * with a short count but without error. 798 * Keep sockbuf locked against other readers. 799 */ 800 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 801 !sosendallatonce(so) && !nextrecord) { 802 if (so->so_error || so->so_state & SS_CANTRCVMORE) 803 break; 804 error = sbwait(&so->so_rcv); 805 if (error) { 806 sbunlock(&so->so_rcv); 807 splx(s); 808 return (0); 809 } 810 if ((m = so->so_rcv.sb_mb) != NULL) 811 nextrecord = m->m_nextpkt; 812 } 813 } 814 815 if (m && pr->pr_flags & PR_ATOMIC) { 816 flags |= MSG_TRUNC; 817 if ((flags & MSG_PEEK) == 0) 818 (void) sbdroprecord(&so->so_rcv); 819 } 820 if ((flags & MSG_PEEK) == 0) { 821 if (m == 0) 822 so->so_rcv.sb_mb = nextrecord; 823 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 824 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 825 (struct mbuf *)(long)flags, (struct mbuf *)0, 826 (struct proc *)0); 827 } 828 if (orig_resid == uio->uio_resid && orig_resid && 829 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 830 sbunlock(&so->so_rcv); 831 splx(s); 832 goto restart; 833 } 834 835 if (flagsp) 836 *flagsp |= flags; 837 release: 838 sbunlock(&so->so_rcv); 839 splx(s); 840 return (error); 841 } 842 843 int 844 soshutdown(struct socket *so, int how) 845 { 846 struct protosw *pr; 847 848 pr = so->so_proto; 849 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 850 return (EINVAL); 851 852 if (how == SHUT_RD || how == SHUT_RDWR) 853 sorflush(so); 854 if (how == SHUT_WR || how == SHUT_RDWR) 855 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 856 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 857 return (0); 858 } 859 860 void 861 sorflush(struct socket *so) 862 { 863 struct sockbuf *sb, asb; 864 struct protosw *pr; 865 int s; 866 867 sb = &so->so_rcv; 868 pr = so->so_proto; 869 sb->sb_flags |= SB_NOINTR; 870 (void) sblock(sb, M_WAITOK); 871 s = splnet(); 872 socantrcvmore(so); 873 sbunlock(sb); 874 asb = *sb; 875 memset((caddr_t)sb, 0, sizeof(*sb)); 876 splx(s); 877 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 878 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 879 sbrelease(&asb); 880 } 881 882 int 883 sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) 884 { 885 int error; 886 struct mbuf *m; 887 888 error = 0; 889 m = m0; 890 if (level != SOL_SOCKET) { 891 if (so->so_proto && so->so_proto->pr_ctloutput) 892 return ((*so->so_proto->pr_ctloutput) 893 (PRCO_SETOPT, so, level, optname, &m0)); 894 error = ENOPROTOOPT; 895 } else { 896 switch (optname) { 897 898 case SO_LINGER: 899 if (m == NULL || m->m_len != sizeof(struct linger)) { 900 error = EINVAL; 901 goto bad; 902 } 903 so->so_linger = mtod(m, struct linger *)->l_linger; 904 /* fall thru... */ 905 906 case SO_DEBUG: 907 case SO_KEEPALIVE: 908 case SO_DONTROUTE: 909 case SO_USELOOPBACK: 910 case SO_BROADCAST: 911 case SO_REUSEADDR: 912 case SO_REUSEPORT: 913 case SO_OOBINLINE: 914 case SO_TIMESTAMP: 915 if (m == NULL || m->m_len < sizeof(int)) { 916 error = EINVAL; 917 goto bad; 918 } 919 if (*mtod(m, int *)) 920 so->so_options |= optname; 921 else 922 so->so_options &= ~optname; 923 break; 924 925 case SO_SNDBUF: 926 case SO_RCVBUF: 927 case SO_SNDLOWAT: 928 case SO_RCVLOWAT: 929 { 930 int optval; 931 932 if (m == NULL || m->m_len < sizeof(int)) { 933 error = EINVAL; 934 goto bad; 935 } 936 937 /* 938 * Values < 1 make no sense for any of these 939 * options, so disallow them. 940 */ 941 optval = *mtod(m, int *); 942 if (optval < 1) { 943 error = EINVAL; 944 goto bad; 945 } 946 947 switch (optname) { 948 949 case SO_SNDBUF: 950 case SO_RCVBUF: 951 if (sbreserve(optname == SO_SNDBUF ? 952 &so->so_snd : &so->so_rcv, 953 (u_long) optval) == 0) { 954 error = ENOBUFS; 955 goto bad; 956 } 957 break; 958 959 /* 960 * Make sure the low-water is never greater than 961 * the high-water. 962 */ 963 case SO_SNDLOWAT: 964 so->so_snd.sb_lowat = 965 (optval > so->so_snd.sb_hiwat) ? 966 so->so_snd.sb_hiwat : optval; 967 break; 968 case SO_RCVLOWAT: 969 so->so_rcv.sb_lowat = 970 (optval > so->so_rcv.sb_hiwat) ? 971 so->so_rcv.sb_hiwat : optval; 972 break; 973 } 974 break; 975 } 976 977 case SO_SNDTIMEO: 978 case SO_RCVTIMEO: 979 { 980 struct timeval *tv; 981 short val; 982 983 if (m == NULL || m->m_len < sizeof(*tv)) { 984 error = EINVAL; 985 goto bad; 986 } 987 tv = mtod(m, struct timeval *); 988 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 989 error = EDOM; 990 goto bad; 991 } 992 val = tv->tv_sec * hz + tv->tv_usec / tick; 993 994 switch (optname) { 995 996 case SO_SNDTIMEO: 997 so->so_snd.sb_timeo = val; 998 break; 999 case SO_RCVTIMEO: 1000 so->so_rcv.sb_timeo = val; 1001 break; 1002 } 1003 break; 1004 } 1005 1006 default: 1007 error = ENOPROTOOPT; 1008 break; 1009 } 1010 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1011 (void) ((*so->so_proto->pr_ctloutput) 1012 (PRCO_SETOPT, so, level, optname, &m0)); 1013 m = NULL; /* freed by protocol */ 1014 } 1015 } 1016 bad: 1017 if (m) 1018 (void) m_free(m); 1019 return (error); 1020 } 1021 1022 int 1023 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) 1024 { 1025 struct mbuf *m; 1026 1027 if (level != SOL_SOCKET) { 1028 if (so->so_proto && so->so_proto->pr_ctloutput) { 1029 return ((*so->so_proto->pr_ctloutput) 1030 (PRCO_GETOPT, so, level, optname, mp)); 1031 } else 1032 return (ENOPROTOOPT); 1033 } else { 1034 m = m_get(M_WAIT, MT_SOOPTS); 1035 m->m_len = sizeof(int); 1036 1037 switch (optname) { 1038 1039 case SO_LINGER: 1040 m->m_len = sizeof(struct linger); 1041 mtod(m, struct linger *)->l_onoff = 1042 so->so_options & SO_LINGER; 1043 mtod(m, struct linger *)->l_linger = so->so_linger; 1044 break; 1045 1046 case SO_USELOOPBACK: 1047 case SO_DONTROUTE: 1048 case SO_DEBUG: 1049 case SO_KEEPALIVE: 1050 case SO_REUSEADDR: 1051 case SO_REUSEPORT: 1052 case SO_BROADCAST: 1053 case SO_OOBINLINE: 1054 case SO_TIMESTAMP: 1055 *mtod(m, int *) = so->so_options & optname; 1056 break; 1057 1058 case SO_TYPE: 1059 *mtod(m, int *) = so->so_type; 1060 break; 1061 1062 case SO_ERROR: 1063 *mtod(m, int *) = so->so_error; 1064 so->so_error = 0; 1065 break; 1066 1067 case SO_SNDBUF: 1068 *mtod(m, int *) = so->so_snd.sb_hiwat; 1069 break; 1070 1071 case SO_RCVBUF: 1072 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1073 break; 1074 1075 case SO_SNDLOWAT: 1076 *mtod(m, int *) = so->so_snd.sb_lowat; 1077 break; 1078 1079 case SO_RCVLOWAT: 1080 *mtod(m, int *) = so->so_rcv.sb_lowat; 1081 break; 1082 1083 case SO_SNDTIMEO: 1084 case SO_RCVTIMEO: 1085 { 1086 int val = (optname == SO_SNDTIMEO ? 1087 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1088 1089 m->m_len = sizeof(struct timeval); 1090 mtod(m, struct timeval *)->tv_sec = val / hz; 1091 mtod(m, struct timeval *)->tv_usec = 1092 (val % hz) * tick; 1093 break; 1094 } 1095 1096 default: 1097 (void)m_free(m); 1098 return (ENOPROTOOPT); 1099 } 1100 *mp = m; 1101 return (0); 1102 } 1103 } 1104 1105 void 1106 sohasoutofband(struct socket *so) 1107 { 1108 struct proc *p; 1109 1110 if (so->so_pgid < 0) 1111 gsignal(-so->so_pgid, SIGURG); 1112 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1113 psignal(p, SIGURG); 1114 selwakeup(&so->so_rcv.sb_sel); 1115 } 1116