1 /* $NetBSD: uipc_socket.c,v 1.50 2000/03/30 09:27:14 augustss Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 36 */ 37 38 #include "opt_compat_sunos.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/file.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/domain.h> 47 #include <sys/kernel.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/pool.h> 54 55 struct pool socket_pool; 56 57 extern int somaxconn; /* patchable (XXX sysctl) */ 58 int somaxconn = SOMAXCONN; 59 60 void 61 soinit() 62 { 63 64 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, 65 "sockpl", 0, NULL, NULL, M_SOCKET); 66 } 67 68 /* 69 * Socket operation routines. 70 * These routines are called by the routines in 71 * sys_socket.c or from a system process, and 72 * implement the semantics of socket operations by 73 * switching out to the protocol specific routines. 74 */ 75 /*ARGSUSED*/ 76 int 77 socreate(dom, aso, type, proto) 78 int dom; 79 struct socket **aso; 80 int type; 81 int proto; 82 { 83 struct proc *p = curproc; /* XXX */ 84 struct protosw *prp; 85 struct socket *so; 86 int error; 87 int s; 88 89 if (proto) 90 prp = pffindproto(dom, proto, type); 91 else 92 prp = pffindtype(dom, type); 93 if (prp == 0 || prp->pr_usrreq == 0) 94 return (EPROTONOSUPPORT); 95 if (prp->pr_type != type) 96 return (EPROTOTYPE); 97 s = splsoftnet(); 98 so = pool_get(&socket_pool, PR_WAITOK); 99 memset((caddr_t)so, 0, sizeof(*so)); 100 TAILQ_INIT(&so->so_q0); 101 TAILQ_INIT(&so->so_q); 102 so->so_type = type; 103 so->so_proto = prp; 104 so->so_send = sosend; 105 so->so_receive = soreceive; 106 if (p != 0) 107 so->so_uid = p->p_ucred->cr_uid; 108 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, 109 (struct mbuf *)(long)proto, (struct mbuf *)0, p); 110 if (error) { 111 so->so_state |= SS_NOFDREF; 112 sofree(so); 113 splx(s); 114 return (error); 115 } 116 #ifdef COMPAT_SUNOS 
117 { 118 extern struct emul emul_sunos; 119 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 120 so->so_options |= SO_BROADCAST; 121 } 122 #endif 123 splx(s); 124 *aso = so; 125 return (0); 126 } 127 128 int 129 sobind(so, nam) 130 struct socket *so; 131 struct mbuf *nam; 132 { 133 struct proc *p = curproc; /* XXX */ 134 int s = splsoftnet(); 135 int error; 136 137 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, 138 nam, (struct mbuf *)0, p); 139 splx(s); 140 return (error); 141 } 142 143 int 144 solisten(so, backlog) 145 struct socket *so; 146 int backlog; 147 { 148 int s = splsoftnet(), error; 149 150 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, 151 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 152 if (error) { 153 splx(s); 154 return (error); 155 } 156 if (so->so_q.tqh_first == NULL) 157 so->so_options |= SO_ACCEPTCONN; 158 if (backlog < 0) 159 backlog = 0; 160 so->so_qlimit = min(backlog, somaxconn); 161 splx(s); 162 return (0); 163 } 164 165 void 166 sofree(so) 167 struct socket *so; 168 { 169 170 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 171 return; 172 if (so->so_head) { 173 /* 174 * We must not decommission a socket that's on the accept(2) 175 * queue. If we do, then accept(2) may hang after select(2) 176 * indicated that the listening socket was ready. 177 */ 178 if (!soqremque(so, 0)) 179 return; 180 } 181 sbrelease(&so->so_snd); 182 sorflush(so); 183 pool_put(&socket_pool, so); 184 } 185 186 /* 187 * Close a socket on last file table reference removal. 188 * Initiate disconnect if connected. 189 * Free socket when disconnect complete. 
*/
int
soclose(so)
	struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/*
	 * A socket marked SO_ACCEPTCONN first empties both of its
	 * connection queues: every entry on so_q0 and so_q is unlinked
	 * with soqremque() and then aborted.
	 */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = so->so_q0.tqh_first) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = so->so_q.tqh_first) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	/* No protocol control block: nothing to disconnect or detach. */
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * A non-blocking socket that is already
			 * disconnecting does not wait here.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep on so_timeo, so_linger * hz ticks at a
			 * time, until the socket is no longer connected
			 * or tsleep() returns an error.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		/* An earlier error takes precedence over the detach result. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	/* sofree() refuses to release a socket without SS_NOFDREF. */
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
249 */ 250 int 251 soabort(so) 252 struct socket *so; 253 { 254 255 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, 256 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 257 } 258 259 int 260 soaccept(so, nam) 261 struct socket *so; 262 struct mbuf *nam; 263 { 264 int s = splsoftnet(); 265 int error; 266 267 if ((so->so_state & SS_NOFDREF) == 0) 268 panic("soaccept: !NOFDREF"); 269 so->so_state &= ~SS_NOFDREF; 270 if ((so->so_state & SS_ISDISCONNECTED) == 0) 271 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 272 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); 273 else 274 error = 0; 275 splx(s); 276 return (error); 277 } 278 279 int 280 soconnect(so, nam) 281 struct socket *so; 282 struct mbuf *nam; 283 { 284 struct proc *p = curproc; /* XXX */ 285 int s; 286 int error; 287 288 if (so->so_options & SO_ACCEPTCONN) 289 return (EOPNOTSUPP); 290 s = splsoftnet(); 291 /* 292 * If protocol is connection-based, can only connect once. 293 * Otherwise, if connected, try to disconnect first. 294 * This allows user to disconnect by connecting to, e.g., 295 * a null address. 
296 */ 297 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 298 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 299 (error = sodisconnect(so)))) 300 error = EISCONN; 301 else 302 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 303 (struct mbuf *)0, nam, (struct mbuf *)0, p); 304 splx(s); 305 return (error); 306 } 307 308 int 309 soconnect2(so1, so2) 310 struct socket *so1; 311 struct socket *so2; 312 { 313 int s = splsoftnet(); 314 int error; 315 316 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 317 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, 318 (struct proc *)0); 319 splx(s); 320 return (error); 321 } 322 323 int 324 sodisconnect(so) 325 struct socket *so; 326 { 327 int s = splsoftnet(); 328 int error; 329 330 if ((so->so_state & SS_ISCONNECTED) == 0) { 331 error = ENOTCONN; 332 goto bad; 333 } 334 if (so->so_state & SS_ISDISCONNECTING) { 335 error = EALREADY; 336 goto bad; 337 } 338 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 339 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 340 (struct proc *)0); 341 bad: 342 splx(s); 343 return (error); 344 } 345 346 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 347 /* 348 * Send on a socket. 349 * If send must go all at once and message is larger than 350 * send buffering, then hard error. 351 * Lock against other senders. 352 * If must go all at once and not enough room now, then 353 * inform user that this would block and do nothing. 354 * Otherwise, if nonblocking, send as much as possible. 355 * The data to be sent is described by "uio" if nonzero, 356 * otherwise by the mbuf chain "top" (which must be null 357 * if uio is not). Data provided in mbuf chain must be small 358 * enough to send all at once. 359 * 360 * Returns nonzero on error, timeout or signal; callers 361 * must check for short counts if EINTR/ERESTART are returned. 362 * Data and control buffers are freed on return. 
363 */ 364 int 365 sosend(so, addr, uio, top, control, flags) 366 struct socket *so; 367 struct mbuf *addr; 368 struct uio *uio; 369 struct mbuf *top; 370 struct mbuf *control; 371 int flags; 372 { 373 struct proc *p = curproc; /* XXX */ 374 struct mbuf **mp; 375 struct mbuf *m; 376 long space, len, resid; 377 int clen = 0, error, s, dontroute, mlen; 378 int atomic = sosendallatonce(so) || top; 379 380 if (uio) 381 resid = uio->uio_resid; 382 else 383 resid = top->m_pkthdr.len; 384 /* 385 * In theory resid should be unsigned. 386 * However, space must be signed, as it might be less than 0 387 * if we over-committed, and we must use a signed comparison 388 * of space and resid. On the other hand, a negative resid 389 * causes us to loop sending 0-length segments to the protocol. 390 */ 391 if (resid < 0) { 392 error = EINVAL; 393 goto out; 394 } 395 dontroute = 396 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 397 (so->so_proto->pr_flags & PR_ATOMIC); 398 p->p_stats->p_ru.ru_msgsnd++; 399 if (control) 400 clen = control->m_len; 401 #define snderr(errno) { error = errno; splx(s); goto release; } 402 403 restart: 404 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 405 goto out; 406 do { 407 s = splsoftnet(); 408 if (so->so_state & SS_CANTSENDMORE) 409 snderr(EPIPE); 410 if (so->so_error) { 411 error = so->so_error; 412 so->so_error = 0; 413 splx(s); 414 goto release; 415 } 416 if ((so->so_state & SS_ISCONNECTED) == 0) { 417 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 418 if ((so->so_state & SS_ISCONFIRMING) == 0 && 419 !(resid == 0 && clen != 0)) 420 snderr(ENOTCONN); 421 } else if (addr == 0) 422 snderr(EDESTADDRREQ); 423 } 424 space = sbspace(&so->so_snd); 425 if (flags & MSG_OOB) 426 space += 1024; 427 if ((atomic && resid > so->so_snd.sb_hiwat) || 428 clen > so->so_snd.sb_hiwat) 429 snderr(EMSGSIZE); 430 if (space < resid + clen && uio && 431 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 432 if (so->so_state & 
SS_NBIO) 433 snderr(EWOULDBLOCK); 434 sbunlock(&so->so_snd); 435 error = sbwait(&so->so_snd); 436 splx(s); 437 if (error) 438 goto out; 439 goto restart; 440 } 441 splx(s); 442 mp = ⊤ 443 space -= clen; 444 do { 445 if (uio == NULL) { 446 /* 447 * Data is prepackaged in "top". 448 */ 449 resid = 0; 450 if (flags & MSG_EOR) 451 top->m_flags |= M_EOR; 452 } else do { 453 if (top == 0) { 454 MGETHDR(m, M_WAIT, MT_DATA); 455 mlen = MHLEN; 456 m->m_pkthdr.len = 0; 457 m->m_pkthdr.rcvif = (struct ifnet *)0; 458 } else { 459 MGET(m, M_WAIT, MT_DATA); 460 mlen = MLEN; 461 } 462 if (resid >= MINCLSIZE && space >= MCLBYTES) { 463 MCLGET(m, M_WAIT); 464 if ((m->m_flags & M_EXT) == 0) 465 goto nopages; 466 mlen = MCLBYTES; 467 #ifdef MAPPED_MBUFS 468 len = min(MCLBYTES, resid); 469 #else 470 if (atomic && top == 0) { 471 len = min(MCLBYTES - max_hdr, resid); 472 m->m_data += max_hdr; 473 } else 474 len = min(MCLBYTES, resid); 475 #endif 476 space -= len; 477 } else { 478 nopages: 479 len = min(min(mlen, resid), space); 480 space -= len; 481 /* 482 * For datagram protocols, leave room 483 * for protocol headers in first mbuf. 484 */ 485 if (atomic && top == 0 && len < mlen) 486 MH_ALIGN(m, len); 487 } 488 error = uiomove(mtod(m, caddr_t), (int)len, uio); 489 resid = uio->uio_resid; 490 m->m_len = len; 491 *mp = m; 492 top->m_pkthdr.len += len; 493 if (error) 494 goto release; 495 mp = &m->m_next; 496 if (resid <= 0) { 497 if (flags & MSG_EOR) 498 top->m_flags |= M_EOR; 499 break; 500 } 501 } while (space > 0 && atomic); 502 503 s = splsoftnet(); 504 505 if (so->so_state & SS_CANTSENDMORE) 506 snderr(EPIPE); 507 508 if (dontroute) 509 so->so_options |= SO_DONTROUTE; 510 if (resid > 0) 511 so->so_state |= SS_MORETOCOME; 512 error = (*so->so_proto->pr_usrreq)(so, 513 (flags & MSG_OOB) ? 
PRU_SENDOOB : PRU_SEND, 514 top, addr, control, p); 515 if (dontroute) 516 so->so_options &= ~SO_DONTROUTE; 517 if (resid > 0) 518 so->so_state &= ~SS_MORETOCOME; 519 splx(s); 520 521 clen = 0; 522 control = 0; 523 top = 0; 524 mp = ⊤ 525 if (error) 526 goto release; 527 } while (resid && space > 0); 528 } while (resid); 529 530 release: 531 sbunlock(&so->so_snd); 532 out: 533 if (top) 534 m_freem(top); 535 if (control) 536 m_freem(control); 537 return (error); 538 } 539 540 /* 541 * Implement receive operations on a socket. 542 * We depend on the way that records are added to the sockbuf 543 * by sbappend*. In particular, each record (mbufs linked through m_next) 544 * must begin with an address if the protocol so specifies, 545 * followed by an optional mbuf or mbufs containing ancillary data, 546 * and then zero or more mbufs of data. 547 * In order to avoid blocking network interrupts for the entire time here, 548 * we splx() while doing the actual copy to user space. 549 * Although the sockbuf is locked, new data may still be appended, 550 * and thus we must maintain consistency of the sockbuf during that time. 551 * 552 * The caller may receive the data as a single mbuf chain by supplying 553 * an mbuf **mp0 for use in returning the chain. The uio is then used 554 * only for the count in uio_resid. 
555 */ 556 int 557 soreceive(so, paddr, uio, mp0, controlp, flagsp) 558 struct socket *so; 559 struct mbuf **paddr; 560 struct uio *uio; 561 struct mbuf **mp0; 562 struct mbuf **controlp; 563 int *flagsp; 564 { 565 struct mbuf *m, **mp; 566 int flags, len, error, s, offset; 567 struct protosw *pr = so->so_proto; 568 struct mbuf *nextrecord; 569 int moff, type = 0; 570 int orig_resid = uio->uio_resid; 571 572 mp = mp0; 573 if (paddr) 574 *paddr = 0; 575 if (controlp) 576 *controlp = 0; 577 if (flagsp) 578 flags = *flagsp &~ MSG_EOR; 579 else 580 flags = 0; 581 if (flags & MSG_OOB) { 582 m = m_get(M_WAIT, MT_DATA); 583 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 584 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, 585 (struct proc *)0); 586 if (error) 587 goto bad; 588 do { 589 error = uiomove(mtod(m, caddr_t), 590 (int) min(uio->uio_resid, m->m_len), uio); 591 m = m_free(m); 592 } while (uio->uio_resid && error == 0 && m); 593 bad: 594 if (m) 595 m_freem(m); 596 return (error); 597 } 598 if (mp) 599 *mp = (struct mbuf *)0; 600 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 601 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 602 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 603 604 restart: 605 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 606 return (error); 607 s = splsoftnet(); 608 609 m = so->so_rcv.sb_mb; 610 /* 611 * If we have less data than requested, block awaiting more 612 * (subject to any timeout) if: 613 * 1. the current count is less than the low water mark, 614 * 2. MSG_WAITALL is set, and it is possible to do the entire 615 * receive operation at once if we block (resid <= hiwat), or 616 * 3. MSG_DONTWAIT is not set. 617 * If MSG_WAITALL is set but resid is larger than the receive buffer, 618 * we have to do the receive in sections, and thus risk returning 619 * a short count if a timeout or signal occurs after we start. 
620 */ 621 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 622 so->so_rcv.sb_cc < uio->uio_resid) && 623 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 624 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 625 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 626 #ifdef DIAGNOSTIC 627 if (m == 0 && so->so_rcv.sb_cc) 628 panic("receive 1"); 629 #endif 630 if (so->so_error) { 631 if (m) 632 goto dontblock; 633 error = so->so_error; 634 if ((flags & MSG_PEEK) == 0) 635 so->so_error = 0; 636 goto release; 637 } 638 if (so->so_state & SS_CANTRCVMORE) { 639 if (m) 640 goto dontblock; 641 else 642 goto release; 643 } 644 for (; m; m = m->m_next) 645 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 646 m = so->so_rcv.sb_mb; 647 goto dontblock; 648 } 649 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 650 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 651 error = ENOTCONN; 652 goto release; 653 } 654 if (uio->uio_resid == 0) 655 goto release; 656 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 657 error = EWOULDBLOCK; 658 goto release; 659 } 660 sbunlock(&so->so_rcv); 661 error = sbwait(&so->so_rcv); 662 splx(s); 663 if (error) 664 return (error); 665 goto restart; 666 } 667 dontblock: 668 #ifdef notyet /* XXXX */ 669 if (uio->uio_procp) 670 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 671 #endif 672 nextrecord = m->m_nextpkt; 673 if (pr->pr_flags & PR_ADDR) { 674 #ifdef DIAGNOSTIC 675 if (m->m_type != MT_SONAME) 676 panic("receive 1a"); 677 #endif 678 orig_resid = 0; 679 if (flags & MSG_PEEK) { 680 if (paddr) 681 *paddr = m_copy(m, 0, m->m_len); 682 m = m->m_next; 683 } else { 684 sbfree(&so->so_rcv, m); 685 if (paddr) { 686 *paddr = m; 687 so->so_rcv.sb_mb = m->m_next; 688 m->m_next = 0; 689 m = so->so_rcv.sb_mb; 690 } else { 691 MFREE(m, so->so_rcv.sb_mb); 692 m = so->so_rcv.sb_mb; 693 } 694 } 695 } 696 while (m && m->m_type == MT_CONTROL && error == 0) { 697 if (flags & MSG_PEEK) { 698 if (controlp) 699 *controlp = 
m_copy(m, 0, m->m_len); 700 m = m->m_next; 701 } else { 702 sbfree(&so->so_rcv, m); 703 if (controlp) { 704 if (pr->pr_domain->dom_externalize && 705 mtod(m, struct cmsghdr *)->cmsg_type == 706 SCM_RIGHTS) 707 error = (*pr->pr_domain->dom_externalize)(m); 708 *controlp = m; 709 so->so_rcv.sb_mb = m->m_next; 710 m->m_next = 0; 711 m = so->so_rcv.sb_mb; 712 } else { 713 MFREE(m, so->so_rcv.sb_mb); 714 m = so->so_rcv.sb_mb; 715 } 716 } 717 if (controlp) { 718 orig_resid = 0; 719 controlp = &(*controlp)->m_next; 720 } 721 } 722 if (m) { 723 if ((flags & MSG_PEEK) == 0) 724 m->m_nextpkt = nextrecord; 725 type = m->m_type; 726 if (type == MT_OOBDATA) 727 flags |= MSG_OOB; 728 } 729 moff = 0; 730 offset = 0; 731 while (m && uio->uio_resid > 0 && error == 0) { 732 if (m->m_type == MT_OOBDATA) { 733 if (type != MT_OOBDATA) 734 break; 735 } else if (type == MT_OOBDATA) 736 break; 737 #ifdef DIAGNOSTIC 738 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 739 panic("receive 3"); 740 #endif 741 so->so_state &= ~SS_RCVATMARK; 742 len = uio->uio_resid; 743 if (so->so_oobmark && len > so->so_oobmark - offset) 744 len = so->so_oobmark - offset; 745 if (len > m->m_len - moff) 746 len = m->m_len - moff; 747 /* 748 * If mp is set, just pass back the mbufs. 749 * Otherwise copy them out via the uio, then free. 750 * Sockbuf must be consistent here (points to current mbuf, 751 * it points to next record) when we drop priority; 752 * we must note any additions to the sockbuf when we 753 * block interrupts again. 
754 */ 755 if (mp == 0) { 756 splx(s); 757 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 758 s = splsoftnet(); 759 } else 760 uio->uio_resid -= len; 761 if (len == m->m_len - moff) { 762 if (m->m_flags & M_EOR) 763 flags |= MSG_EOR; 764 if (flags & MSG_PEEK) { 765 m = m->m_next; 766 moff = 0; 767 } else { 768 nextrecord = m->m_nextpkt; 769 sbfree(&so->so_rcv, m); 770 if (mp) { 771 *mp = m; 772 mp = &m->m_next; 773 so->so_rcv.sb_mb = m = m->m_next; 774 *mp = (struct mbuf *)0; 775 } else { 776 MFREE(m, so->so_rcv.sb_mb); 777 m = so->so_rcv.sb_mb; 778 } 779 if (m) 780 m->m_nextpkt = nextrecord; 781 } 782 } else { 783 if (flags & MSG_PEEK) 784 moff += len; 785 else { 786 if (mp) 787 *mp = m_copym(m, 0, len, M_WAIT); 788 m->m_data += len; 789 m->m_len -= len; 790 so->so_rcv.sb_cc -= len; 791 } 792 } 793 if (so->so_oobmark) { 794 if ((flags & MSG_PEEK) == 0) { 795 so->so_oobmark -= len; 796 if (so->so_oobmark == 0) { 797 so->so_state |= SS_RCVATMARK; 798 break; 799 } 800 } else { 801 offset += len; 802 if (offset == so->so_oobmark) 803 break; 804 } 805 } 806 if (flags & MSG_EOR) 807 break; 808 /* 809 * If the MSG_WAITALL flag is set (for non-atomic socket), 810 * we must not quit until "uio->uio_resid == 0" or an error 811 * termination. If a signal/timeout occurs, return 812 * with a short count but without error. 813 * Keep sockbuf locked against other readers. 
814 */ 815 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 816 !sosendallatonce(so) && !nextrecord) { 817 if (so->so_error || so->so_state & SS_CANTRCVMORE) 818 break; 819 error = sbwait(&so->so_rcv); 820 if (error) { 821 sbunlock(&so->so_rcv); 822 splx(s); 823 return (0); 824 } 825 if ((m = so->so_rcv.sb_mb) != NULL) 826 nextrecord = m->m_nextpkt; 827 } 828 } 829 830 if (m && pr->pr_flags & PR_ATOMIC) { 831 flags |= MSG_TRUNC; 832 if ((flags & MSG_PEEK) == 0) 833 (void) sbdroprecord(&so->so_rcv); 834 } 835 if ((flags & MSG_PEEK) == 0) { 836 if (m == 0) 837 so->so_rcv.sb_mb = nextrecord; 838 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 839 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 840 (struct mbuf *)(long)flags, (struct mbuf *)0, 841 (struct proc *)0); 842 } 843 if (orig_resid == uio->uio_resid && orig_resid && 844 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 845 sbunlock(&so->so_rcv); 846 splx(s); 847 goto restart; 848 } 849 850 if (flagsp) 851 *flagsp |= flags; 852 release: 853 sbunlock(&so->so_rcv); 854 splx(s); 855 return (error); 856 } 857 858 int 859 soshutdown(so, how) 860 struct socket *so; 861 int how; 862 { 863 struct protosw *pr = so->so_proto; 864 865 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 866 return (EINVAL); 867 868 if (how == SHUT_RD || how == SHUT_RDWR) 869 sorflush(so); 870 if (how == SHUT_WR || how == SHUT_RDWR) 871 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 872 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 873 return (0); 874 } 875 876 void 877 sorflush(so) 878 struct socket *so; 879 { 880 struct sockbuf *sb = &so->so_rcv; 881 struct protosw *pr = so->so_proto; 882 int s; 883 struct sockbuf asb; 884 885 sb->sb_flags |= SB_NOINTR; 886 (void) sblock(sb, M_WAITOK); 887 s = splimp(); 888 socantrcvmore(so); 889 sbunlock(sb); 890 asb = *sb; 891 memset((caddr_t)sb, 0, sizeof(*sb)); 892 splx(s); 893 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 894 
(*pr->pr_domain->dom_dispose)(asb.sb_mb); 895 sbrelease(&asb); 896 } 897 898 int 899 sosetopt(so, level, optname, m0) 900 struct socket *so; 901 int level, optname; 902 struct mbuf *m0; 903 { 904 int error = 0; 905 struct mbuf *m = m0; 906 907 if (level != SOL_SOCKET) { 908 if (so->so_proto && so->so_proto->pr_ctloutput) 909 return ((*so->so_proto->pr_ctloutput) 910 (PRCO_SETOPT, so, level, optname, &m0)); 911 error = ENOPROTOOPT; 912 } else { 913 switch (optname) { 914 915 case SO_LINGER: 916 if (m == NULL || m->m_len != sizeof(struct linger)) { 917 error = EINVAL; 918 goto bad; 919 } 920 so->so_linger = mtod(m, struct linger *)->l_linger; 921 /* fall thru... */ 922 923 case SO_DEBUG: 924 case SO_KEEPALIVE: 925 case SO_DONTROUTE: 926 case SO_USELOOPBACK: 927 case SO_BROADCAST: 928 case SO_REUSEADDR: 929 case SO_REUSEPORT: 930 case SO_OOBINLINE: 931 case SO_TIMESTAMP: 932 if (m == NULL || m->m_len < sizeof(int)) { 933 error = EINVAL; 934 goto bad; 935 } 936 if (*mtod(m, int *)) 937 so->so_options |= optname; 938 else 939 so->so_options &= ~optname; 940 break; 941 942 case SO_SNDBUF: 943 case SO_RCVBUF: 944 case SO_SNDLOWAT: 945 case SO_RCVLOWAT: 946 { 947 int optval; 948 949 if (m == NULL || m->m_len < sizeof(int)) { 950 error = EINVAL; 951 goto bad; 952 } 953 954 /* 955 * Values < 1 make no sense for any of these 956 * options, so disallow them. 957 */ 958 optval = *mtod(m, int *); 959 if (optval < 1) { 960 error = EINVAL; 961 goto bad; 962 } 963 964 switch (optname) { 965 966 case SO_SNDBUF: 967 case SO_RCVBUF: 968 if (sbreserve(optname == SO_SNDBUF ? 969 &so->so_snd : &so->so_rcv, 970 (u_long) optval) == 0) { 971 error = ENOBUFS; 972 goto bad; 973 } 974 break; 975 976 /* 977 * Make sure the low-water is never greater than 978 * the high-water. 979 */ 980 case SO_SNDLOWAT: 981 so->so_snd.sb_lowat = 982 (optval > so->so_snd.sb_hiwat) ? 983 so->so_snd.sb_hiwat : optval; 984 break; 985 case SO_RCVLOWAT: 986 so->so_rcv.sb_lowat = 987 (optval > so->so_rcv.sb_hiwat) ? 
988 so->so_rcv.sb_hiwat : optval; 989 break; 990 } 991 break; 992 } 993 994 case SO_SNDTIMEO: 995 case SO_RCVTIMEO: 996 { 997 struct timeval *tv; 998 short val; 999 1000 if (m == NULL || m->m_len < sizeof(*tv)) { 1001 error = EINVAL; 1002 goto bad; 1003 } 1004 tv = mtod(m, struct timeval *); 1005 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 1006 error = EDOM; 1007 goto bad; 1008 } 1009 val = tv->tv_sec * hz + tv->tv_usec / tick; 1010 1011 switch (optname) { 1012 1013 case SO_SNDTIMEO: 1014 so->so_snd.sb_timeo = val; 1015 break; 1016 case SO_RCVTIMEO: 1017 so->so_rcv.sb_timeo = val; 1018 break; 1019 } 1020 break; 1021 } 1022 1023 default: 1024 error = ENOPROTOOPT; 1025 break; 1026 } 1027 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1028 (void) ((*so->so_proto->pr_ctloutput) 1029 (PRCO_SETOPT, so, level, optname, &m0)); 1030 m = NULL; /* freed by protocol */ 1031 } 1032 } 1033 bad: 1034 if (m) 1035 (void) m_free(m); 1036 return (error); 1037 } 1038 1039 int 1040 sogetopt(so, level, optname, mp) 1041 struct socket *so; 1042 int level, optname; 1043 struct mbuf **mp; 1044 { 1045 struct mbuf *m; 1046 1047 if (level != SOL_SOCKET) { 1048 if (so->so_proto && so->so_proto->pr_ctloutput) { 1049 return ((*so->so_proto->pr_ctloutput) 1050 (PRCO_GETOPT, so, level, optname, mp)); 1051 } else 1052 return (ENOPROTOOPT); 1053 } else { 1054 m = m_get(M_WAIT, MT_SOOPTS); 1055 m->m_len = sizeof(int); 1056 1057 switch (optname) { 1058 1059 case SO_LINGER: 1060 m->m_len = sizeof(struct linger); 1061 mtod(m, struct linger *)->l_onoff = 1062 so->so_options & SO_LINGER; 1063 mtod(m, struct linger *)->l_linger = so->so_linger; 1064 break; 1065 1066 case SO_USELOOPBACK: 1067 case SO_DONTROUTE: 1068 case SO_DEBUG: 1069 case SO_KEEPALIVE: 1070 case SO_REUSEADDR: 1071 case SO_REUSEPORT: 1072 case SO_BROADCAST: 1073 case SO_OOBINLINE: 1074 case SO_TIMESTAMP: 1075 *mtod(m, int *) = so->so_options & optname; 1076 break; 1077 1078 case SO_TYPE: 1079 *mtod(m, int *) = 
so->so_type; 1080 break; 1081 1082 case SO_ERROR: 1083 *mtod(m, int *) = so->so_error; 1084 so->so_error = 0; 1085 break; 1086 1087 case SO_SNDBUF: 1088 *mtod(m, int *) = so->so_snd.sb_hiwat; 1089 break; 1090 1091 case SO_RCVBUF: 1092 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1093 break; 1094 1095 case SO_SNDLOWAT: 1096 *mtod(m, int *) = so->so_snd.sb_lowat; 1097 break; 1098 1099 case SO_RCVLOWAT: 1100 *mtod(m, int *) = so->so_rcv.sb_lowat; 1101 break; 1102 1103 case SO_SNDTIMEO: 1104 case SO_RCVTIMEO: 1105 { 1106 int val = (optname == SO_SNDTIMEO ? 1107 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1108 1109 m->m_len = sizeof(struct timeval); 1110 mtod(m, struct timeval *)->tv_sec = val / hz; 1111 mtod(m, struct timeval *)->tv_usec = 1112 (val % hz) * tick; 1113 break; 1114 } 1115 1116 default: 1117 (void)m_free(m); 1118 return (ENOPROTOOPT); 1119 } 1120 *mp = m; 1121 return (0); 1122 } 1123 } 1124 1125 void 1126 sohasoutofband(so) 1127 struct socket *so; 1128 { 1129 struct proc *p; 1130 1131 if (so->so_pgid < 0) 1132 gsignal(-so->so_pgid, SIGURG); 1133 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1134 psignal(p, SIGURG); 1135 selwakeup(&so->so_rcv.sb_sel); 1136 } 1137