1 /* $NetBSD: uipc_socket.c,v 1.48 1999/06/08 02:39:57 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 36 */ 37 38 #include "opt_compat_sunos.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/file.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/domain.h> 47 #include <sys/kernel.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/pool.h> 54 55 struct pool socket_pool; 56 57 void 58 soinit() 59 { 60 61 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, 62 "sockpl", 0, NULL, NULL, M_SOCKET); 63 } 64 65 /* 66 * Socket operation routines. 67 * These routines are called by the routines in 68 * sys_socket.c or from a system process, and 69 * implement the semantics of socket operations by 70 * switching out to the protocol specific routines. 71 */ 72 /*ARGSUSED*/ 73 int 74 socreate(dom, aso, type, proto) 75 int dom; 76 struct socket **aso; 77 register int type; 78 int proto; 79 { 80 struct proc *p = curproc; /* XXX */ 81 register struct protosw *prp; 82 register struct socket *so; 83 register int error; 84 int s; 85 86 if (proto) 87 prp = pffindproto(dom, proto, type); 88 else 89 prp = pffindtype(dom, type); 90 if (prp == 0 || prp->pr_usrreq == 0) 91 return (EPROTONOSUPPORT); 92 if (prp->pr_type != type) 93 return (EPROTOTYPE); 94 s = splsoftnet(); 95 so = pool_get(&socket_pool, PR_WAITOK); 96 memset((caddr_t)so, 0, sizeof(*so)); 97 TAILQ_INIT(&so->so_q0); 98 TAILQ_INIT(&so->so_q); 99 so->so_type = type; 100 so->so_proto = prp; 101 so->so_send = sosend; 102 so->so_receive = soreceive; 103 if (p != 0) 104 so->so_uid = p->p_ucred->cr_uid; 105 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, 106 (struct mbuf *)(long)proto, (struct mbuf *)0, p); 107 if (error) { 108 so->so_state |= SS_NOFDREF; 109 sofree(so); 110 splx(s); 111 return (error); 112 } 113 #ifdef COMPAT_SUNOS 114 { 115 extern struct emul emul_sunos; 116 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 117 so->so_options |= SO_BROADCAST; 118 } 119 #endif 120 splx(s); 121 *aso = so; 122 return (0); 123 } 124 125 int 126 sobind(so, nam) 127 struct socket *so; 128 struct mbuf *nam; 129 { 130 struct proc *p = curproc; /* XXX */ 131 int s = splsoftnet(); 132 int error; 133 134 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, 135 nam, (struct mbuf *)0, p); 136 splx(s); 137 return (error); 138 } 139 140 int 141 solisten(so, backlog) 142 register struct socket *so; 143 int backlog; 144 { 145 int s = splsoftnet(), error; 146 147 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, 148 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 149 if (error) { 150 splx(s); 151 return (error); 152 } 153 if (so->so_q.tqh_first == NULL) 154 so->so_options |= SO_ACCEPTCONN; 155 if (backlog < 0) 156 backlog = 0; 157 so->so_qlimit = min(backlog, SOMAXCONN); 158 splx(s); 159 return (0); 160 } 161 162 void 163 sofree(so) 164 register struct socket *so; 165 { 166 167 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 168 return; 169 if (so->so_head) { 170 /* 171 * We must not decommission a socket that's on the accept(2) 172 * queue. If we do, then accept(2) may hang after select(2) 173 * indicated that the listening socket was ready. 174 */ 175 if (!soqremque(so, 0)) 176 return; 177 } 178 sbrelease(&so->so_snd); 179 sorflush(so); 180 pool_put(&socket_pool, so); 181 } 182 183 /* 184 * Close a socket on last file table reference removal. 185 * Initiate disconnect if connected. 186 * Free socket when disconnect complete. 187 */ 188 int 189 soclose(so) 190 register struct socket *so; 191 { 192 struct socket *so2; 193 int s = splsoftnet(); /* conservative */ 194 int error = 0; 195 196 if (so->so_options & SO_ACCEPTCONN) { 197 while ((so2 = so->so_q0.tqh_first) != 0) { 198 (void) soqremque(so2, 0); 199 (void) soabort(so2); 200 } 201 while ((so2 = so->so_q.tqh_first) != 0) { 202 (void) soqremque(so2, 1); 203 (void) soabort(so2); 204 } 205 } 206 if (so->so_pcb == 0) 207 goto discard; 208 if (so->so_state & SS_ISCONNECTED) { 209 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 210 error = sodisconnect(so); 211 if (error) 212 goto drop; 213 } 214 if (so->so_options & SO_LINGER) { 215 if ((so->so_state & SS_ISDISCONNECTING) && 216 (so->so_state & SS_NBIO)) 217 goto drop; 218 while (so->so_state & SS_ISCONNECTED) { 219 error = tsleep((caddr_t)&so->so_timeo, 220 PSOCK | PCATCH, netcls, 221 so->so_linger * hz); 222 if (error) 223 break; 224 } 225 } 226 } 227 drop: 228 if (so->so_pcb) { 229 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 230 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 231 (struct proc *)0); 232 if (error == 0) 233 error = error2; 234 } 235 discard: 236 if (so->so_state & SS_NOFDREF) 237 panic("soclose: NOFDREF"); 238 so->so_state |= SS_NOFDREF; 239 sofree(so); 240 splx(s); 241 return (error); 242 } 243 244 /* 245 * Must be called at splsoftnet... 246 */ 247 int 248 soabort(so) 249 struct socket *so; 250 { 251 252 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, 253 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 254 } 255 256 int 257 soaccept(so, nam) 258 register struct socket *so; 259 struct mbuf *nam; 260 { 261 int s = splsoftnet(); 262 int error; 263 264 if ((so->so_state & SS_NOFDREF) == 0) 265 panic("soaccept: !NOFDREF"); 266 so->so_state &= ~SS_NOFDREF; 267 if ((so->so_state & SS_ISDISCONNECTED) == 0) 268 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 269 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); 270 else 271 error = 0; 272 splx(s); 273 return (error); 274 } 275 276 int 277 soconnect(so, nam) 278 register struct socket *so; 279 struct mbuf *nam; 280 { 281 struct proc *p = curproc; /* XXX */ 282 int s; 283 int error; 284 285 if (so->so_options & SO_ACCEPTCONN) 286 return (EOPNOTSUPP); 287 s = splsoftnet(); 288 /* 289 * If protocol is connection-based, can only connect once. 290 * Otherwise, if connected, try to disconnect first. 291 * This allows user to disconnect by connecting to, e.g., 292 * a null address. 293 */ 294 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 295 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 296 (error = sodisconnect(so)))) 297 error = EISCONN; 298 else 299 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 300 (struct mbuf *)0, nam, (struct mbuf *)0, p); 301 splx(s); 302 return (error); 303 } 304 305 int 306 soconnect2(so1, so2) 307 register struct socket *so1; 308 struct socket *so2; 309 { 310 int s = splsoftnet(); 311 int error; 312 313 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 314 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, 315 (struct proc *)0); 316 splx(s); 317 return (error); 318 } 319 320 int 321 sodisconnect(so) 322 register struct socket *so; 323 { 324 int s = splsoftnet(); 325 int error; 326 327 if ((so->so_state & SS_ISCONNECTED) == 0) { 328 error = ENOTCONN; 329 goto bad; 330 } 331 if (so->so_state & SS_ISDISCONNECTING) { 332 error = EALREADY; 333 goto bad; 334 } 335 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 336 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 337 (struct proc *)0); 338 bad: 339 splx(s); 340 return (error); 341 } 342 343 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 344 /* 345 * Send on a socket. 346 * If send must go all at once and message is larger than 347 * send buffering, then hard error. 348 * Lock against other senders. 349 * If must go all at once and not enough room now, then 350 * inform user that this would block and do nothing. 351 * Otherwise, if nonblocking, send as much as possible. 352 * The data to be sent is described by "uio" if nonzero, 353 * otherwise by the mbuf chain "top" (which must be null 354 * if uio is not). Data provided in mbuf chain must be small 355 * enough to send all at once. 356 * 357 * Returns nonzero on error, timeout or signal; callers 358 * must check for short counts if EINTR/ERESTART are returned. 359 * Data and control buffers are freed on return. 360 */ 361 int 362 sosend(so, addr, uio, top, control, flags) 363 register struct socket *so; 364 struct mbuf *addr; 365 struct uio *uio; 366 struct mbuf *top; 367 struct mbuf *control; 368 int flags; 369 { 370 struct proc *p = curproc; /* XXX */ 371 struct mbuf **mp; 372 register struct mbuf *m; 373 register long space, len, resid; 374 int clen = 0, error, s, dontroute, mlen; 375 int atomic = sosendallatonce(so) || top; 376 377 if (uio) 378 resid = uio->uio_resid; 379 else 380 resid = top->m_pkthdr.len; 381 /* 382 * In theory resid should be unsigned. 383 * However, space must be signed, as it might be less than 0 384 * if we over-committed, and we must use a signed comparison 385 * of space and resid. On the other hand, a negative resid 386 * causes us to loop sending 0-length segments to the protocol. 387 */ 388 if (resid < 0) { 389 error = EINVAL; 390 goto out; 391 } 392 dontroute = 393 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 394 (so->so_proto->pr_flags & PR_ATOMIC); 395 p->p_stats->p_ru.ru_msgsnd++; 396 if (control) 397 clen = control->m_len; 398 #define snderr(errno) { error = errno; splx(s); goto release; } 399 400 restart: 401 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 402 goto out; 403 do { 404 s = splsoftnet(); 405 if (so->so_state & SS_CANTSENDMORE) 406 snderr(EPIPE); 407 if (so->so_error) { 408 error = so->so_error; 409 so->so_error = 0; 410 splx(s); 411 goto release; 412 } 413 if ((so->so_state & SS_ISCONNECTED) == 0) { 414 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 415 if ((so->so_state & SS_ISCONFIRMING) == 0 && 416 !(resid == 0 && clen != 0)) 417 snderr(ENOTCONN); 418 } else if (addr == 0) 419 snderr(EDESTADDRREQ); 420 } 421 space = sbspace(&so->so_snd); 422 if (flags & MSG_OOB) 423 space += 1024; 424 if ((atomic && resid > so->so_snd.sb_hiwat) || 425 clen > so->so_snd.sb_hiwat) 426 snderr(EMSGSIZE); 427 if (space < resid + clen && uio && 428 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 429 if (so->so_state & SS_NBIO) 430 snderr(EWOULDBLOCK); 431 sbunlock(&so->so_snd); 432 error = sbwait(&so->so_snd); 433 splx(s); 434 if (error) 435 goto out; 436 goto restart; 437 } 438 splx(s); 439 mp = ⊤ 440 space -= clen; 441 do { 442 if (uio == NULL) { 443 /* 444 * Data is prepackaged in "top". 445 */ 446 resid = 0; 447 if (flags & MSG_EOR) 448 top->m_flags |= M_EOR; 449 } else do { 450 if (top == 0) { 451 MGETHDR(m, M_WAIT, MT_DATA); 452 mlen = MHLEN; 453 m->m_pkthdr.len = 0; 454 m->m_pkthdr.rcvif = (struct ifnet *)0; 455 } else { 456 MGET(m, M_WAIT, MT_DATA); 457 mlen = MLEN; 458 } 459 if (resid >= MINCLSIZE && space >= MCLBYTES) { 460 MCLGET(m, M_WAIT); 461 if ((m->m_flags & M_EXT) == 0) 462 goto nopages; 463 mlen = MCLBYTES; 464 #ifdef MAPPED_MBUFS 465 len = min(MCLBYTES, resid); 466 #else 467 if (atomic && top == 0) { 468 len = min(MCLBYTES - max_hdr, resid); 469 m->m_data += max_hdr; 470 } else 471 len = min(MCLBYTES, resid); 472 #endif 473 space -= len; 474 } else { 475 nopages: 476 len = min(min(mlen, resid), space); 477 space -= len; 478 /* 479 * For datagram protocols, leave room 480 * for protocol headers in first mbuf. 481 */ 482 if (atomic && top == 0 && len < mlen) 483 MH_ALIGN(m, len); 484 } 485 error = uiomove(mtod(m, caddr_t), (int)len, uio); 486 resid = uio->uio_resid; 487 m->m_len = len; 488 *mp = m; 489 top->m_pkthdr.len += len; 490 if (error) 491 goto release; 492 mp = &m->m_next; 493 if (resid <= 0) { 494 if (flags & MSG_EOR) 495 top->m_flags |= M_EOR; 496 break; 497 } 498 } while (space > 0 && atomic); 499 500 s = splsoftnet(); 501 502 if (so->so_state & SS_CANTSENDMORE) 503 snderr(EPIPE); 504 505 if (dontroute) 506 so->so_options |= SO_DONTROUTE; 507 if (resid > 0) 508 so->so_state |= SS_MORETOCOME; 509 error = (*so->so_proto->pr_usrreq)(so, 510 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, 511 top, addr, control, p); 512 if (dontroute) 513 so->so_options &= ~SO_DONTROUTE; 514 if (resid > 0) 515 so->so_state &= ~SS_MORETOCOME; 516 splx(s); 517 518 clen = 0; 519 control = 0; 520 top = 0; 521 mp = ⊤ 522 if (error) 523 goto release; 524 } while (resid && space > 0); 525 } while (resid); 526 527 release: 528 sbunlock(&so->so_snd); 529 out: 530 if (top) 531 m_freem(top); 532 if (control) 533 m_freem(control); 534 return (error); 535 } 536 537 /* 538 * Implement receive operations on a socket. 539 * We depend on the way that records are added to the sockbuf 540 * by sbappend*. In particular, each record (mbufs linked through m_next) 541 * must begin with an address if the protocol so specifies, 542 * followed by an optional mbuf or mbufs containing ancillary data, 543 * and then zero or more mbufs of data. 544 * In order to avoid blocking network interrupts for the entire time here, 545 * we splx() while doing the actual copy to user space. 546 * Although the sockbuf is locked, new data may still be appended, 547 * and thus we must maintain consistency of the sockbuf during that time. 548 * 549 * The caller may receive the data as a single mbuf chain by supplying 550 * an mbuf **mp0 for use in returning the chain. The uio is then used 551 * only for the count in uio_resid. 552 */ 553 int 554 soreceive(so, paddr, uio, mp0, controlp, flagsp) 555 register struct socket *so; 556 struct mbuf **paddr; 557 struct uio *uio; 558 struct mbuf **mp0; 559 struct mbuf **controlp; 560 int *flagsp; 561 { 562 register struct mbuf *m, **mp; 563 register int flags, len, error, s, offset; 564 struct protosw *pr = so->so_proto; 565 struct mbuf *nextrecord; 566 int moff, type = 0; 567 int orig_resid = uio->uio_resid; 568 569 mp = mp0; 570 if (paddr) 571 *paddr = 0; 572 if (controlp) 573 *controlp = 0; 574 if (flagsp) 575 flags = *flagsp &~ MSG_EOR; 576 else 577 flags = 0; 578 if (flags & MSG_OOB) { 579 m = m_get(M_WAIT, MT_DATA); 580 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 581 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, 582 (struct proc *)0); 583 if (error) 584 goto bad; 585 do { 586 error = uiomove(mtod(m, caddr_t), 587 (int) min(uio->uio_resid, m->m_len), uio); 588 m = m_free(m); 589 } while (uio->uio_resid && error == 0 && m); 590 bad: 591 if (m) 592 m_freem(m); 593 return (error); 594 } 595 if (mp) 596 *mp = (struct mbuf *)0; 597 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 598 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 599 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 600 601 restart: 602 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 603 return (error); 604 s = splsoftnet(); 605 606 m = so->so_rcv.sb_mb; 607 /* 608 * If we have less data than requested, block awaiting more 609 * (subject to any timeout) if: 610 * 1. the current count is less than the low water mark, 611 * 2. MSG_WAITALL is set, and it is possible to do the entire 612 * receive operation at once if we block (resid <= hiwat), or 613 * 3. MSG_DONTWAIT is not set. 614 * If MSG_WAITALL is set but resid is larger than the receive buffer, 615 * we have to do the receive in sections, and thus risk returning 616 * a short count if a timeout or signal occurs after we start. 617 */ 618 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 619 so->so_rcv.sb_cc < uio->uio_resid) && 620 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 621 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 622 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 623 #ifdef DIAGNOSTIC 624 if (m == 0 && so->so_rcv.sb_cc) 625 panic("receive 1"); 626 #endif 627 if (so->so_error) { 628 if (m) 629 goto dontblock; 630 error = so->so_error; 631 if ((flags & MSG_PEEK) == 0) 632 so->so_error = 0; 633 goto release; 634 } 635 if (so->so_state & SS_CANTRCVMORE) { 636 if (m) 637 goto dontblock; 638 else 639 goto release; 640 } 641 for (; m; m = m->m_next) 642 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 643 m = so->so_rcv.sb_mb; 644 goto dontblock; 645 } 646 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 647 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 648 error = ENOTCONN; 649 goto release; 650 } 651 if (uio->uio_resid == 0) 652 goto release; 653 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 654 error = EWOULDBLOCK; 655 goto release; 656 } 657 sbunlock(&so->so_rcv); 658 error = sbwait(&so->so_rcv); 659 splx(s); 660 if (error) 661 return (error); 662 goto restart; 663 } 664 dontblock: 665 #ifdef notyet /* XXXX */ 666 if (uio->uio_procp) 667 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 668 #endif 669 nextrecord = m->m_nextpkt; 670 if (pr->pr_flags & PR_ADDR) { 671 #ifdef DIAGNOSTIC 672 if (m->m_type != MT_SONAME) 673 panic("receive 1a"); 674 #endif 675 orig_resid = 0; 676 if (flags & MSG_PEEK) { 677 if (paddr) 678 *paddr = m_copy(m, 0, m->m_len); 679 m = m->m_next; 680 } else { 681 sbfree(&so->so_rcv, m); 682 if (paddr) { 683 *paddr = m; 684 so->so_rcv.sb_mb = m->m_next; 685 m->m_next = 0; 686 m = so->so_rcv.sb_mb; 687 } else { 688 MFREE(m, so->so_rcv.sb_mb); 689 m = so->so_rcv.sb_mb; 690 } 691 } 692 } 693 while (m && m->m_type == MT_CONTROL && error == 0) { 694 if (flags & MSG_PEEK) { 695 if (controlp) 696 *controlp = m_copy(m, 0, m->m_len); 697 m = m->m_next; 698 } else { 699 sbfree(&so->so_rcv, m); 700 if (controlp) { 701 if (pr->pr_domain->dom_externalize && 702 mtod(m, struct cmsghdr *)->cmsg_type == 703 SCM_RIGHTS) 704 error = (*pr->pr_domain->dom_externalize)(m); 705 *controlp = m; 706 so->so_rcv.sb_mb = m->m_next; 707 m->m_next = 0; 708 m = so->so_rcv.sb_mb; 709 } else { 710 MFREE(m, so->so_rcv.sb_mb); 711 m = so->so_rcv.sb_mb; 712 } 713 } 714 if (controlp) { 715 orig_resid = 0; 716 controlp = &(*controlp)->m_next; 717 } 718 } 719 if (m) { 720 if ((flags & MSG_PEEK) == 0) 721 m->m_nextpkt = nextrecord; 722 type = m->m_type; 723 if (type == MT_OOBDATA) 724 flags |= MSG_OOB; 725 } 726 moff = 0; 727 offset = 0; 728 while (m && uio->uio_resid > 0 && error == 0) { 729 if (m->m_type == MT_OOBDATA) { 730 if (type != MT_OOBDATA) 731 break; 732 } else if (type == MT_OOBDATA) 733 break; 734 #ifdef DIAGNOSTIC 735 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 736 panic("receive 3"); 737 #endif 738 so->so_state &= ~SS_RCVATMARK; 739 len = uio->uio_resid; 740 if (so->so_oobmark && len > so->so_oobmark - offset) 741 len = so->so_oobmark - offset; 742 if (len > m->m_len - moff) 743 len = m->m_len - moff; 744 /* 745 * If mp is set, just pass back the mbufs. 746 * Otherwise copy them out via the uio, then free. 747 * Sockbuf must be consistent here (points to current mbuf, 748 * it points to next record) when we drop priority; 749 * we must note any additions to the sockbuf when we 750 * block interrupts again. 751 */ 752 if (mp == 0) { 753 splx(s); 754 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 755 s = splsoftnet(); 756 } else 757 uio->uio_resid -= len; 758 if (len == m->m_len - moff) { 759 if (m->m_flags & M_EOR) 760 flags |= MSG_EOR; 761 if (flags & MSG_PEEK) { 762 m = m->m_next; 763 moff = 0; 764 } else { 765 nextrecord = m->m_nextpkt; 766 sbfree(&so->so_rcv, m); 767 if (mp) { 768 *mp = m; 769 mp = &m->m_next; 770 so->so_rcv.sb_mb = m = m->m_next; 771 *mp = (struct mbuf *)0; 772 } else { 773 MFREE(m, so->so_rcv.sb_mb); 774 m = so->so_rcv.sb_mb; 775 } 776 if (m) 777 m->m_nextpkt = nextrecord; 778 } 779 } else { 780 if (flags & MSG_PEEK) 781 moff += len; 782 else { 783 if (mp) 784 *mp = m_copym(m, 0, len, M_WAIT); 785 m->m_data += len; 786 m->m_len -= len; 787 so->so_rcv.sb_cc -= len; 788 } 789 } 790 if (so->so_oobmark) { 791 if ((flags & MSG_PEEK) == 0) { 792 so->so_oobmark -= len; 793 if (so->so_oobmark == 0) { 794 so->so_state |= SS_RCVATMARK; 795 break; 796 } 797 } else { 798 offset += len; 799 if (offset == so->so_oobmark) 800 break; 801 } 802 } 803 if (flags & MSG_EOR) 804 break; 805 /* 806 * If the MSG_WAITALL flag is set (for non-atomic socket), 807 * we must not quit until "uio->uio_resid == 0" or an error 808 * termination. If a signal/timeout occurs, return 809 * with a short count but without error. 810 * Keep sockbuf locked against other readers. 811 */ 812 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 813 !sosendallatonce(so) && !nextrecord) { 814 if (so->so_error || so->so_state & SS_CANTRCVMORE) 815 break; 816 error = sbwait(&so->so_rcv); 817 if (error) { 818 sbunlock(&so->so_rcv); 819 splx(s); 820 return (0); 821 } 822 if ((m = so->so_rcv.sb_mb) != NULL) 823 nextrecord = m->m_nextpkt; 824 } 825 } 826 827 if (m && pr->pr_flags & PR_ATOMIC) { 828 flags |= MSG_TRUNC; 829 if ((flags & MSG_PEEK) == 0) 830 (void) sbdroprecord(&so->so_rcv); 831 } 832 if ((flags & MSG_PEEK) == 0) { 833 if (m == 0) 834 so->so_rcv.sb_mb = nextrecord; 835 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 836 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 837 (struct mbuf *)(long)flags, (struct mbuf *)0, 838 (struct proc *)0); 839 } 840 if (orig_resid == uio->uio_resid && orig_resid && 841 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 842 sbunlock(&so->so_rcv); 843 splx(s); 844 goto restart; 845 } 846 847 if (flagsp) 848 *flagsp |= flags; 849 release: 850 sbunlock(&so->so_rcv); 851 splx(s); 852 return (error); 853 } 854 855 int 856 soshutdown(so, how) 857 struct socket *so; 858 int how; 859 { 860 struct protosw *pr = so->so_proto; 861 862 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 863 return (EINVAL); 864 865 if (how == SHUT_RD || how == SHUT_RDWR) 866 sorflush(so); 867 if (how == SHUT_WR || how == SHUT_RDWR) 868 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 869 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 870 return (0); 871 } 872 873 void 874 sorflush(so) 875 register struct socket *so; 876 { 877 register struct sockbuf *sb = &so->so_rcv; 878 register struct protosw *pr = so->so_proto; 879 register int s; 880 struct sockbuf asb; 881 882 sb->sb_flags |= SB_NOINTR; 883 (void) sblock(sb, M_WAITOK); 884 s = splimp(); 885 socantrcvmore(so); 886 sbunlock(sb); 887 asb = *sb; 888 memset((caddr_t)sb, 0, sizeof(*sb)); 889 splx(s); 890 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 891 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 892 sbrelease(&asb); 893 } 894 895 int 896 sosetopt(so, level, optname, m0) 897 register struct socket *so; 898 int level, optname; 899 struct mbuf *m0; 900 { 901 int error = 0; 902 register struct mbuf *m = m0; 903 904 if (level != SOL_SOCKET) { 905 if (so->so_proto && so->so_proto->pr_ctloutput) 906 return ((*so->so_proto->pr_ctloutput) 907 (PRCO_SETOPT, so, level, optname, &m0)); 908 error = ENOPROTOOPT; 909 } else { 910 switch (optname) { 911 912 case SO_LINGER: 913 if (m == NULL || m->m_len != sizeof(struct linger)) { 914 error = EINVAL; 915 goto bad; 916 } 917 so->so_linger = mtod(m, struct linger *)->l_linger; 918 /* fall thru... */ 919 920 case SO_DEBUG: 921 case SO_KEEPALIVE: 922 case SO_DONTROUTE: 923 case SO_USELOOPBACK: 924 case SO_BROADCAST: 925 case SO_REUSEADDR: 926 case SO_REUSEPORT: 927 case SO_OOBINLINE: 928 case SO_TIMESTAMP: 929 if (m == NULL || m->m_len < sizeof(int)) { 930 error = EINVAL; 931 goto bad; 932 } 933 if (*mtod(m, int *)) 934 so->so_options |= optname; 935 else 936 so->so_options &= ~optname; 937 break; 938 939 case SO_SNDBUF: 940 case SO_RCVBUF: 941 case SO_SNDLOWAT: 942 case SO_RCVLOWAT: 943 { 944 int optval; 945 946 if (m == NULL || m->m_len < sizeof(int)) { 947 error = EINVAL; 948 goto bad; 949 } 950 951 /* 952 * Values < 1 make no sense for any of these 953 * options, so disallow them. 954 */ 955 optval = *mtod(m, int *); 956 if (optval < 1) { 957 error = EINVAL; 958 goto bad; 959 } 960 961 switch (optname) { 962 963 case SO_SNDBUF: 964 case SO_RCVBUF: 965 if (sbreserve(optname == SO_SNDBUF ? 966 &so->so_snd : &so->so_rcv, 967 (u_long) optval) == 0) { 968 error = ENOBUFS; 969 goto bad; 970 } 971 break; 972 973 /* 974 * Make sure the low-water is never greater than 975 * the high-water. 976 */ 977 case SO_SNDLOWAT: 978 so->so_snd.sb_lowat = 979 (optval > so->so_snd.sb_hiwat) ? 980 so->so_snd.sb_hiwat : optval; 981 break; 982 case SO_RCVLOWAT: 983 so->so_rcv.sb_lowat = 984 (optval > so->so_rcv.sb_hiwat) ? 985 so->so_rcv.sb_hiwat : optval; 986 break; 987 } 988 break; 989 } 990 991 case SO_SNDTIMEO: 992 case SO_RCVTIMEO: 993 { 994 struct timeval *tv; 995 short val; 996 997 if (m == NULL || m->m_len < sizeof(*tv)) { 998 error = EINVAL; 999 goto bad; 1000 } 1001 tv = mtod(m, struct timeval *); 1002 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 1003 error = EDOM; 1004 goto bad; 1005 } 1006 val = tv->tv_sec * hz + tv->tv_usec / tick; 1007 1008 switch (optname) { 1009 1010 case SO_SNDTIMEO: 1011 so->so_snd.sb_timeo = val; 1012 break; 1013 case SO_RCVTIMEO: 1014 so->so_rcv.sb_timeo = val; 1015 break; 1016 } 1017 break; 1018 } 1019 1020 default: 1021 error = ENOPROTOOPT; 1022 break; 1023 } 1024 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1025 (void) ((*so->so_proto->pr_ctloutput) 1026 (PRCO_SETOPT, so, level, optname, &m0)); 1027 m = NULL; /* freed by protocol */ 1028 } 1029 } 1030 bad: 1031 if (m) 1032 (void) m_free(m); 1033 return (error); 1034 } 1035 1036 int 1037 sogetopt(so, level, optname, mp) 1038 register struct socket *so; 1039 int level, optname; 1040 struct mbuf **mp; 1041 { 1042 register struct mbuf *m; 1043 1044 if (level != SOL_SOCKET) { 1045 if (so->so_proto && so->so_proto->pr_ctloutput) { 1046 return ((*so->so_proto->pr_ctloutput) 1047 (PRCO_GETOPT, so, level, optname, mp)); 1048 } else 1049 return (ENOPROTOOPT); 1050 } else { 1051 m = m_get(M_WAIT, MT_SOOPTS); 1052 m->m_len = sizeof(int); 1053 1054 switch (optname) { 1055 1056 case SO_LINGER: 1057 m->m_len = sizeof(struct linger); 1058 mtod(m, struct linger *)->l_onoff = 1059 so->so_options & SO_LINGER; 1060 mtod(m, struct linger *)->l_linger = so->so_linger; 1061 break; 1062 1063 case SO_USELOOPBACK: 1064 case SO_DONTROUTE: 1065 case SO_DEBUG: 1066 case SO_KEEPALIVE: 1067 case SO_REUSEADDR: 1068 case SO_REUSEPORT: 1069 case SO_BROADCAST: 1070 case SO_OOBINLINE: 1071 case SO_TIMESTAMP: 1072 *mtod(m, int *) = so->so_options & optname; 1073 break; 1074 1075 case SO_TYPE: 1076 *mtod(m, int *) = so->so_type; 1077 break; 1078 1079 case SO_ERROR: 1080 *mtod(m, int *) = so->so_error; 1081 so->so_error = 0; 1082 break; 1083 1084 case SO_SNDBUF: 1085 *mtod(m, int *) = so->so_snd.sb_hiwat; 1086 break; 1087 1088 case SO_RCVBUF: 1089 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1090 break; 1091 1092 case SO_SNDLOWAT: 1093 *mtod(m, int *) = so->so_snd.sb_lowat; 1094 break; 1095 1096 case SO_RCVLOWAT: 1097 *mtod(m, int *) = so->so_rcv.sb_lowat; 1098 break; 1099 1100 case SO_SNDTIMEO: 1101 case SO_RCVTIMEO: 1102 { 1103 int val = (optname == SO_SNDTIMEO ? 1104 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1105 1106 m->m_len = sizeof(struct timeval); 1107 mtod(m, struct timeval *)->tv_sec = val / hz; 1108 mtod(m, struct timeval *)->tv_usec = 1109 (val % hz) * tick; 1110 break; 1111 } 1112 1113 default: 1114 (void)m_free(m); 1115 return (ENOPROTOOPT); 1116 } 1117 *mp = m; 1118 return (0); 1119 } 1120 } 1121 1122 void 1123 sohasoutofband(so) 1124 register struct socket *so; 1125 { 1126 struct proc *p; 1127 1128 if (so->so_pgid < 0) 1129 gsignal(-so->so_pgid, SIGURG); 1130 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1131 psignal(p, SIGURG); 1132 selwakeup(&so->so_rcv.sb_sel); 1133 } 1134