1 /* $NetBSD: uipc_socket.c,v 1.56 2001/04/13 23:30:10 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 36 */ 37 38 #include "opt_compat_sunos.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/file.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/domain.h> 47 #include <sys/kernel.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/pool.h> 54 55 struct pool socket_pool; 56 57 extern int somaxconn; /* patchable (XXX sysctl) */ 58 int somaxconn = SOMAXCONN; 59 60 void 61 soinit(void) 62 { 63 64 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, 65 "sockpl", 0, NULL, NULL, M_SOCKET); 66 } 67 68 /* 69 * Socket operation routines. 70 * These routines are called by the routines in 71 * sys_socket.c or from a system process, and 72 * implement the semantics of socket operations by 73 * switching out to the protocol specific routines. 74 */ 75 /*ARGSUSED*/ 76 int 77 socreate(int dom, struct socket **aso, int type, int proto) 78 { 79 struct proc *p; 80 struct protosw *prp; 81 struct socket *so; 82 int error, s; 83 84 p = curproc; /* XXX */ 85 if (proto) 86 prp = pffindproto(dom, proto, type); 87 else 88 prp = pffindtype(dom, type); 89 if (prp == 0 || prp->pr_usrreq == 0) 90 return (EPROTONOSUPPORT); 91 if (prp->pr_type != type) 92 return (EPROTOTYPE); 93 s = splsoftnet(); 94 so = pool_get(&socket_pool, PR_WAITOK); 95 memset((caddr_t)so, 0, sizeof(*so)); 96 TAILQ_INIT(&so->so_q0); 97 TAILQ_INIT(&so->so_q); 98 so->so_type = type; 99 so->so_proto = prp; 100 so->so_send = sosend; 101 so->so_receive = soreceive; 102 if (p != 0) 103 so->so_uid = p->p_ucred->cr_uid; 104 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, 105 (struct mbuf *)(long)proto, (struct mbuf *)0, p); 106 if (error) { 107 so->so_state |= SS_NOFDREF; 108 sofree(so); 109 splx(s); 110 return (error); 111 } 112 #ifdef COMPAT_SUNOS 113 { 114 extern struct emul emul_sunos; 115 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 116 so->so_options |= SO_BROADCAST; 117 } 118 #endif 119 splx(s); 120 *aso = so; 121 return (0); 122 } 123 124 int 125 sobind(struct socket *so, struct mbuf *nam, struct proc *p) 126 { 127 int s, error; 128 129 s = splsoftnet(); 130 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, 131 nam, (struct mbuf *)0, p); 132 splx(s); 133 return (error); 134 } 135 136 int 137 solisten(struct socket *so, int backlog) 138 { 139 int s, error; 140 141 s = splsoftnet(); 142 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, 143 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 144 if (error) { 145 splx(s); 146 return (error); 147 } 148 if (so->so_q.tqh_first == NULL) 149 so->so_options |= SO_ACCEPTCONN; 150 if (backlog < 0) 151 backlog = 0; 152 so->so_qlimit = min(backlog, somaxconn); 153 splx(s); 154 return (0); 155 } 156 157 void 158 sofree(struct socket *so) 159 { 160 161 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 162 return; 163 if (so->so_head) { 164 /* 165 * We must not decommission a socket that's on the accept(2) 166 * queue. If we do, then accept(2) may hang after select(2) 167 * indicated that the listening socket was ready. 168 */ 169 if (!soqremque(so, 0)) 170 return; 171 } 172 sbrelease(&so->so_snd); 173 sorflush(so); 174 pool_put(&socket_pool, so); 175 } 176 177 /* 178 * Close a socket on last file table reference removal. 179 * Initiate disconnect if connected. 180 * Free socket when disconnect complete. 181 */ 182 int 183 soclose(struct socket *so) 184 { 185 struct socket *so2; 186 int s, error; 187 188 error = 0; 189 s = splsoftnet(); /* conservative */ 190 if (so->so_options & SO_ACCEPTCONN) { 191 while ((so2 = so->so_q0.tqh_first) != 0) { 192 (void) soqremque(so2, 0); 193 (void) soabort(so2); 194 } 195 while ((so2 = so->so_q.tqh_first) != 0) { 196 (void) soqremque(so2, 1); 197 (void) soabort(so2); 198 } 199 } 200 if (so->so_pcb == 0) 201 goto discard; 202 if (so->so_state & SS_ISCONNECTED) { 203 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 204 error = sodisconnect(so); 205 if (error) 206 goto drop; 207 } 208 if (so->so_options & SO_LINGER) { 209 if ((so->so_state & SS_ISDISCONNECTING) && 210 (so->so_state & SS_NBIO)) 211 goto drop; 212 while (so->so_state & SS_ISCONNECTED) { 213 error = tsleep((caddr_t)&so->so_timeo, 214 PSOCK | PCATCH, netcls, 215 so->so_linger * hz); 216 if (error) 217 break; 218 } 219 } 220 } 221 drop: 222 if (so->so_pcb) { 223 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 224 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 225 (struct proc *)0); 226 if (error == 0) 227 error = error2; 228 } 229 discard: 230 if (so->so_state & SS_NOFDREF) 231 panic("soclose: NOFDREF"); 232 so->so_state |= SS_NOFDREF; 233 sofree(so); 234 splx(s); 235 return (error); 236 } 237 238 /* 239 * Must be called at splsoftnet... 240 */ 241 int 242 soabort(struct socket *so) 243 { 244 245 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, 246 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 247 } 248 249 int 250 soaccept(struct socket *so, struct mbuf *nam) 251 { 252 int s, error; 253 254 error = 0; 255 s = splsoftnet(); 256 if ((so->so_state & SS_NOFDREF) == 0) 257 panic("soaccept: !NOFDREF"); 258 so->so_state &= ~SS_NOFDREF; 259 if ((so->so_state & SS_ISDISCONNECTED) == 0 || 260 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) 261 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 262 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); 263 else 264 error = ECONNABORTED; 265 266 splx(s); 267 return (error); 268 } 269 270 int 271 soconnect(struct socket *so, struct mbuf *nam) 272 { 273 struct proc *p; 274 int s, error; 275 276 p = curproc; /* XXX */ 277 if (so->so_options & SO_ACCEPTCONN) 278 return (EOPNOTSUPP); 279 s = splsoftnet(); 280 /* 281 * If protocol is connection-based, can only connect once. 282 * Otherwise, if connected, try to disconnect first. 283 * This allows user to disconnect by connecting to, e.g., 284 * a null address. 285 */ 286 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 287 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 288 (error = sodisconnect(so)))) 289 error = EISCONN; 290 else 291 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 292 (struct mbuf *)0, nam, (struct mbuf *)0, p); 293 splx(s); 294 return (error); 295 } 296 297 int 298 soconnect2(struct socket *so1, struct socket *so2) 299 { 300 int s, error; 301 302 s = splsoftnet(); 303 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 304 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, 305 (struct proc *)0); 306 splx(s); 307 return (error); 308 } 309 310 int 311 sodisconnect(struct socket *so) 312 { 313 int s, error; 314 315 s = splsoftnet(); 316 if ((so->so_state & SS_ISCONNECTED) == 0) { 317 error = ENOTCONN; 318 goto bad; 319 } 320 if (so->so_state & SS_ISDISCONNECTING) { 321 error = EALREADY; 322 goto bad; 323 } 324 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 325 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 326 (struct proc *)0); 327 bad: 328 splx(s); 329 return (error); 330 } 331 332 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 333 /* 334 * Send on a socket. 335 * If send must go all at once and message is larger than 336 * send buffering, then hard error. 337 * Lock against other senders. 338 * If must go all at once and not enough room now, then 339 * inform user that this would block and do nothing. 340 * Otherwise, if nonblocking, send as much as possible. 341 * The data to be sent is described by "uio" if nonzero, 342 * otherwise by the mbuf chain "top" (which must be null 343 * if uio is not). Data provided in mbuf chain must be small 344 * enough to send all at once. 345 * 346 * Returns nonzero on error, timeout or signal; callers 347 * must check for short counts if EINTR/ERESTART are returned. 348 * Data and control buffers are freed on return. 349 */ 350 int 351 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, 352 struct mbuf *control, int flags) 353 { 354 struct proc *p; 355 struct mbuf **mp, *m; 356 long space, len, resid; 357 int clen, error, s, dontroute, mlen, atomic; 358 359 p = curproc; /* XXX */ 360 clen = 0; 361 atomic = sosendallatonce(so) || top; 362 if (uio) 363 resid = uio->uio_resid; 364 else 365 resid = top->m_pkthdr.len; 366 /* 367 * In theory resid should be unsigned. 368 * However, space must be signed, as it might be less than 0 369 * if we over-committed, and we must use a signed comparison 370 * of space and resid. On the other hand, a negative resid 371 * causes us to loop sending 0-length segments to the protocol. 372 */ 373 if (resid < 0) { 374 error = EINVAL; 375 goto out; 376 } 377 dontroute = 378 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 379 (so->so_proto->pr_flags & PR_ATOMIC); 380 p->p_stats->p_ru.ru_msgsnd++; 381 if (control) 382 clen = control->m_len; 383 #define snderr(errno) { error = errno; splx(s); goto release; } 384 385 restart: 386 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 387 goto out; 388 do { 389 s = splsoftnet(); 390 if (so->so_state & SS_CANTSENDMORE) 391 snderr(EPIPE); 392 if (so->so_error) { 393 error = so->so_error; 394 so->so_error = 0; 395 splx(s); 396 goto release; 397 } 398 if ((so->so_state & SS_ISCONNECTED) == 0) { 399 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 400 if ((so->so_state & SS_ISCONFIRMING) == 0 && 401 !(resid == 0 && clen != 0)) 402 snderr(ENOTCONN); 403 } else if (addr == 0) 404 snderr(EDESTADDRREQ); 405 } 406 space = sbspace(&so->so_snd); 407 if (flags & MSG_OOB) 408 space += 1024; 409 if ((atomic && resid > so->so_snd.sb_hiwat) || 410 clen > so->so_snd.sb_hiwat) 411 snderr(EMSGSIZE); 412 if (space < resid + clen && uio && 413 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 414 if (so->so_state & SS_NBIO) 415 snderr(EWOULDBLOCK); 416 sbunlock(&so->so_snd); 417 error = sbwait(&so->so_snd); 418 splx(s); 419 if (error) 420 goto out; 421 goto restart; 422 } 423 splx(s); 424 mp = ⊤ 425 space -= clen; 426 do { 427 if (uio == NULL) { 428 /* 429 * Data is prepackaged in "top". 430 */ 431 resid = 0; 432 if (flags & MSG_EOR) 433 top->m_flags |= M_EOR; 434 } else do { 435 if (top == 0) { 436 MGETHDR(m, M_WAIT, MT_DATA); 437 mlen = MHLEN; 438 m->m_pkthdr.len = 0; 439 m->m_pkthdr.rcvif = (struct ifnet *)0; 440 } else { 441 MGET(m, M_WAIT, MT_DATA); 442 mlen = MLEN; 443 } 444 if (resid >= MINCLSIZE && space >= MCLBYTES) { 445 MCLGET(m, M_WAIT); 446 if ((m->m_flags & M_EXT) == 0) 447 goto nopages; 448 mlen = MCLBYTES; 449 #ifdef MAPPED_MBUFS 450 len = min(MCLBYTES, resid); 451 #else 452 if (atomic && top == 0) { 453 len = min(MCLBYTES - max_hdr, 454 resid); 455 m->m_data += max_hdr; 456 } else 457 len = min(MCLBYTES, resid); 458 #endif 459 space -= len; 460 } else { 461 nopages: 462 len = min(min(mlen, resid), space); 463 space -= len; 464 /* 465 * For datagram protocols, leave room 466 * for protocol headers in first mbuf. 467 */ 468 if (atomic && top == 0 && len < mlen) 469 MH_ALIGN(m, len); 470 } 471 error = uiomove(mtod(m, caddr_t), (int)len, 472 uio); 473 resid = uio->uio_resid; 474 m->m_len = len; 475 *mp = m; 476 top->m_pkthdr.len += len; 477 if (error) 478 goto release; 479 mp = &m->m_next; 480 if (resid <= 0) { 481 if (flags & MSG_EOR) 482 top->m_flags |= M_EOR; 483 break; 484 } 485 } while (space > 0 && atomic); 486 487 s = splsoftnet(); 488 489 if (so->so_state & SS_CANTSENDMORE) 490 snderr(EPIPE); 491 492 if (dontroute) 493 so->so_options |= SO_DONTROUTE; 494 if (resid > 0) 495 so->so_state |= SS_MORETOCOME; 496 error = (*so->so_proto->pr_usrreq)(so, 497 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, 498 top, addr, control, p); 499 if (dontroute) 500 so->so_options &= ~SO_DONTROUTE; 501 if (resid > 0) 502 so->so_state &= ~SS_MORETOCOME; 503 splx(s); 504 505 clen = 0; 506 control = 0; 507 top = 0; 508 mp = ⊤ 509 if (error) 510 goto release; 511 } while (resid && space > 0); 512 } while (resid); 513 514 release: 515 sbunlock(&so->so_snd); 516 out: 517 if (top) 518 m_freem(top); 519 if (control) 520 m_freem(control); 521 return (error); 522 } 523 524 /* 525 * Implement receive operations on a socket. 526 * We depend on the way that records are added to the sockbuf 527 * by sbappend*. In particular, each record (mbufs linked through m_next) 528 * must begin with an address if the protocol so specifies, 529 * followed by an optional mbuf or mbufs containing ancillary data, 530 * and then zero or more mbufs of data. 531 * In order to avoid blocking network interrupts for the entire time here, 532 * we splx() while doing the actual copy to user space. 533 * Although the sockbuf is locked, new data may still be appended, 534 * and thus we must maintain consistency of the sockbuf during that time. 535 * 536 * The caller may receive the data as a single mbuf chain by supplying 537 * an mbuf **mp0 for use in returning the chain. The uio is then used 538 * only for the count in uio_resid. 539 */ 540 int 541 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, 542 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 543 { 544 struct mbuf *m, **mp; 545 int flags, len, error, s, offset, moff, type, orig_resid; 546 struct protosw *pr; 547 struct mbuf *nextrecord; 548 549 pr = so->so_proto; 550 mp = mp0; 551 type = 0; 552 orig_resid = uio->uio_resid; 553 if (paddr) 554 *paddr = 0; 555 if (controlp) 556 *controlp = 0; 557 if (flagsp) 558 flags = *flagsp &~ MSG_EOR; 559 else 560 flags = 0; 561 if (flags & MSG_OOB) { 562 m = m_get(M_WAIT, MT_DATA); 563 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 564 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, 565 (struct proc *)0); 566 if (error) 567 goto bad; 568 do { 569 error = uiomove(mtod(m, caddr_t), 570 (int) min(uio->uio_resid, m->m_len), uio); 571 m = m_free(m); 572 } while (uio->uio_resid && error == 0 && m); 573 bad: 574 if (m) 575 m_freem(m); 576 return (error); 577 } 578 if (mp) 579 *mp = (struct mbuf *)0; 580 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 581 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 582 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 583 584 restart: 585 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 586 return (error); 587 s = splsoftnet(); 588 589 m = so->so_rcv.sb_mb; 590 /* 591 * If we have less data than requested, block awaiting more 592 * (subject to any timeout) if: 593 * 1. the current count is less than the low water mark, 594 * 2. MSG_WAITALL is set, and it is possible to do the entire 595 * receive operation at once if we block (resid <= hiwat), or 596 * 3. MSG_DONTWAIT is not set. 597 * If MSG_WAITALL is set but resid is larger than the receive buffer, 598 * we have to do the receive in sections, and thus risk returning 599 * a short count if a timeout or signal occurs after we start. 600 */ 601 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 602 so->so_rcv.sb_cc < uio->uio_resid) && 603 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 604 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 605 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 606 #ifdef DIAGNOSTIC 607 if (m == 0 && so->so_rcv.sb_cc) 608 panic("receive 1"); 609 #endif 610 if (so->so_error) { 611 if (m) 612 goto dontblock; 613 error = so->so_error; 614 if ((flags & MSG_PEEK) == 0) 615 so->so_error = 0; 616 goto release; 617 } 618 if (so->so_state & SS_CANTRCVMORE) { 619 if (m) 620 goto dontblock; 621 else 622 goto release; 623 } 624 for (; m; m = m->m_next) 625 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 626 m = so->so_rcv.sb_mb; 627 goto dontblock; 628 } 629 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 630 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 631 error = ENOTCONN; 632 goto release; 633 } 634 if (uio->uio_resid == 0) 635 goto release; 636 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 637 error = EWOULDBLOCK; 638 goto release; 639 } 640 sbunlock(&so->so_rcv); 641 error = sbwait(&so->so_rcv); 642 splx(s); 643 if (error) 644 return (error); 645 goto restart; 646 } 647 dontblock: 648 #ifdef notyet /* XXXX */ 649 if (uio->uio_procp) 650 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 651 #endif 652 nextrecord = m->m_nextpkt; 653 if (pr->pr_flags & PR_ADDR) { 654 #ifdef DIAGNOSTIC 655 if (m->m_type != MT_SONAME) 656 panic("receive 1a"); 657 #endif 658 orig_resid = 0; 659 if (flags & MSG_PEEK) { 660 if (paddr) 661 *paddr = m_copy(m, 0, m->m_len); 662 m = m->m_next; 663 } else { 664 sbfree(&so->so_rcv, m); 665 if (paddr) { 666 *paddr = m; 667 so->so_rcv.sb_mb = m->m_next; 668 m->m_next = 0; 669 m = so->so_rcv.sb_mb; 670 } else { 671 MFREE(m, so->so_rcv.sb_mb); 672 m = so->so_rcv.sb_mb; 673 } 674 } 675 } 676 while (m && m->m_type == MT_CONTROL && error == 0) { 677 if (flags & MSG_PEEK) { 678 if (controlp) 679 *controlp = m_copy(m, 0, m->m_len); 680 m = m->m_next; 681 } else { 682 sbfree(&so->so_rcv, m); 683 if (controlp) { 684 if (pr->pr_domain->dom_externalize && 685 mtod(m, struct cmsghdr *)->cmsg_type == 686 SCM_RIGHTS) 687 error = (*pr->pr_domain->dom_externalize)(m); 688 *controlp = m; 689 so->so_rcv.sb_mb = m->m_next; 690 m->m_next = 0; 691 m = so->so_rcv.sb_mb; 692 } else { 693 MFREE(m, so->so_rcv.sb_mb); 694 m = so->so_rcv.sb_mb; 695 } 696 } 697 if (controlp) { 698 orig_resid = 0; 699 controlp = &(*controlp)->m_next; 700 } 701 } 702 if (m) { 703 if ((flags & MSG_PEEK) == 0) 704 m->m_nextpkt = nextrecord; 705 type = m->m_type; 706 if (type == MT_OOBDATA) 707 flags |= MSG_OOB; 708 } 709 moff = 0; 710 offset = 0; 711 while (m && uio->uio_resid > 0 && error == 0) { 712 if (m->m_type == MT_OOBDATA) { 713 if (type != MT_OOBDATA) 714 break; 715 } else if (type == MT_OOBDATA) 716 break; 717 #ifdef DIAGNOSTIC 718 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 719 panic("receive 3"); 720 #endif 721 so->so_state &= ~SS_RCVATMARK; 722 len = uio->uio_resid; 723 if (so->so_oobmark && len > so->so_oobmark - offset) 724 len = so->so_oobmark - offset; 725 if (len > m->m_len - moff) 726 len = m->m_len - moff; 727 /* 728 * If mp is set, just pass back the mbufs. 729 * Otherwise copy them out via the uio, then free. 730 * Sockbuf must be consistent here (points to current mbuf, 731 * it points to next record) when we drop priority; 732 * we must note any additions to the sockbuf when we 733 * block interrupts again. 734 */ 735 if (mp == 0) { 736 splx(s); 737 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 738 s = splsoftnet(); 739 } else 740 uio->uio_resid -= len; 741 if (len == m->m_len - moff) { 742 if (m->m_flags & M_EOR) 743 flags |= MSG_EOR; 744 if (flags & MSG_PEEK) { 745 m = m->m_next; 746 moff = 0; 747 } else { 748 nextrecord = m->m_nextpkt; 749 sbfree(&so->so_rcv, m); 750 if (mp) { 751 *mp = m; 752 mp = &m->m_next; 753 so->so_rcv.sb_mb = m = m->m_next; 754 *mp = (struct mbuf *)0; 755 } else { 756 MFREE(m, so->so_rcv.sb_mb); 757 m = so->so_rcv.sb_mb; 758 } 759 if (m) 760 m->m_nextpkt = nextrecord; 761 } 762 } else { 763 if (flags & MSG_PEEK) 764 moff += len; 765 else { 766 if (mp) 767 *mp = m_copym(m, 0, len, M_WAIT); 768 m->m_data += len; 769 m->m_len -= len; 770 so->so_rcv.sb_cc -= len; 771 } 772 } 773 if (so->so_oobmark) { 774 if ((flags & MSG_PEEK) == 0) { 775 so->so_oobmark -= len; 776 if (so->so_oobmark == 0) { 777 so->so_state |= SS_RCVATMARK; 778 break; 779 } 780 } else { 781 offset += len; 782 if (offset == so->so_oobmark) 783 break; 784 } 785 } 786 if (flags & MSG_EOR) 787 break; 788 /* 789 * If the MSG_WAITALL flag is set (for non-atomic socket), 790 * we must not quit until "uio->uio_resid == 0" or an error 791 * termination. If a signal/timeout occurs, return 792 * with a short count but without error. 793 * Keep sockbuf locked against other readers. 794 */ 795 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 796 !sosendallatonce(so) && !nextrecord) { 797 if (so->so_error || so->so_state & SS_CANTRCVMORE) 798 break; 799 error = sbwait(&so->so_rcv); 800 if (error) { 801 sbunlock(&so->so_rcv); 802 splx(s); 803 return (0); 804 } 805 if ((m = so->so_rcv.sb_mb) != NULL) 806 nextrecord = m->m_nextpkt; 807 } 808 } 809 810 if (m && pr->pr_flags & PR_ATOMIC) { 811 flags |= MSG_TRUNC; 812 if ((flags & MSG_PEEK) == 0) 813 (void) sbdroprecord(&so->so_rcv); 814 } 815 if ((flags & MSG_PEEK) == 0) { 816 if (m == 0) 817 so->so_rcv.sb_mb = nextrecord; 818 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 819 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 820 (struct mbuf *)(long)flags, (struct mbuf *)0, 821 (struct proc *)0); 822 } 823 if (orig_resid == uio->uio_resid && orig_resid && 824 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 825 sbunlock(&so->so_rcv); 826 splx(s); 827 goto restart; 828 } 829 830 if (flagsp) 831 *flagsp |= flags; 832 release: 833 sbunlock(&so->so_rcv); 834 splx(s); 835 return (error); 836 } 837 838 int 839 soshutdown(struct socket *so, int how) 840 { 841 struct protosw *pr; 842 843 pr = so->so_proto; 844 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 845 return (EINVAL); 846 847 if (how == SHUT_RD || how == SHUT_RDWR) 848 sorflush(so); 849 if (how == SHUT_WR || how == SHUT_RDWR) 850 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 851 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 852 return (0); 853 } 854 855 void 856 sorflush(struct socket *so) 857 { 858 struct sockbuf *sb, asb; 859 struct protosw *pr; 860 int s; 861 862 sb = &so->so_rcv; 863 pr = so->so_proto; 864 sb->sb_flags |= SB_NOINTR; 865 (void) sblock(sb, M_WAITOK); 866 s = splnet(); 867 socantrcvmore(so); 868 sbunlock(sb); 869 asb = *sb; 870 memset((caddr_t)sb, 0, sizeof(*sb)); 871 splx(s); 872 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 873 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 874 sbrelease(&asb); 875 } 876 877 int 878 sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) 879 { 880 int error; 881 struct mbuf *m; 882 883 error = 0; 884 m = m0; 885 if (level != SOL_SOCKET) { 886 if (so->so_proto && so->so_proto->pr_ctloutput) 887 return ((*so->so_proto->pr_ctloutput) 888 (PRCO_SETOPT, so, level, optname, &m0)); 889 error = ENOPROTOOPT; 890 } else { 891 switch (optname) { 892 893 case SO_LINGER: 894 if (m == NULL || m->m_len != sizeof(struct linger)) { 895 error = EINVAL; 896 goto bad; 897 } 898 so->so_linger = mtod(m, struct linger *)->l_linger; 899 /* fall thru... */ 900 901 case SO_DEBUG: 902 case SO_KEEPALIVE: 903 case SO_DONTROUTE: 904 case SO_USELOOPBACK: 905 case SO_BROADCAST: 906 case SO_REUSEADDR: 907 case SO_REUSEPORT: 908 case SO_OOBINLINE: 909 case SO_TIMESTAMP: 910 if (m == NULL || m->m_len < sizeof(int)) { 911 error = EINVAL; 912 goto bad; 913 } 914 if (*mtod(m, int *)) 915 so->so_options |= optname; 916 else 917 so->so_options &= ~optname; 918 break; 919 920 case SO_SNDBUF: 921 case SO_RCVBUF: 922 case SO_SNDLOWAT: 923 case SO_RCVLOWAT: 924 { 925 int optval; 926 927 if (m == NULL || m->m_len < sizeof(int)) { 928 error = EINVAL; 929 goto bad; 930 } 931 932 /* 933 * Values < 1 make no sense for any of these 934 * options, so disallow them. 935 */ 936 optval = *mtod(m, int *); 937 if (optval < 1) { 938 error = EINVAL; 939 goto bad; 940 } 941 942 switch (optname) { 943 944 case SO_SNDBUF: 945 case SO_RCVBUF: 946 if (sbreserve(optname == SO_SNDBUF ? 947 &so->so_snd : &so->so_rcv, 948 (u_long) optval) == 0) { 949 error = ENOBUFS; 950 goto bad; 951 } 952 break; 953 954 /* 955 * Make sure the low-water is never greater than 956 * the high-water. 957 */ 958 case SO_SNDLOWAT: 959 so->so_snd.sb_lowat = 960 (optval > so->so_snd.sb_hiwat) ? 961 so->so_snd.sb_hiwat : optval; 962 break; 963 case SO_RCVLOWAT: 964 so->so_rcv.sb_lowat = 965 (optval > so->so_rcv.sb_hiwat) ? 966 so->so_rcv.sb_hiwat : optval; 967 break; 968 } 969 break; 970 } 971 972 case SO_SNDTIMEO: 973 case SO_RCVTIMEO: 974 { 975 struct timeval *tv; 976 short val; 977 978 if (m == NULL || m->m_len < sizeof(*tv)) { 979 error = EINVAL; 980 goto bad; 981 } 982 tv = mtod(m, struct timeval *); 983 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 984 error = EDOM; 985 goto bad; 986 } 987 val = tv->tv_sec * hz + tv->tv_usec / tick; 988 989 switch (optname) { 990 991 case SO_SNDTIMEO: 992 so->so_snd.sb_timeo = val; 993 break; 994 case SO_RCVTIMEO: 995 so->so_rcv.sb_timeo = val; 996 break; 997 } 998 break; 999 } 1000 1001 default: 1002 error = ENOPROTOOPT; 1003 break; 1004 } 1005 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1006 (void) ((*so->so_proto->pr_ctloutput) 1007 (PRCO_SETOPT, so, level, optname, &m0)); 1008 m = NULL; /* freed by protocol */ 1009 } 1010 } 1011 bad: 1012 if (m) 1013 (void) m_free(m); 1014 return (error); 1015 } 1016 1017 int 1018 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) 1019 { 1020 struct mbuf *m; 1021 1022 if (level != SOL_SOCKET) { 1023 if (so->so_proto && so->so_proto->pr_ctloutput) { 1024 return ((*so->so_proto->pr_ctloutput) 1025 (PRCO_GETOPT, so, level, optname, mp)); 1026 } else 1027 return (ENOPROTOOPT); 1028 } else { 1029 m = m_get(M_WAIT, MT_SOOPTS); 1030 m->m_len = sizeof(int); 1031 1032 switch (optname) { 1033 1034 case SO_LINGER: 1035 m->m_len = sizeof(struct linger); 1036 mtod(m, struct linger *)->l_onoff = 1037 so->so_options & SO_LINGER; 1038 mtod(m, struct linger *)->l_linger = so->so_linger; 1039 break; 1040 1041 case SO_USELOOPBACK: 1042 case SO_DONTROUTE: 1043 case SO_DEBUG: 1044 case SO_KEEPALIVE: 1045 case SO_REUSEADDR: 1046 case SO_REUSEPORT: 1047 case SO_BROADCAST: 1048 case SO_OOBINLINE: 1049 case SO_TIMESTAMP: 1050 *mtod(m, int *) = so->so_options & optname; 1051 break; 1052 1053 case SO_TYPE: 1054 *mtod(m, int *) = so->so_type; 1055 break; 1056 1057 case SO_ERROR: 1058 *mtod(m, int *) = so->so_error; 1059 so->so_error = 0; 1060 break; 1061 1062 case SO_SNDBUF: 1063 *mtod(m, int *) = so->so_snd.sb_hiwat; 1064 break; 1065 1066 case SO_RCVBUF: 1067 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1068 break; 1069 1070 case SO_SNDLOWAT: 1071 *mtod(m, int *) = so->so_snd.sb_lowat; 1072 break; 1073 1074 case SO_RCVLOWAT: 1075 *mtod(m, int *) = so->so_rcv.sb_lowat; 1076 break; 1077 1078 case SO_SNDTIMEO: 1079 case SO_RCVTIMEO: 1080 { 1081 int val = (optname == SO_SNDTIMEO ? 1082 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1083 1084 m->m_len = sizeof(struct timeval); 1085 mtod(m, struct timeval *)->tv_sec = val / hz; 1086 mtod(m, struct timeval *)->tv_usec = 1087 (val % hz) * tick; 1088 break; 1089 } 1090 1091 default: 1092 (void)m_free(m); 1093 return (ENOPROTOOPT); 1094 } 1095 *mp = m; 1096 return (0); 1097 } 1098 } 1099 1100 void 1101 sohasoutofband(struct socket *so) 1102 { 1103 struct proc *p; 1104 1105 if (so->so_pgid < 0) 1106 gsignal(-so->so_pgid, SIGURG); 1107 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1108 psignal(p, SIGURG); 1109 selwakeup(&so->so_rcv.sb_sel); 1110 } 1111