/* $NetBSD: uipc_socket.c,v 1.54 2001/02/27 05:19:13 lukem Exp $ */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 * 35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 36 */ 37 38 #include "opt_compat_sunos.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/file.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/domain.h> 47 #include <sys/kernel.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/pool.h> 54 55 struct pool socket_pool; 56 57 extern int somaxconn; /* patchable (XXX sysctl) */ 58 int somaxconn = SOMAXCONN; 59 60 void 61 soinit(void) 62 { 63 64 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, 65 "sockpl", 0, NULL, NULL, M_SOCKET); 66 } 67 68 /* 69 * Socket operation routines. 70 * These routines are called by the routines in 71 * sys_socket.c or from a system process, and 72 * implement the semantics of socket operations by 73 * switching out to the protocol specific routines. 74 */ 75 /*ARGSUSED*/ 76 int 77 socreate(int dom, struct socket **aso, int type, int proto) 78 { 79 struct proc *p; 80 struct protosw *prp; 81 struct socket *so; 82 int error, s; 83 84 p = curproc; /* XXX */ 85 if (proto) 86 prp = pffindproto(dom, proto, type); 87 else 88 prp = pffindtype(dom, type); 89 if (prp == 0 || prp->pr_usrreq == 0) 90 return (EPROTONOSUPPORT); 91 if (prp->pr_type != type) 92 return (EPROTOTYPE); 93 s = splsoftnet(); 94 so = pool_get(&socket_pool, PR_WAITOK); 95 memset((caddr_t)so, 0, sizeof(*so)); 96 TAILQ_INIT(&so->so_q0); 97 TAILQ_INIT(&so->so_q); 98 so->so_type = type; 99 so->so_proto = prp; 100 so->so_send = sosend; 101 so->so_receive = soreceive; 102 if (p != 0) 103 so->so_uid = p->p_ucred->cr_uid; 104 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, 105 (struct mbuf *)(long)proto, (struct mbuf *)0, p); 106 if (error) { 107 so->so_state |= SS_NOFDREF; 108 sofree(so); 109 splx(s); 110 return (error); 111 } 112 #ifdef COMPAT_SUNOS 113 { 114 extern struct emul 
emul_sunos; 115 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) 116 so->so_options |= SO_BROADCAST; 117 } 118 #endif 119 splx(s); 120 *aso = so; 121 return (0); 122 } 123 124 int 125 sobind(struct socket *so, struct mbuf *nam, struct proc *p) 126 { 127 int s, error; 128 129 s = splsoftnet(); 130 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, 131 nam, (struct mbuf *)0, p); 132 splx(s); 133 return (error); 134 } 135 136 int 137 solisten(struct socket *so, int backlog) 138 { 139 int s, error; 140 141 s = splsoftnet(); 142 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, 143 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 144 if (error) { 145 splx(s); 146 return (error); 147 } 148 if (so->so_q.tqh_first == NULL) 149 so->so_options |= SO_ACCEPTCONN; 150 if (backlog < 0) 151 backlog = 0; 152 so->so_qlimit = min(backlog, somaxconn); 153 splx(s); 154 return (0); 155 } 156 157 void 158 sofree(struct socket *so) 159 { 160 161 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 162 return; 163 if (so->so_head) { 164 /* 165 * We must not decommission a socket that's on the accept(2) 166 * queue. If we do, then accept(2) may hang after select(2) 167 * indicated that the listening socket was ready. 168 */ 169 if (!soqremque(so, 0)) 170 return; 171 } 172 sbrelease(&so->so_snd); 173 sorflush(so); 174 pool_put(&socket_pool, so); 175 } 176 177 /* 178 * Close a socket on last file table reference removal. 179 * Initiate disconnect if connected. 180 * Free socket when disconnect complete. 
181 */ 182 int 183 soclose(struct socket *so) 184 { 185 struct socket *so2; 186 int s, error; 187 188 error = 0; 189 s = splsoftnet(); /* conservative */ 190 if (so->so_options & SO_ACCEPTCONN) { 191 while ((so2 = so->so_q0.tqh_first) != 0) { 192 (void) soqremque(so2, 0); 193 (void) soabort(so2); 194 } 195 while ((so2 = so->so_q.tqh_first) != 0) { 196 (void) soqremque(so2, 1); 197 (void) soabort(so2); 198 } 199 } 200 if (so->so_pcb == 0) 201 goto discard; 202 if (so->so_state & SS_ISCONNECTED) { 203 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 204 error = sodisconnect(so); 205 if (error) 206 goto drop; 207 } 208 if (so->so_options & SO_LINGER) { 209 if ((so->so_state & SS_ISDISCONNECTING) && 210 (so->so_state & SS_NBIO)) 211 goto drop; 212 while (so->so_state & SS_ISCONNECTED) { 213 error = tsleep((caddr_t)&so->so_timeo, 214 PSOCK | PCATCH, netcls, 215 so->so_linger * hz); 216 if (error) 217 break; 218 } 219 } 220 } 221 drop: 222 if (so->so_pcb) { 223 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 224 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 225 (struct proc *)0); 226 if (error == 0) 227 error = error2; 228 } 229 discard: 230 if (so->so_state & SS_NOFDREF) 231 panic("soclose: NOFDREF"); 232 so->so_state |= SS_NOFDREF; 233 sofree(so); 234 splx(s); 235 return (error); 236 } 237 238 /* 239 * Must be called at splsoftnet... 
240 */ 241 int 242 soabort(struct socket *so) 243 { 244 245 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, 246 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 247 } 248 249 int 250 soaccept(struct socket *so, struct mbuf *nam) 251 { 252 int s, error; 253 254 error = 0; 255 s = splsoftnet(); 256 if ((so->so_state & SS_NOFDREF) == 0) 257 panic("soaccept: !NOFDREF"); 258 so->so_state &= ~SS_NOFDREF; 259 if ((so->so_state & SS_ISDISCONNECTED) == 0) 260 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 261 (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); 262 else 263 error = ECONNABORTED; 264 265 splx(s); 266 return (error); 267 } 268 269 int 270 soconnect(struct socket *so, struct mbuf *nam) 271 { 272 struct proc *p; 273 int s, error; 274 275 p = curproc; /* XXX */ 276 if (so->so_options & SO_ACCEPTCONN) 277 return (EOPNOTSUPP); 278 s = splsoftnet(); 279 /* 280 * If protocol is connection-based, can only connect once. 281 * Otherwise, if connected, try to disconnect first. 282 * This allows user to disconnect by connecting to, e.g., 283 * a null address. 
284 */ 285 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 286 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 287 (error = sodisconnect(so)))) 288 error = EISCONN; 289 else 290 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 291 (struct mbuf *)0, nam, (struct mbuf *)0, p); 292 splx(s); 293 return (error); 294 } 295 296 int 297 soconnect2(struct socket *so1, struct socket *so2) 298 { 299 int s, error; 300 301 s = splsoftnet(); 302 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 303 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, 304 (struct proc *)0); 305 splx(s); 306 return (error); 307 } 308 309 int 310 sodisconnect(struct socket *so) 311 { 312 int s, error; 313 314 s = splsoftnet(); 315 if ((so->so_state & SS_ISCONNECTED) == 0) { 316 error = ENOTCONN; 317 goto bad; 318 } 319 if (so->so_state & SS_ISDISCONNECTING) { 320 error = EALREADY; 321 goto bad; 322 } 323 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 324 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 325 (struct proc *)0); 326 bad: 327 splx(s); 328 return (error); 329 } 330 331 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 332 /* 333 * Send on a socket. 334 * If send must go all at once and message is larger than 335 * send buffering, then hard error. 336 * Lock against other senders. 337 * If must go all at once and not enough room now, then 338 * inform user that this would block and do nothing. 339 * Otherwise, if nonblocking, send as much as possible. 340 * The data to be sent is described by "uio" if nonzero, 341 * otherwise by the mbuf chain "top" (which must be null 342 * if uio is not). Data provided in mbuf chain must be small 343 * enough to send all at once. 344 * 345 * Returns nonzero on error, timeout or signal; callers 346 * must check for short counts if EINTR/ERESTART are returned. 347 * Data and control buffers are freed on return. 
348 */ 349 int 350 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, 351 struct mbuf *control, int flags) 352 { 353 struct proc *p; 354 struct mbuf **mp, *m; 355 long space, len, resid; 356 int clen, error, s, dontroute, mlen, atomic; 357 358 p = curproc; /* XXX */ 359 clen = 0; 360 atomic = sosendallatonce(so) || top; 361 if (uio) 362 resid = uio->uio_resid; 363 else 364 resid = top->m_pkthdr.len; 365 /* 366 * In theory resid should be unsigned. 367 * However, space must be signed, as it might be less than 0 368 * if we over-committed, and we must use a signed comparison 369 * of space and resid. On the other hand, a negative resid 370 * causes us to loop sending 0-length segments to the protocol. 371 */ 372 if (resid < 0) { 373 error = EINVAL; 374 goto out; 375 } 376 dontroute = 377 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 378 (so->so_proto->pr_flags & PR_ATOMIC); 379 p->p_stats->p_ru.ru_msgsnd++; 380 if (control) 381 clen = control->m_len; 382 #define snderr(errno) { error = errno; splx(s); goto release; } 383 384 restart: 385 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 386 goto out; 387 do { 388 s = splsoftnet(); 389 if (so->so_state & SS_CANTSENDMORE) 390 snderr(EPIPE); 391 if (so->so_error) { 392 error = so->so_error; 393 so->so_error = 0; 394 splx(s); 395 goto release; 396 } 397 if ((so->so_state & SS_ISCONNECTED) == 0) { 398 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 399 if ((so->so_state & SS_ISCONFIRMING) == 0 && 400 !(resid == 0 && clen != 0)) 401 snderr(ENOTCONN); 402 } else if (addr == 0) 403 snderr(EDESTADDRREQ); 404 } 405 space = sbspace(&so->so_snd); 406 if (flags & MSG_OOB) 407 space += 1024; 408 if ((atomic && resid > so->so_snd.sb_hiwat) || 409 clen > so->so_snd.sb_hiwat) 410 snderr(EMSGSIZE); 411 if (space < resid + clen && uio && 412 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 413 if (so->so_state & SS_NBIO) 414 snderr(EWOULDBLOCK); 415 
sbunlock(&so->so_snd); 416 error = sbwait(&so->so_snd); 417 splx(s); 418 if (error) 419 goto out; 420 goto restart; 421 } 422 splx(s); 423 mp = ⊤ 424 space -= clen; 425 do { 426 if (uio == NULL) { 427 /* 428 * Data is prepackaged in "top". 429 */ 430 resid = 0; 431 if (flags & MSG_EOR) 432 top->m_flags |= M_EOR; 433 } else do { 434 if (top == 0) { 435 MGETHDR(m, M_WAIT, MT_DATA); 436 mlen = MHLEN; 437 m->m_pkthdr.len = 0; 438 m->m_pkthdr.rcvif = (struct ifnet *)0; 439 } else { 440 MGET(m, M_WAIT, MT_DATA); 441 mlen = MLEN; 442 } 443 if (resid >= MINCLSIZE && space >= MCLBYTES) { 444 MCLGET(m, M_WAIT); 445 if ((m->m_flags & M_EXT) == 0) 446 goto nopages; 447 mlen = MCLBYTES; 448 #ifdef MAPPED_MBUFS 449 len = min(MCLBYTES, resid); 450 #else 451 if (atomic && top == 0) { 452 len = min(MCLBYTES - max_hdr, 453 resid); 454 m->m_data += max_hdr; 455 } else 456 len = min(MCLBYTES, resid); 457 #endif 458 space -= len; 459 } else { 460 nopages: 461 len = min(min(mlen, resid), space); 462 space -= len; 463 /* 464 * For datagram protocols, leave room 465 * for protocol headers in first mbuf. 466 */ 467 if (atomic && top == 0 && len < mlen) 468 MH_ALIGN(m, len); 469 } 470 error = uiomove(mtod(m, caddr_t), (int)len, 471 uio); 472 resid = uio->uio_resid; 473 m->m_len = len; 474 *mp = m; 475 top->m_pkthdr.len += len; 476 if (error) 477 goto release; 478 mp = &m->m_next; 479 if (resid <= 0) { 480 if (flags & MSG_EOR) 481 top->m_flags |= M_EOR; 482 break; 483 } 484 } while (space > 0 && atomic); 485 486 s = splsoftnet(); 487 488 if (so->so_state & SS_CANTSENDMORE) 489 snderr(EPIPE); 490 491 if (dontroute) 492 so->so_options |= SO_DONTROUTE; 493 if (resid > 0) 494 so->so_state |= SS_MORETOCOME; 495 error = (*so->so_proto->pr_usrreq)(so, 496 (flags & MSG_OOB) ? 
PRU_SENDOOB : PRU_SEND, 497 top, addr, control, p); 498 if (dontroute) 499 so->so_options &= ~SO_DONTROUTE; 500 if (resid > 0) 501 so->so_state &= ~SS_MORETOCOME; 502 splx(s); 503 504 clen = 0; 505 control = 0; 506 top = 0; 507 mp = ⊤ 508 if (error) 509 goto release; 510 } while (resid && space > 0); 511 } while (resid); 512 513 release: 514 sbunlock(&so->so_snd); 515 out: 516 if (top) 517 m_freem(top); 518 if (control) 519 m_freem(control); 520 return (error); 521 } 522 523 /* 524 * Implement receive operations on a socket. 525 * We depend on the way that records are added to the sockbuf 526 * by sbappend*. In particular, each record (mbufs linked through m_next) 527 * must begin with an address if the protocol so specifies, 528 * followed by an optional mbuf or mbufs containing ancillary data, 529 * and then zero or more mbufs of data. 530 * In order to avoid blocking network interrupts for the entire time here, 531 * we splx() while doing the actual copy to user space. 532 * Although the sockbuf is locked, new data may still be appended, 533 * and thus we must maintain consistency of the sockbuf during that time. 534 * 535 * The caller may receive the data as a single mbuf chain by supplying 536 * an mbuf **mp0 for use in returning the chain. The uio is then used 537 * only for the count in uio_resid. 
538 */ 539 int 540 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, 541 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 542 { 543 struct mbuf *m, **mp; 544 int flags, len, error, s, offset, moff, type, orig_resid; 545 struct protosw *pr; 546 struct mbuf *nextrecord; 547 548 pr = so->so_proto; 549 mp = mp0; 550 type = 0; 551 orig_resid = uio->uio_resid; 552 if (paddr) 553 *paddr = 0; 554 if (controlp) 555 *controlp = 0; 556 if (flagsp) 557 flags = *flagsp &~ MSG_EOR; 558 else 559 flags = 0; 560 if (flags & MSG_OOB) { 561 m = m_get(M_WAIT, MT_DATA); 562 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 563 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, 564 (struct proc *)0); 565 if (error) 566 goto bad; 567 do { 568 error = uiomove(mtod(m, caddr_t), 569 (int) min(uio->uio_resid, m->m_len), uio); 570 m = m_free(m); 571 } while (uio->uio_resid && error == 0 && m); 572 bad: 573 if (m) 574 m_freem(m); 575 return (error); 576 } 577 if (mp) 578 *mp = (struct mbuf *)0; 579 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 580 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 581 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 582 583 restart: 584 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 585 return (error); 586 s = splsoftnet(); 587 588 m = so->so_rcv.sb_mb; 589 /* 590 * If we have less data than requested, block awaiting more 591 * (subject to any timeout) if: 592 * 1. the current count is less than the low water mark, 593 * 2. MSG_WAITALL is set, and it is possible to do the entire 594 * receive operation at once if we block (resid <= hiwat), or 595 * 3. MSG_DONTWAIT is not set. 596 * If MSG_WAITALL is set but resid is larger than the receive buffer, 597 * we have to do the receive in sections, and thus risk returning 598 * a short count if a timeout or signal occurs after we start. 
599 */ 600 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 601 so->so_rcv.sb_cc < uio->uio_resid) && 602 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 603 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 604 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 605 #ifdef DIAGNOSTIC 606 if (m == 0 && so->so_rcv.sb_cc) 607 panic("receive 1"); 608 #endif 609 if (so->so_error) { 610 if (m) 611 goto dontblock; 612 error = so->so_error; 613 if ((flags & MSG_PEEK) == 0) 614 so->so_error = 0; 615 goto release; 616 } 617 if (so->so_state & SS_CANTRCVMORE) { 618 if (m) 619 goto dontblock; 620 else 621 goto release; 622 } 623 for (; m; m = m->m_next) 624 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 625 m = so->so_rcv.sb_mb; 626 goto dontblock; 627 } 628 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 629 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 630 error = ENOTCONN; 631 goto release; 632 } 633 if (uio->uio_resid == 0) 634 goto release; 635 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 636 error = EWOULDBLOCK; 637 goto release; 638 } 639 sbunlock(&so->so_rcv); 640 error = sbwait(&so->so_rcv); 641 splx(s); 642 if (error) 643 return (error); 644 goto restart; 645 } 646 dontblock: 647 #ifdef notyet /* XXXX */ 648 if (uio->uio_procp) 649 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 650 #endif 651 nextrecord = m->m_nextpkt; 652 if (pr->pr_flags & PR_ADDR) { 653 #ifdef DIAGNOSTIC 654 if (m->m_type != MT_SONAME) 655 panic("receive 1a"); 656 #endif 657 orig_resid = 0; 658 if (flags & MSG_PEEK) { 659 if (paddr) 660 *paddr = m_copy(m, 0, m->m_len); 661 m = m->m_next; 662 } else { 663 sbfree(&so->so_rcv, m); 664 if (paddr) { 665 *paddr = m; 666 so->so_rcv.sb_mb = m->m_next; 667 m->m_next = 0; 668 m = so->so_rcv.sb_mb; 669 } else { 670 MFREE(m, so->so_rcv.sb_mb); 671 m = so->so_rcv.sb_mb; 672 } 673 } 674 } 675 while (m && m->m_type == MT_CONTROL && error == 0) { 676 if (flags & MSG_PEEK) { 677 if (controlp) 678 *controlp = 
m_copy(m, 0, m->m_len); 679 m = m->m_next; 680 } else { 681 sbfree(&so->so_rcv, m); 682 if (controlp) { 683 if (pr->pr_domain->dom_externalize && 684 mtod(m, struct cmsghdr *)->cmsg_type == 685 SCM_RIGHTS) 686 error = (*pr->pr_domain->dom_externalize)(m); 687 *controlp = m; 688 so->so_rcv.sb_mb = m->m_next; 689 m->m_next = 0; 690 m = so->so_rcv.sb_mb; 691 } else { 692 MFREE(m, so->so_rcv.sb_mb); 693 m = so->so_rcv.sb_mb; 694 } 695 } 696 if (controlp) { 697 orig_resid = 0; 698 controlp = &(*controlp)->m_next; 699 } 700 } 701 if (m) { 702 if ((flags & MSG_PEEK) == 0) 703 m->m_nextpkt = nextrecord; 704 type = m->m_type; 705 if (type == MT_OOBDATA) 706 flags |= MSG_OOB; 707 } 708 moff = 0; 709 offset = 0; 710 while (m && uio->uio_resid > 0 && error == 0) { 711 if (m->m_type == MT_OOBDATA) { 712 if (type != MT_OOBDATA) 713 break; 714 } else if (type == MT_OOBDATA) 715 break; 716 #ifdef DIAGNOSTIC 717 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 718 panic("receive 3"); 719 #endif 720 so->so_state &= ~SS_RCVATMARK; 721 len = uio->uio_resid; 722 if (so->so_oobmark && len > so->so_oobmark - offset) 723 len = so->so_oobmark - offset; 724 if (len > m->m_len - moff) 725 len = m->m_len - moff; 726 /* 727 * If mp is set, just pass back the mbufs. 728 * Otherwise copy them out via the uio, then free. 729 * Sockbuf must be consistent here (points to current mbuf, 730 * it points to next record) when we drop priority; 731 * we must note any additions to the sockbuf when we 732 * block interrupts again. 
733 */ 734 if (mp == 0) { 735 splx(s); 736 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 737 s = splsoftnet(); 738 } else 739 uio->uio_resid -= len; 740 if (len == m->m_len - moff) { 741 if (m->m_flags & M_EOR) 742 flags |= MSG_EOR; 743 if (flags & MSG_PEEK) { 744 m = m->m_next; 745 moff = 0; 746 } else { 747 nextrecord = m->m_nextpkt; 748 sbfree(&so->so_rcv, m); 749 if (mp) { 750 *mp = m; 751 mp = &m->m_next; 752 so->so_rcv.sb_mb = m = m->m_next; 753 *mp = (struct mbuf *)0; 754 } else { 755 MFREE(m, so->so_rcv.sb_mb); 756 m = so->so_rcv.sb_mb; 757 } 758 if (m) 759 m->m_nextpkt = nextrecord; 760 } 761 } else { 762 if (flags & MSG_PEEK) 763 moff += len; 764 else { 765 if (mp) 766 *mp = m_copym(m, 0, len, M_WAIT); 767 m->m_data += len; 768 m->m_len -= len; 769 so->so_rcv.sb_cc -= len; 770 } 771 } 772 if (so->so_oobmark) { 773 if ((flags & MSG_PEEK) == 0) { 774 so->so_oobmark -= len; 775 if (so->so_oobmark == 0) { 776 so->so_state |= SS_RCVATMARK; 777 break; 778 } 779 } else { 780 offset += len; 781 if (offset == so->so_oobmark) 782 break; 783 } 784 } 785 if (flags & MSG_EOR) 786 break; 787 /* 788 * If the MSG_WAITALL flag is set (for non-atomic socket), 789 * we must not quit until "uio->uio_resid == 0" or an error 790 * termination. If a signal/timeout occurs, return 791 * with a short count but without error. 792 * Keep sockbuf locked against other readers. 
793 */ 794 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 795 !sosendallatonce(so) && !nextrecord) { 796 if (so->so_error || so->so_state & SS_CANTRCVMORE) 797 break; 798 error = sbwait(&so->so_rcv); 799 if (error) { 800 sbunlock(&so->so_rcv); 801 splx(s); 802 return (0); 803 } 804 if ((m = so->so_rcv.sb_mb) != NULL) 805 nextrecord = m->m_nextpkt; 806 } 807 } 808 809 if (m && pr->pr_flags & PR_ATOMIC) { 810 flags |= MSG_TRUNC; 811 if ((flags & MSG_PEEK) == 0) 812 (void) sbdroprecord(&so->so_rcv); 813 } 814 if ((flags & MSG_PEEK) == 0) { 815 if (m == 0) 816 so->so_rcv.sb_mb = nextrecord; 817 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 818 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, 819 (struct mbuf *)(long)flags, (struct mbuf *)0, 820 (struct proc *)0); 821 } 822 if (orig_resid == uio->uio_resid && orig_resid && 823 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 824 sbunlock(&so->so_rcv); 825 splx(s); 826 goto restart; 827 } 828 829 if (flagsp) 830 *flagsp |= flags; 831 release: 832 sbunlock(&so->so_rcv); 833 splx(s); 834 return (error); 835 } 836 837 int 838 soshutdown(struct socket *so, int how) 839 { 840 struct protosw *pr; 841 842 pr = so->so_proto; 843 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 844 return (EINVAL); 845 846 if (how == SHUT_RD || how == SHUT_RDWR) 847 sorflush(so); 848 if (how == SHUT_WR || how == SHUT_RDWR) 849 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 850 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); 851 return (0); 852 } 853 854 void 855 sorflush(struct socket *so) 856 { 857 struct sockbuf *sb, asb; 858 struct protosw *pr; 859 int s; 860 861 sb = &so->so_rcv; 862 pr = so->so_proto; 863 sb->sb_flags |= SB_NOINTR; 864 (void) sblock(sb, M_WAITOK); 865 s = splimp(); 866 socantrcvmore(so); 867 sbunlock(sb); 868 asb = *sb; 869 memset((caddr_t)sb, 0, sizeof(*sb)); 870 splx(s); 871 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 872 
(*pr->pr_domain->dom_dispose)(asb.sb_mb); 873 sbrelease(&asb); 874 } 875 876 int 877 sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) 878 { 879 int error; 880 struct mbuf *m; 881 882 error = 0; 883 m = m0; 884 if (level != SOL_SOCKET) { 885 if (so->so_proto && so->so_proto->pr_ctloutput) 886 return ((*so->so_proto->pr_ctloutput) 887 (PRCO_SETOPT, so, level, optname, &m0)); 888 error = ENOPROTOOPT; 889 } else { 890 switch (optname) { 891 892 case SO_LINGER: 893 if (m == NULL || m->m_len != sizeof(struct linger)) { 894 error = EINVAL; 895 goto bad; 896 } 897 so->so_linger = mtod(m, struct linger *)->l_linger; 898 /* fall thru... */ 899 900 case SO_DEBUG: 901 case SO_KEEPALIVE: 902 case SO_DONTROUTE: 903 case SO_USELOOPBACK: 904 case SO_BROADCAST: 905 case SO_REUSEADDR: 906 case SO_REUSEPORT: 907 case SO_OOBINLINE: 908 case SO_TIMESTAMP: 909 if (m == NULL || m->m_len < sizeof(int)) { 910 error = EINVAL; 911 goto bad; 912 } 913 if (*mtod(m, int *)) 914 so->so_options |= optname; 915 else 916 so->so_options &= ~optname; 917 break; 918 919 case SO_SNDBUF: 920 case SO_RCVBUF: 921 case SO_SNDLOWAT: 922 case SO_RCVLOWAT: 923 { 924 int optval; 925 926 if (m == NULL || m->m_len < sizeof(int)) { 927 error = EINVAL; 928 goto bad; 929 } 930 931 /* 932 * Values < 1 make no sense for any of these 933 * options, so disallow them. 934 */ 935 optval = *mtod(m, int *); 936 if (optval < 1) { 937 error = EINVAL; 938 goto bad; 939 } 940 941 switch (optname) { 942 943 case SO_SNDBUF: 944 case SO_RCVBUF: 945 if (sbreserve(optname == SO_SNDBUF ? 946 &so->so_snd : &so->so_rcv, 947 (u_long) optval) == 0) { 948 error = ENOBUFS; 949 goto bad; 950 } 951 break; 952 953 /* 954 * Make sure the low-water is never greater than 955 * the high-water. 956 */ 957 case SO_SNDLOWAT: 958 so->so_snd.sb_lowat = 959 (optval > so->so_snd.sb_hiwat) ? 960 so->so_snd.sb_hiwat : optval; 961 break; 962 case SO_RCVLOWAT: 963 so->so_rcv.sb_lowat = 964 (optval > so->so_rcv.sb_hiwat) ? 
965 so->so_rcv.sb_hiwat : optval; 966 break; 967 } 968 break; 969 } 970 971 case SO_SNDTIMEO: 972 case SO_RCVTIMEO: 973 { 974 struct timeval *tv; 975 short val; 976 977 if (m == NULL || m->m_len < sizeof(*tv)) { 978 error = EINVAL; 979 goto bad; 980 } 981 tv = mtod(m, struct timeval *); 982 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { 983 error = EDOM; 984 goto bad; 985 } 986 val = tv->tv_sec * hz + tv->tv_usec / tick; 987 988 switch (optname) { 989 990 case SO_SNDTIMEO: 991 so->so_snd.sb_timeo = val; 992 break; 993 case SO_RCVTIMEO: 994 so->so_rcv.sb_timeo = val; 995 break; 996 } 997 break; 998 } 999 1000 default: 1001 error = ENOPROTOOPT; 1002 break; 1003 } 1004 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1005 (void) ((*so->so_proto->pr_ctloutput) 1006 (PRCO_SETOPT, so, level, optname, &m0)); 1007 m = NULL; /* freed by protocol */ 1008 } 1009 } 1010 bad: 1011 if (m) 1012 (void) m_free(m); 1013 return (error); 1014 } 1015 1016 int 1017 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) 1018 { 1019 struct mbuf *m; 1020 1021 if (level != SOL_SOCKET) { 1022 if (so->so_proto && so->so_proto->pr_ctloutput) { 1023 return ((*so->so_proto->pr_ctloutput) 1024 (PRCO_GETOPT, so, level, optname, mp)); 1025 } else 1026 return (ENOPROTOOPT); 1027 } else { 1028 m = m_get(M_WAIT, MT_SOOPTS); 1029 m->m_len = sizeof(int); 1030 1031 switch (optname) { 1032 1033 case SO_LINGER: 1034 m->m_len = sizeof(struct linger); 1035 mtod(m, struct linger *)->l_onoff = 1036 so->so_options & SO_LINGER; 1037 mtod(m, struct linger *)->l_linger = so->so_linger; 1038 break; 1039 1040 case SO_USELOOPBACK: 1041 case SO_DONTROUTE: 1042 case SO_DEBUG: 1043 case SO_KEEPALIVE: 1044 case SO_REUSEADDR: 1045 case SO_REUSEPORT: 1046 case SO_BROADCAST: 1047 case SO_OOBINLINE: 1048 case SO_TIMESTAMP: 1049 *mtod(m, int *) = so->so_options & optname; 1050 break; 1051 1052 case SO_TYPE: 1053 *mtod(m, int *) = so->so_type; 1054 break; 1055 1056 case SO_ERROR: 1057 
*mtod(m, int *) = so->so_error; 1058 so->so_error = 0; 1059 break; 1060 1061 case SO_SNDBUF: 1062 *mtod(m, int *) = so->so_snd.sb_hiwat; 1063 break; 1064 1065 case SO_RCVBUF: 1066 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1067 break; 1068 1069 case SO_SNDLOWAT: 1070 *mtod(m, int *) = so->so_snd.sb_lowat; 1071 break; 1072 1073 case SO_RCVLOWAT: 1074 *mtod(m, int *) = so->so_rcv.sb_lowat; 1075 break; 1076 1077 case SO_SNDTIMEO: 1078 case SO_RCVTIMEO: 1079 { 1080 int val = (optname == SO_SNDTIMEO ? 1081 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1082 1083 m->m_len = sizeof(struct timeval); 1084 mtod(m, struct timeval *)->tv_sec = val / hz; 1085 mtod(m, struct timeval *)->tv_usec = 1086 (val % hz) * tick; 1087 break; 1088 } 1089 1090 default: 1091 (void)m_free(m); 1092 return (ENOPROTOOPT); 1093 } 1094 *mp = m; 1095 return (0); 1096 } 1097 } 1098 1099 void 1100 sohasoutofband(struct socket *so) 1101 { 1102 struct proc *p; 1103 1104 if (so->so_pgid < 0) 1105 gsignal(-so->so_pgid, SIGURG); 1106 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1107 psignal(p, SIGURG); 1108 selwakeup(&so->so_rcv.sb_sel); 1109 } 1110