1 /* $OpenBSD: uipc_socket.c,v 1.205 2017/09/15 19:29:28 bluhm Exp $ */ 2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <net/if.h>
#include <sys/pool.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

/* listen(2) backlog is clamped into [sominconn, somaxconn] by solisten() */
int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif

/*
 * Initialize the pools backing socket and splice allocations.
 * Called once at boot.
 */
void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_attach == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	/* record privilege and credentials of the creating process */
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;

	s = solock(so);
	error = (*prp->pr_attach)(so, proto);
	if (error) {
		/* mark NOFDREF so sofree() will actually release the socket */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		sounlock(s);
		return (error);
	}
	sounlock(s);
	*aso = so;
	return (0);
}

/*
 * Bind a name to a socket via the protocol's PRU_BIND request.
 * Caller must hold the socket lock.
 */
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int error;

	soassertlocked(so);

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	return (error);
}

/*
 * Mark the socket as willing to accept connections and set the
 * backlog, clamped into the [sominconn, somaxconn] range.
 */
int
solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EOPNOTSUPP);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = solock(so);
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		sounlock(s);
		return (error);
	}
	/* only set SO_ACCEPTCONN if no connection is already queued */
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	sounlock(s);
	return (0);
}

/*
 * Release a socket once its protocol control block is gone and the
 * last file descriptor reference has been dropped.  No-op otherwise.
 */
void
sofree(struct socket *so)
{
	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so))
			sounsplice(so->so_sp->ssp_soback, so,
			    so->so_sp->ssp_soback != so);
		if (isspliced(so))
			sounsplice(so, so->so_sp->ssp_socket, 0);
		pool_put(&sosplice_pool, so->so_sp);
		so->so_sp = NULL;
	}
#endif /* SOCKET_SPLICE */
	sbrelease(so, &so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error = 0;

	s = solock(so);
	/* a listening socket drops everything still queued for accept(2) */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* wait (up to so_linger) for the disconnect */
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* preserve the first error seen, not the detach error */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
	sofree(so);
	sounlock(s);
	return (error);
}

/*
 * Forcibly tear down a (pending) connection via PRU_ABORT.
 * Caller must hold the socket lock.
 */
int
soabort(struct socket *so)
{
	soassertlocked(so);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

/*
 * Accept a connection: clear NOFDREF and fetch the peer name through
 * PRU_ACCEPT, unless the connection was already aborted.
 */
int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	return (error);
}

/*
 * Connect two sockets to each other (PRU_CONNECT2) -- presumably the
 * socketpair(2) back end; confirm against the callers.
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	soassertlocked(so1);
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	return (error);
}

/*
 * Initiate a disconnect on a connected socket via PRU_DISCONNECT.
 */
int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error, s;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_LOCAL's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_LOCAL &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

	s = solock(so);
restart:
	if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    (so->so_proto->pr_domain->dom_family != AF_LOCAL &&
		    clen > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy from user space without holding the
				 * socket lock; uiomove() may sleep/fault.
				 */
				sounlock(s);
				error = m_getuio(&top, atomic, space, uio);
				s = solock(so);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			/* protocol consumed top and control, even on error */
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	sounlock(s);
	m_freem(top);
	m_freem(control);
	return (error);
}

/*
 * Copy up to 'space' bytes from 'uio' into a freshly allocated mbuf
 * chain returned through *mp.  Must be called without the socket lock
 * (uiomove() may sleep).
 */
int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
			m->m_pkthdr.len = 0;
			m->m_pkthdr.ph_ifidx = 0;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			/* try a large cluster first, fall back to MCLBYTES */
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				MH_ALIGN(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the callers locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network for the entire time here, we release
 * the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	/* out-of-band data is fetched separately through PRU_RCVOOB */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		s = solock(so);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		sounlock(s);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	s = solock(so);
	if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
		sounlock(s);
		return (error);
	}

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sounlock(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	/* consume any leading control (MT_CONTROL) mbufs of the record */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS) {
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					pr->pr_domain->dom_dispose(cm);
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			/* drop the socket lock across the copyout */
			sounlock(s);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			s = solock(so);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(so, &so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				sounlock(s);
				/* deliberate: short count, no error (above) */
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		sounlock(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	sounlock(s);
	return (error);
}

/*
 * Shut down part or all of a connection (shutdown(2) back end).
 * SHUT_RD flushes the receive side; SHUT_WR forwards PRU_SHUTDOWN
 * to the protocol; SHUT_RDWR does both.
 */
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int s, error = 0;

	s = solock(so);
	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
		break;
	default:
		error = EINVAL;
		break;
	}
	sounlock(s);

	return (error);
}

/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * detach the buffered mbufs into a dummy socket and release them
 * (disposing of any in-flight SCM_RIGHTS) outside the live sockbuf.
 */
void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct socket aso;
	int error;

	sb->sb_flags |= SB_NOINTR;
	error = sblock(so, sb, M_WAITOK);
	/* with SB_NOINTR and M_WAITOK sblock() must not fail */
	KASSERT(error == 0);
	socantrcvmore(so);
	sbunlock(sb);
	aso.so_proto = pr;
	aso.so_rcv = *sb;
	memset(sb, 0, sizeof (*sb));
	/* XXX - the memset stomps all over so_rcv */
	if
 (aso.so_rcv.sb_flags & SB_KNOTE) {
		/* preserve registered kevent notes across the memset */
		sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
	sbrelease(&aso, &aso.so_rcv);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

/*
 * Splice socket 'so' onto the socket referenced by descriptor 'fd':
 * received data is forwarded in-kernel to the drain socket's send
 * buffer.  'max' bounds the number of bytes to splice (0 = unlimited),
 * 'tv' is an optional idle timeout.  A negative 'fd' unsplices.
 * Caller must hold the socket lock.
 */
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct sosplice *sp;
	int error = 0;

	soassertlocked(so);

	if (sosplice_taskq == NULL)
		sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, 0);
	if (sosplice_taskq == NULL)
		return (ENOMEM);

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL) {
		sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		/* re-check: pool_get may have slept, someone else won */
		if (so->so_sp == NULL)
			so->so_sp = sp;
		else
			pool_put(&sosplice_pool, sp);
	}

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		/* Lock receive buffer. */
		if ((error = sblock(so, &so->so_rcv,
		    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) {
			return (error);
		}
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 1);
		sbunlock(&so->so_rcv);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
		return (EINVAL);

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;
	if (sosp->so_sp == NULL) {
		sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		/* same sleep/re-check dance as above */
		if (sosp->so_sp == NULL)
			sosp->so_sp = sp;
		else
			pool_put(&sosplice_pool, sp);
	}

	/* Lock both receive and send buffer. */
	if ((error = sblock(so, &so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) {
		FRELE(fp, curproc);
		return (error);
	}
	if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp, curproc);
		return (error);
	}

	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set_proc(&so->so_idleto, soidle, so);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flagsintr |= SB_SPLICE;
		sosp->so_snd.sb_flagsintr |= SB_SPLICE;
	}

 release:
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (error);
}

/*
 * Dissolve the splice between 'so' (source) and 'sosp' (drain).
 * If 'wakeup' is set and data is pending, wake up readers of 'so'.
 */
void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);
	sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
	so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	if (wakeup && soreadable(so))
		sorwakeup(so);
}

/*
 * Splice idle-timeout handler: flag ETIMEDOUT and unsplice.
 */
void
soidle(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 1);
	}
	sounlock(s);
}

/*
 * Deferred splice work: run somove() from task context.
 */
void
sotask(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * We may not sleep here as sofree() and unsplice() may be
		 * called from softnet interrupt context.  This would remove
		 * the socket during somove().
		 */
		somove(so, M_DONTWAIT);
	}
	sounlock(s);

	/* Avoid user land starvation. */
	yield();
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * Return value 0 means splicing has been finished, 1 continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	soassertlocked(so);

 nextpkt:
	/* Bail out on any condition that ends the splice. */
	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	/* Drain errors set by splicing itself do not end the splice here. */
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
	    sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(sosp, &sosp->so_snd);
	/* Grant some extra room so urgent data can always be pushed. */
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		/* Record carried no data at all; drop it and try the next. */
		sbdroprecord(&so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
			(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
			    NULL, NULL, NULL);
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		/* An atomic record must be moved in one piece or not at all. */
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
1361 */ 1362 m = so->so_rcv.sb_mb; 1363 while (m && m->m_type == MT_CONTROL) { 1364 sbfree(&so->so_rcv, m); 1365 so->so_rcv.sb_mb = m_free(m); 1366 m = so->so_rcv.sb_mb; 1367 sbsync(&so->so_rcv, nextrecord); 1368 } 1369 1370 SBLASTRECORDCHK(&so->so_rcv, "somove 2"); 1371 SBLASTMBUFCHK(&so->so_rcv, "somove 2"); 1372 1373 /* Take at most len mbufs out of receive buffer. */ 1374 for (off = 0, mp = &m; off <= len && *mp; 1375 off += (*mp)->m_len, mp = &(*mp)->m_next) { 1376 u_long size = len - off; 1377 1378 #ifdef DIAGNOSTIC 1379 if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER) 1380 panic("somove type: so %p, so_type %d, m %p, " 1381 "m_type %d", so, so->so_type, *mp, (*mp)->m_type); 1382 #endif 1383 if ((*mp)->m_len > size) { 1384 /* 1385 * Move only a partial mbuf at maximum splice length or 1386 * if the drain buffer is too small for this large mbuf. 1387 */ 1388 if (!maxreached && so->so_snd.sb_datacc > 0) { 1389 len -= size; 1390 break; 1391 } 1392 *mp = m_copym(so->so_rcv.sb_mb, 0, size, wait); 1393 if (*mp == NULL) { 1394 len -= size; 1395 break; 1396 } 1397 so->so_rcv.sb_mb->m_data += size; 1398 so->so_rcv.sb_mb->m_len -= size; 1399 so->so_rcv.sb_cc -= size; 1400 so->so_rcv.sb_datacc -= size; 1401 } else { 1402 *mp = so->so_rcv.sb_mb; 1403 sbfree(&so->so_rcv, *mp); 1404 so->so_rcv.sb_mb = (*mp)->m_next; 1405 sbsync(&so->so_rcv, nextrecord); 1406 } 1407 } 1408 *mp = NULL; 1409 1410 SBLASTRECORDCHK(&so->so_rcv, "somove 3"); 1411 SBLASTMBUFCHK(&so->so_rcv, "somove 3"); 1412 SBCHECK(&so->so_rcv); 1413 if (m == NULL) 1414 goto release; 1415 m->m_nextpkt = NULL; 1416 if (m->m_flags & M_PKTHDR) { 1417 m_resethdr(m); 1418 m->m_pkthdr.len = len; 1419 } 1420 1421 /* Send window update to source peer as receive buffer has changed. */ 1422 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb) 1423 (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, 1424 NULL, NULL, NULL); 1425 1426 /* Receive buffer did shrink by len bytes, adjust oob. 
 */
	state = so->so_state;
	so->so_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		/* Mark was consumed exactly; at-mark state moves with it. */
		if (oobmark == len)
			so->so_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((state & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (state & SS_RCVATMARK) {
			/* The first byte of m is the oob byte. */
			o = m_get(wait, MT_DATA);
			state &= ~SS_RCVATMARK;
		} else if (oobmark) {
			/* Push the leading normal data before the mark. */
			o = m_split(m, oobmark, wait);
			if (o) {
				error = (*sosp->so_proto->pr_usrreq)(sosp,
				    PRU_SEND, m, NULL, NULL, NULL);
				if (error) {
					if (sosp->so_state & SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			/* Send the single oob byte out of band. */
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);
			error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
			    o, NULL, NULL, NULL);
			if (error) {
				if (sosp->so_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					state |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_state &= ~SS_ISSENDING;
	error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
	    NULL);
	if (error) {
		if (sosp->so_state & SS_CANTSENDMORE)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

 release:
	sosp->so_state &= ~SS_ISSENDING;
	/* Reaching the maximum is reported as EFBIG and ends the splice. */
	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sounsplice(so, sosp, 1);
		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}

#endif /* SOCKET_SPLICE */

/*
 * Wake up processes and kevent listeners waiting on the receive
 * buffer, and invoke the upcall if one is registered.
 */
void
sorwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * TCP has a sendbuffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
		else
			somove(so, M_DONTWAIT);
	}
	/* Spliced data is not visible to userland readers. */
	if (isspliced(so))
		return;
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Wake up processes waiting on the send buffer; if this socket is a
 * splice drain, kick the splice source to push more data.
 */
void
sowwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flagsintr & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
	sowakeup(so, &so->so_snd);
}

/*
 * Set a socket option.  Non-SOL_SOCKET levels are handed to the
 * protocol; SOL_SOCKET options are handled here and, on success,
 * also passed down so the protocol can take note of them.
 * Called with the socket locked.
 */
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	soassertlocked(so);

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		/* Options requiring privilege are vetted first. */
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				return (error);
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			/* Boolean options map directly onto so_options bits. */
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE)
					return (EINVAL);
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(so, &so->so_snd, cnt))
					return (ENOBUFS);
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE)
					return (EINVAL);
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(so, &so->so_rcv, cnt))
					return (ENOBUFS);
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				/* Clamp the low water mark to the high one. */
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			/* Timeouts are stored in ticks and must fit a short. */
			val = tvtohz(&tv);
			if (val > USHRT_MAX)
				return (EDOM);

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_RTABLE:
			/* Routing table selection is a per-protocol matter. */
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				return (error);
			}
			error = ENOPROTOOPT;
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			/*
			 * Three argument forms: none (unsplice), an int fd,
			 * or a full struct splice with max and idle timeout.
			 */
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				return (EINVAL);
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto->pr_ctloutput) {
			(*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
		}
	}

	return (error);
}

/*
 * Get a socket option.  The result is placed in mbuf m; non-SOL_SOCKET
 * levels are answered by the protocol.  Called with the socket locked.
 */
int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	soassertlocked(so);

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;

			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			if (error)
				return (error);
			return (0);
		} else
			return (ENOPROTOOPT);
	} else {
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			/* Convert the tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = val / hz;
			tv.tv_usec = (val % hz) * tick;
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				if (error)
					return (error);
				break;
			}
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;

			/* Report the number of bytes spliced so far. */
			m->m_len = sizeof(off_t);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					break;
				}
				return (ENOTCONN);
			}
			return (EOPNOTSUPP);

		default:
			return (ENOPROTOOPT);
		}
		return (0);
	}
}

/*
 * Signal the owning process (group) and select/kevent waiters that
 * out-of-band data has arrived.
 */
void
sohasoutofband(struct socket *so)
{
	KERNEL_ASSERT_LOCKED();
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Attach a kevent filter to the socket, choosing the listen, read or
 * write filter depending on the socket state and requested filter.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	KERNEL_ASSERT_LOCKED();

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;

	return (0);
}

/* Detach a read knote; clear SB_KNOTE once the list becomes empty. */
void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

/* Read filter: report readability of the receive buffer. */
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	/* A spliced socket never appears readable to userland. */
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
	}

	return rv;
}

/* Detach a write knote; clear SB_KNOTE once the list becomes empty. */
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

/* Write filter: report writability of the send buffer. */
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	kn->kn_data = sbspace(so, &so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
	}

	return (rv);
}

/* Listen filter: a listening socket is ready when connections queue. */
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;

	return (kn->kn_data != 0);
}

#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

/* Dump one socket buffer via the given printf-like function. */
void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_sel: ...\n");
	(*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
	(*pr)("\tsb_flags: %i\n", sb->sb_flags);
	(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}

/* Dump the full state of a socket from the kernel debugger. */
void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: 0x%04x\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_pgid: %i\n", so->so_pgid);
	(*pr)("so_siguid: %i\n", so->so_siguid);
	(*pr)("so_sigeuid: %i\n", so->so_sigeuid);
	(*pr)("so_obmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif